diff --git a/Project.toml b/Project.toml index 72819de..6026ce8 100644 --- a/Project.toml +++ b/Project.toml @@ -7,10 +7,10 @@ version = "0.0.1" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" +GeoDataFrames = "62cb38b5-d8d2-4862-a48e-6a340996859f" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" OpenAPI = "d5e62ea6-ddf3-4d43-8e4c-ad5e6c8bfd7d" -Shapefile = "8e980c4a-a4fe-5da2-b3a7-4b4b0353a2f4" [compat] DataFrames = "1.4" diff --git a/src/IPUMS.jl b/src/IPUMS.jl index f8c956a..a5e4222 100644 --- a/src/IPUMS.jl +++ b/src/IPUMS.jl @@ -6,8 +6,8 @@ module IPUMS download as dl import OpenAPI.Clients: Client - import Shapefile: - Table + import GeoDataFrames: + read using DataFrames: DataFrames, DataFrame, @@ -66,5 +66,6 @@ module IPUMS export parse_ddi export extract_download export load_ipums_extract + export load_ipums_nhgis end diff --git a/src/parsers/nhgis_parser.jl b/src/parsers/nhgis_parser.jl index 5257c9f..0bd5abb 100644 --- a/src/parsers/nhgis_parser.jl +++ b/src/parsers/nhgis_parser.jl @@ -1,9 +1,35 @@ -function load_nhgis_extract(fname::String) - +""" + load_ipums_nhgis(filepath::String) + + This function will take in the file path of an NHGIS Shapefile, and will + return a DataFrame (with a geometry column) containing the shapefile data. + +### Arguments + +- `filepath::String` - The path to the `.shp` file of an IPUMS NHGIS shapefile extract. + +### Returns + + This function outputs a Julia DataFrame that contains all of the data from + the IPUMS NHGIS extract file. Further, the metadata fields of the DataFrame + contain the metadata parsed from the Shapefile. + +# Examples + +Let's assume we have an NHGIS extract file named `US_state_1790.shp` in a folder +that contains the other shapefile files. The user can open this Shapefile using +the following code. 
+ +```julia-repl +julia> gdf = load_ipums_nhgis("test/testdata/nhgis0001_shapefile/US_state_1790.shp"); +``` + +""" +function load_ipums_nhgis(filepath::String) - gdf = DataFrame(Table(fname)) + gdf = read(filepath) return gdf diff --git a/test/runtests.jl b/test/runtests.jl index 27754d6..b29324e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,3 +22,10 @@ end @test isa(metadata(df), Dict) @test isa(colmetadata(df, :YEAR), Dict) end + +@testset "NHGIS Parser" begin + datafile = "testdata/nhgis0001_shapefile/US_state_1790.shp" + df = load_ipums_nhgis(datafile) + @test size(df) == (16, 8) + +end \ No newline at end of file diff --git a/test/testdata/nhgis0001_shapefile/US_state_1790.dbf b/test/testdata/nhgis0001_shapefile/US_state_1790.dbf new file mode 100755 index 0000000..8fa09cf Binary files /dev/null and b/test/testdata/nhgis0001_shapefile/US_state_1790.dbf differ diff --git a/test/testdata/nhgis0001_shapefile/US_state_1790.prj b/test/testdata/nhgis0001_shapefile/US_state_1790.prj new file mode 100755 index 0000000..577f7ee --- /dev/null +++ b/test/testdata/nhgis0001_shapefile/US_state_1790.prj @@ -0,0 +1 @@ +PROJCS["USA_Contiguous_Albers_Equal_Area_Conic",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Albers"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-96.0],PARAMETER["Standard_Parallel_1",29.5],PARAMETER["Standard_Parallel_2",45.5],PARAMETER["Latitude_Of_Origin",37.5],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/test/testdata/nhgis0001_shapefile/US_state_1790.sbn b/test/testdata/nhgis0001_shapefile/US_state_1790.sbn new file mode 100755 index 0000000..ecf1321 Binary files /dev/null and b/test/testdata/nhgis0001_shapefile/US_state_1790.sbn differ diff --git a/test/testdata/nhgis0001_shapefile/US_state_1790.sbx 
b/test/testdata/nhgis0001_shapefile/US_state_1790.sbx new file mode 100755 index 0000000..da471f2 Binary files /dev/null and b/test/testdata/nhgis0001_shapefile/US_state_1790.sbx differ diff --git a/test/testdata/nhgis0001_shapefile/US_state_1790.shp b/test/testdata/nhgis0001_shapefile/US_state_1790.shp new file mode 100755 index 0000000..3757d29 Binary files /dev/null and b/test/testdata/nhgis0001_shapefile/US_state_1790.shp differ diff --git a/test/testdata/nhgis0001_shapefile/US_state_1790.shp.xml b/test/testdata/nhgis0001_shapefile/US_state_1790.shp.xml new file mode 100755 index 0000000..c424c2a --- /dev/null +++ b/test/testdata/nhgis0001_shapefile/US_state_1790.shp.xml @@ -0,0 +1,996 @@ + + + + + + + Minnesota Population Center + 2006/10/10 + Historic States/Territories for the 1790 Census of Population + vector digital data + + Minneapolis, Minnesota + Minnesota Population Center + + http://www.nhgis.org + + + US_state_1790 + + + This boundary files contains historic state/territory boundaries for which the U.S. Census Bureau tabulated data and was produced by the Minnesota Population Center as part of the National Historical Geographic Information System (NHGIS) project. The NHGIS is an National Science Foundation-sponsored project (Grant No. BCS0094908) to create a digital spatial-temporal database of all available historical US aggregate census materials. The available shapefiles on the NHGIS site represent version 1.0 of historical US state/territory boundary files for the 1790 to 2000 censuses. NHGIS staff dissolved the historic county boundaries to create these state/territory boundaries. The county boundary files were created by referencing a wide variety of sources and considerable care was taken during their production. U.S. Census Bureau TIGER/Line Census 2000 files provided the 1990 and 2000 county boundaries and the roads, hydrography, and public land survey lines required to construct historic county boundaries. 
Locations of historic county boundaries were derived from William Thorndale and William Dollarhide's Map Guide to the U.S. Federal Censuses (1987), various volumes of John H. Long's Atlas of Historical County Boundaries, the Atlas of Historical County Boundaries website (http://www.newberry.org/ahcbp/), and other state-specific sources. TIGER/Line spatial features that corresponded to boundaries in these sources were used to construct the proper historic boundaries. When a TIGER/Line feature was not available, we digitized the historic boundary from one of the map sources. Aggregate data from Michael Haines' Historical Demographic, Economic and Social Data: The United States, 1790-1970 (2001) and Richard Forstall's Population of States and Counties of the United States: 1790 to 1990 (1996) were used to determine whether a county was enumerated during a given census. If a county was not enumerated, notes from those sources were used to attach the county in question to the county with which it was enumerated. If a county was not enumerated and the notes provide no details, the county was considered 'unattached' and it was merged with other unattached land within the state or territory. The unattached areas were kept in the county boundary files so that the NHGIS could dissolve counties to construct states/territories. + These data are based upon work supported by the National Science Foundation under Grant No. BCS0094908 - in infrastructure grant provided for the social sciences. Its purpose is twofold. First, the NHGIS created and freely distributes a database incorporating all available aggregate census information for the United States between 1790 and 2000. The database contains information for a wide variety of statistical (blocks, block groups, census tracts, metropolitan statistical areas) and administrative units (places, minor civil divisions, counties, states). 
Second, the NHGIS produced and freely distributes boundary files for small areas (census tracts and counties) in the United States. Boundary files for tracts are available for the 1910-2000 censuses, and boundary files for counties and states/territories are available for the 1790-2000 censuses. The boundaries contained in this file do not necessarily represent legal county boundaries. Instead, they represent the county boundaries over which the U.S. Census Bureau tabulated and published data. + States are the primary legal political and administrative subdivisions of the United States of America. The 50 states plus the District of Columbia comprise the United States. Historically, the United States also contained territories - legal subdivisions that existed as a precursor to statehood. The NHGIS assigned a three-digit code to every state/territory. The codes were adapted from the FIPS coding system, and NHGIS staff created new codes for territories. For those historical territories, the NHGIS assigned a new code based on the alphabetical order of all states or territories that have ever existed. + en + + + + 1790 + + 1790 + + 1790 + + 1790 + + 1790 + + 1790 + + 1790 + + 1790 + + 1790 + + 1790 + + ground condition + + + In work + As work is completed + + + + -95.011232 + -66.006500 + 50.133165 + 28.717365 + + 93886.9405242258224.796357-701250.2549661406222.227617 + + + None + historical + census of population and housing + states + state + census boundaries + census geography + census data + population + housing + territory + territories + census area + + + None + + + None + + + All persons are granted a limited license to use and distribute this documentation and the available data, subject to the use constraints listed below. + This dataset was produced with an intended application at the state/territory, regional or national level. Appropriate uses include thematic mapping of census data and spatial analysis of census data. 
These unaltered data may be redistributed by a third party. If these data are altered or incorporated into another dataset, they are not to be redistributed without also: altering the name of the dataset, including a Content Standars for Digital Geospatial Metadata (FGDC-STD-001-1998) compliant metadata file that describes the dataset and reflects the alteration steps that makes the new dataset different from this one, and citing this dataset in the metadata as a source for the altered dataset using the source citation specified below. If these digital data are used in the production of a report or in the compilation of a standalone printed map, then this dataset is to be cited in the report or on the map using the source citation specified below. The following source citation should be used when citing this dataset: John S. Adams, William C. Block, Mark Lindberg, Robert McMaster, Steven Ruggles, and Wendy Thomas, National Historical Geographic Information System: Pre-release Version 0.1, Minneapolis: Minnesota Population Center, University of Minnesota, 2004. No fee may be charged for use or distribution. Publications and research reports based on the database must cite it appropriately. Users are requested to send a copy of any publications, research reports, or educational material making use of the data or documentation. Printed matter should be sent to: NHGIS Minnesota Population Center University of Minnesota 50 Willey Hall 225 19th Avenue South Minneapolis, MN 55455 + + + + Petra Noble + Minnesota Population Center + + Research Fellow + + physical address +
50 Willey Hall
+
225 19th Avenue South
+ Minneapolis + Minnesota + 55455 + USA +
+ 612-625-7375 + nhgis@pop.umn.edu +
+
+ National Science Foundation Grant No. BCS0094908 + Microsoft Windows XP Version 5.1 (Build 2600) Service Pack 2; ESRI ArcCatalog 9.2.0.1324 + Shapefile
+ + + All county codes were checked against codes in the available aggregate data to ensure accuracy and consistency. The boundary file may contain more or fewer counties than the aggregate data file. Sources for the boundary files sometimes depicted counties that contained no population; thus, they have no record in the aggregate data file. Sometimes, the aggregate data files contain records for county equivalents that are not mappable (e.g., military forts). Also, the boundary file sometimes contains an "unattached" county. This section of the state was not counted by the U.S. Census Bureau, but it has been included in order to depict the correct state/territory outline. + + All spatial data have been reviewed to assure topological consistency and completeness. All polygons close and do not overlap. No voids exist within polygons. ArcGIS topology features classes were used to identify and fix topological errors (e.g., overlaps, voids, missing polygons) in the county boundary datasets. The ArcGIS topology rules used to ensure logical consistency were 'Must not overlap' and 'Must be covered by'. + Version 1.0 of the state/territory boundary files contains states and territories. Most states/territories, although not all, have a matching record in the aggregate data files. Territories without a matching record were denoted on source materials as having "no population". We still made those territories because we had a geographic representation of it. Version 1.0 boundary files have not been generalized, and states/territories containing parts of the following bodies of water do not have historically accurate coastlines: the Great Lakes, Atlantic Ocean, Pacific Ocean, and the Gulf of Mexico. No counties overlap, no voids exist within states/territories, and any polygon belongs to one and only one state/territory. Every state has a FIPS-like code (GISJOIN) that uniquely identifies it. 
The aggregate data contains a matching code (GISJOIN) so that the files can be easily joined together. + + + The horizontal positional accuracy of the source data (U.S. Census Bureau TIGER/Line 2000 files) can be found at http://www.census.gov/geo/www/tlmetadata/tl2kmeta.txt. To create historic county boundaries, we used existing TIGER lines whereever possible. When no TIGER line existed, we digitized new linework from scanned and rectified Thorndale and Dollarhide (1987) maps for John H. Long (various) maps. These historic county boundaries do not represent legal boundaries and should only be used for thematic mapping and statistical analysis. + + + + + + + William Thorndale and William Dollarhide + 1987 + Map Guide to the U.S. Federal Censuses, 1790-1920 + map + + Baltimore, MD + Genealogical Publishing Co., Inc. + + + + various + paper + Thorndale and Dollarhide 1987 + Depicts county boundaries used in 1790-1920 census + + + + + Richard Forstall + 1966 + Population of States and Counties of the United States: 1790 to 1990 + + Washington, DC + U.S. Department of Commerce, Bureau of the Census, Population Division + + + + paper + Forstall 1996 + Lists population for each county enumerated + + + + + U.S. Census Bureau + 2001 + Census 2000 TIGER/Line files [machine-readable data file] + vector digital file + + Washington, DC + U.S. Department of Commerce, Bureau of the Census, Geography Division + + http://www.census.gov/geo/www/tiger/index.html + + + 100000 + online + U.S. Census Bureau 2001b + Base linework for the historic counties + + + + + John S. Adams, William C. Block, Mark Lindberg, Robert McMaster, Steven Ruggles, and Wendy Thomas + 2004 + National Historical Geographic Information System: Pre-release Version 0.1 + vector digital file + + Minneapolis, MN + Minnesota Population Center, University of Minnesota + + http://www.nhgis.org + + + 100000 + online + Adams, et al. 2004 + + + + + Peggy Tuck Sinko (comp.), Laura Rico (comp.) and John H. Long (ed.) 
+ 2004 + Wyoming Historical Counties + map + + Chicago + The Newberry Library + + http://www.newberry.org/ahcbp/ie/index.html + + + various + online + Sinko, Rico and Long 2004e + + + + + Gordon DenBoer (comp.) and John H. Long (ed.) + 1995 + Atlas of Historical County Boundaries: Kentucky + map + + New York + Charles Scribner's Sons + + + + various + paper + DenBoer and Long 1995 + + + + + Gordon DenBoer (comp.) and John H. Long (ed.) + 2000 + Atlas of Historical County Boundaries: Minnesota + map + + New York + Charles Scribner's Sons + + + + various + paper + DenBoer and Long 2000 + + + + + Gordon DenBoer (comp.) and John H. Long (ed.) + 1998 + Atlas of Historical County Boundaries: North Carolina + map + + New York + Charles Scribner's Sons + + + + various + paper + DenBoer and Long 1998b + + + + + Gordon DenBoer (comp.) and John H. Long (ed.) + 1996 + Atlas of Historical County Boundaries: Pennsylvania + map + + New York + Charles Scribner's Sons + + + + various + paper + DenBoer and Long 1996 + + + + + Gordon DenBoer (comp.) and John H. Long (ed.) + 1997 + Atlas of Historical County Boundaries: Wisconsin + map + + New York + Charles Scribner's Sons + + + + various + paper + DenBoer and Long 1997b + + + + + Gordon DenBoer (comp.), John H. Long (comp.) and John H. Long (ed.) + 1994 + Atlas of Historical County Boundaries: Connecticut, Maine, Massachusette, Rhode Island + map + + New York + Simon and Schuster + + + + various + paper + DenBoer, Long and Long 1994 + + + + + Gordon DenBoer (comp.), Kathryn Ford Thorne (comp.), and John H. Long (ed.) + 1993 + Atlas of Historical County Boundaries: South Carolina + map + + New York + Simon and Schuster + + + + various + paper + DenBoer, Thorne, and Long 1993 + + + + + Gordon DenBoer (comp.), George E. Goodridge, Jr. (comp.) and John H. Long (ed.) 
+ 1993 + Atlas of Historical County Boundaries: New Hampshire, Vermont + map + + New York + Simon and Schuster + + + + various + paper + DenBoer, Goodridge Jr., and Long 1993 + + + + + John H. Long (comp. and ed.) + 1996 + Atlas of Historical Boundaries: Delaware, Maryland, District of Columbia + map + + New York + Charles Scribner's Sons + + + + various + paper + Long 1996 + + + + + Peggy Tuck Sinko (comp.) and John H. Long (ed.) + 1996 + Atlas of Historical County Boundaries: Alabama + map + + New York + Charles Scribner's Sons + + + + various + paper + Sinko and Long 1996a + + + + + Peggy Tuck Sinko (comp.) and John H. Long (ed.) + 1996 + Atlas of Historical County Boundaries: Indiana + map + + New York + Charles Scribner's Sons + + + + various + paper + Sinko and Long 1996b + + + + + Peggy Tuck Sinko (comp.) and John H. Long (ed.) + 1997 + Atlas of Historical County Boundaries: Michigan + map + + New York + Charles Scribner's Sons + + + + various + paper + Sinko and Long 1997 + + + + + Peggy Tuck Sinko (comp.) and John H. Long (ed.) + 1993 + Atlas of Historical County Boundaries: Mississippi + map + + New York + Simon and Schuster + + + + various + paper + Sinko and Long 1993 + + + + + Peggy Tuck Sinko (comp.) and John H. Long (ed.) + 1998 + Atlas of Historical County Boundaries: Ohio + map + + New York + Charles Scribner's Sons + + + + various + paper + Sinko and Long 1998 + + + + + Peggy Tuck Sinko (comp.) and John H. Long (ed.) + 2000 + Atlas of Historical County Boundaries: Tennessee + map + + New York + Charles Scribner's Sons + + + + various + paper + Sinko and Long 2000 + + + + + Peggy Tuck Sinko (comp.), Kathryn Ford Thorne (comp.), and John H. Long (ed.) + 1997 + Atlas of Historical County Boundaries: Florida + map + + New York + Charles Scribner's Sons + + + + various + paper + Sinko, Thorne and Long 1997 + + + + + Kathryn Ford Thorne (comp.) and John H. Long (ed.) 
+ 1993 + Atlas of Historical County Boundaries: New York + map + + New York + Simon and Schuster + + + + various + paper + Thorne and Long 1993 + + + + + Gordon DenBoer (comp.) and John H. Long (ed.) + 1997 + Atlas of Historical County Boundaries: Illinois + map + + New York + Charles Scribner's Sons + + + + various + paper + DenBoer and Long 1997a + + + + + Gordon DenBoer (comp.) and John H. Long (ed.) + 1998 + Atlas of Historical County Boundaries: Iowa + map + + New York + Charles Scribner's Sons + + + + various + paper + DenBoer and Long 1998a + + + + + Peggy Tuck Sinko (comp.), Laura Rico (comp.) and John H. Long (ed.) + 2004 + Virginia Historical Counties + map + + Chicago + The Newberry Library + + http://www.newberry.org/ahcbp/ie/index.html + + + various + online + Sinko, Rico and Long 2004b + + + + + Peggy Tuck Sinko (comp.), Laura Rico (comp.) and John H. Long (ed.) + 2004 + Montana Historical Counties + map + + Chicago + The Newberry Library + + http://www.newberry.org/ahcbp/ie/index.html + + + various + online + Sinko, Rico and Long 2004d + + + + + Peggy Tuck Sinko (comp.), Laura Rico (comp.) and John H. Long (ed.) + 2004 + West Virginia Historical Counties + map + + Chicago + The Newberry Library + + http://www.newberry.org/ahcbp/ie/index.html + + + various + online + Sinko, Rico and Long 2004c + + + + + Sylvia Nimmo + 1978 + Maps Showing the County Boundaries of Nebraska, 1854-1925 + map + + Papillion, NE + Sylvia Nimmo + + + + various + paper + Nimmo 1978 + + + + + John H. Long (ed.) and Stephen L. Hansen (comp.) + 1984 + Historical Atlas and Chronology of County Boundaries, 1788-1980, Volume 2: Illinois, Indiana, Ohio + map + + Boston + G.K. Hall & Co. + + + + various + paper + Long and Hansen 1984 + + + + + John H. Long (ed. and comp.) and Hugo P. Leaming (comp.) + 1984 + Historical Atlas and Chronology of County Boundaries, 1788-1980, Volume 3: Michigan, Wisconsin + map + + Boston + G.K. Hall & Co. 
+ + + + various + paper + Long and Leaming 1984 + + + + + John H. Long (ed.), Mark P. Donovan (comp.), and Jeffrey D. Siebert (comp.) + 1984 + Historical Atlas and Chronology of County Boundaries, 1788-1980, Volume 5: Minnesota, North Dakota, South Dakota + map + + Boston + G.K. Hall & Co. + + + + various + paper + Long, Donovan and Siebert 1984 + + + + + U.S. Census Bureau + 1932 + Fifteenth Census of the United States: 1930. Population. Volume III + map + + Washington, D.C. + U.S. Government Printing Office + + + + various + paper + U.S. Census Bureau 1932 + + + + + U.S. Census Bureau + 1943 + Sixteenth Census of the United States: 1940. Population. Volume II + map + + Washington, D.C. + U.S. Government Printing Office + + + + various + paper + U.S. Census Bureau 1943 + + + + + U.S. Census Bureau + 1952 + Census of Population: 1950. Volume II. Characteristics of the Population + map + + Washington, D.C. + U.S. Government Printing Office + + + + various + paper + U.S. Census Bureau 1952c + + + + + U.S. Census Bureau + 1963 + Census of Population: 1960. Volume I. Characteristics of the Population + map + + Washington, D.C. + U.S. Government Printing Office + + + + various + paper + U.S. Census Bureau 1963 + + + + + U.S. Census Bureau + 1973 + Census of Population: 1970. Volume I, Characteristics of the Population + map + + Washington, D.C. + U.S. Government Printing Office + + + + various + paper + U.S. Census Bureau 1973 + + + + + U.S. Census Bureau + 1981 + 1980 Census of Population: Volume I, Characteristics of the Population + map + + Washington, D.C. + U.S. Government Printing Office + + + + various + paper + U.S. Census Bureau 1981 + + + + + John H. Long (ed. and comp.) and Adele Hast (comp.) + 1984 + Historical Atlas and Chronology of County Boundaries, 1788-1980, Volume 4: Iowa, Missouri + map + + Boston + G.K. Hall & Co. + + + + various + paper + Long and Hast 1984 + + + + + John H. Long (ed.and comp.) 
+ 1984 + Historical Atlas and Chronology of County Boundaries, 1788-1980, Volume 1: Delaware, Maryland, New Jersey, Pennsylvania + map + + Boston + G.K. Hall & Co. + + + + various + paper + Long 1984 + + + + + Michael Haines + 2001 + Historical Demographic, Economic and Social Data: The United States, 1790-1970 + + Ann Arbor, MI + Inter-University Consortium for Political and Social Research + + + + Haines 2001 + + + These process steps describe, in general, the procedures used to construct the NHGIS' historical county boundaries. The production procedures vary depending on the available source materials. The base counties were derived from the U.S. Census Bureau's Census 2000 TIGER/Line files. Many historical boundaries were derived from linear features also available in the TIGER/Line files. Those boundaries not available in digital form were digitized from scanned, geo-referenced maps. + + + Using a custom AML, generated a statewide polygon coverage containing a union of 1990 and 2000 counties from the statewide census tract coverage. The projection, units, and datum for this coverage were geographic (latitude/longitude), decimal degrees, and NAD 83. + + + Using a custom AML, generated a statewide coverage containing two region subclasses. One subclass contained all linear hydrographic (water) features and the second contained all roads and non-visible boundaries. The projection, units, and datum for this coverage were geographic (latitude/longitude), decimal degrees, and NAD 83. + + + Using ArcCatalog 9.0, created feature datasets for each state in an ArcSDE database. The projection, unit, and datum of the feature datasets were geographic (latitude/longitude), decimal degrees, and NAD 83. The feature dataset's geographic extent was derived from an existing feature dataset containing all the state's census tracts. + + + Imported the county polygon coverage, the hydrographic lines, and the streets into the ArcSDE feature dataset as feature classes. 
ArcCatalog 9.0's import function was used. + + + Registered every state's featured data as versioned so that NHGIS staff could edit the feature classes. + + + The county polygon features classes were edited in ArcMap to create the historical county boundaries. Edits were based on the sources available for a given state. Long (various) and Thorndale and Dollarhide (1987) were the main sources for historical boundaries. Books depicting or describing historical county boundaries existed for some states, and these were used when available. First, the counties, water, and roads were loaded into ArcMap. Second, map topology was created for the county polygon feature class. Third, the roads or water features representing a historical boundary were selected. Fourth, the polygon or polygons that required editing were selected. Finally, the Construct Features option of the ArcMap topology toolbar was used to split the selected polygon(s) with the selected roads and/or water. If a historical county boundary followed a feature not in the roads or water feature classes, the feature was digitized into the proper feature class. + Thorndale and Dollarhide 1987 + U.S. Census Bureau 2000 + various John H. Long volumes + + + After all edits were complete, the state features datasets were copied from one ArcSDE instance to a second ArcSDE instance. + + + A topology features class was created for each state. The county polygon feature class was the only feature class participating in the topology. The topology rules were "Must not overlap" and "Must not have gaps." ArcCatalog's Create New Topology was used. + + + The topology feature class was used to find gaps and overlaps in the county polygon feature class. ArcMap's editing tools were then used to fix the errors. + + + A unique ID was then assigned to every polygon in a county polygon feature class. The ID was a concatenation of the state FIPS code and the polygon OID. 
Using the state FIPS code in the ID guaranteed that every polygon had a unique ID, even when appended together to make the entire country. + + + Using historical census data acquired from Michael Haines, a database table was created for every state and territory in Microsoft Excel. The table contained a COUNTYSHAPE_ID for every census in which a county had data tabulated for it. The COUNTYSHAPE_ID was a concatenation of the county FIPS code and the census decade. The database table was saved as a comma-delimited file and imported into the ArcSDE database using a custom VBA script. + Haines 2001 + + + Using a custom VBA script in ArcMap, a many-to-many relationship was established between county polygons and the database table containing the COUNTYSHAPE_ID. The polygons comprising a historical county were selected and "associated" to a row in the database table. + + + After finishing the many-to-many relationships for the COUNTYSHAPE_IDs for a state or territory, a custom Python script was run to generate the counties for every census. Topology rules were established to detect holes and overlaps in a census' counties. The topology feature class was used to locate holes and overlaps. Custom ArcMap VBA tools were used to fix the "associations" between a county and the polygons comprising the county. Individual polygons were deleted or added to an "association" in order to fix the topology error. This step was repeated until all the topology errors in a state were eliminated. + + + Checked the accuracy of historical county boundaries by comparing them to maps in Thorndale and Dollarhide (1987) or various John H. Long volumes. + Thorndale and Dollarhide 1987 + various John H. Long volumes + + + Projected each state feature dataset to USA Contiguous Albers Equal Area Conic projection, in meters and NAD83. Alaska was projected to the Alaska Albers Equal Area Conic projection, and Hawaii was projected to the Hawaii Albers Equal Area Conic projection. 
+ + + Used the Integrate function (with a 2.0 meter cluster tolerance) in ArcCatalog to snap the state polygon feature classes to the state boundaries contained in a nationwide feature class. We did the Integrate so that we could append all states together to make a seamless nationwide feature class containing all counties. + + + Using a custom Python script, extracted counties for each decade from the contiguous 48 states. + + + Starting in 1790, we appended all extracted counties together to make a nationwide feature class, one for each decade (1790-2000). We also created a topology feature class with the Must Not Overlap and Must Not Have Gaps rules to check for errors introduced by the append. Any errors were corrected and then this step was repeated. + + + For each decade, we dissolved the county feature class on the NHGISST attribute to create the states/territories. We then joined the ICPSRST attribute from Haines 2001 and the state/territory name to each decadal feature class. This will provide users with the NHGIS and ICPSR (Haines 2001) codes and state/territory names. + ArcCatalog 9.0 + + Attached FGDC-compliant metadata to every nationwide feature class and converted the feature classes to shapefiles for distribution on the NHGIS data access site. 
+ + Dataset copied.Server=sdenhgis.socsci.umn.edu; Service=5153; User=popgis; Version=SDE.DEFAULT20061206 + + + Vector + + + G-polygon + 0 + + + GT-polygon composed of chains + 11 + + SimplePolygonFALSE0FALSEFALSE + + + + + + coordinate pair + + 0.000000 + 0.000000 + + meters + + Albers Conical Equal Area29.50000045.500000-96.00000037.5000000.0000000.000000 + + North American Datum of 1983 + Geodetic Reference System 80 + 6378137.000000 + 298.257222 + + GCS_North_American_1983USA_Contiguous_Albers_Equal_Area_Conic + + + 1.000000 + Explicit elevation coordinate included with horizontal coordinates + + + + + + + US_state_1790Feature Class0 + NHGISSTNHGISSTString300this attribute is the NHGIS state code, which was adapted from the FIPS codes. NHGIS staff added a third character to the FIPS codes for territories.NHGISICPSRSTICPSRSTString300this attribute is the ICPSR state code, which came from the Historical, Demographic, Economic, and Social Data: The United States, 1790-2000 (Haines 2004, http://webapp.icpsr.umich.edu/cocoon/ICPSR-STUDY/02896.xml) dataset. Counties with no value in the ICPSRST column were not found in the ICPSR dataset.ICPSRSTATENAMSTATENAMString3this attribute is the NHGIS state/territory name. 
NHGIS staff adapted the county names from Thorndale and Dollarhide (1987), John Long (various), Haines (2004), and current-day FIPS codes.NHGISGISJOIN2GISJOIN2String3Alternate attribute used to join the boundary file to the aggregate data fileNHGISFIDFIDString25Internal feature number.ESRISequential unique whole numbers that are automatically generated.SHAPESHAPEString4Feature geometry.ESRICoordinates defining the features.GISJOINGISJOINString3Attribute used to join the boundary file to the aggregate data fileNHGISShapeShapeFloat19Feature geometry.ESRICoordinates defining the features.11SHAPE_AREASHAPE_AREAFloat19Area of feature in internal units squared.ESRIPositive real numbers that are automatically generated.11SHAPE_LENSHAPE_LENDouble1900 + + NHGISST - this attribute is the NHGIS state code, which was adapted from the FIPS codes. NHGIS staff added a third character to the FIPS codes for territories. + +ICPSRST - this attribute is the ICPSR state code, which came from the Historical, Demographic, Economic, and Social Data: The United States, 1790-2000 (Haines 2004, http://webapp.icpsr.umich.edu/cocoon/ICPSR-STUDY/02896.xml) dataset. States/territories with no value in the ICPSRST column were not found in the ICPSR dataset. + +STATENAM - this attribute is the NHGIS state/territory name. NHGIS staff adapted the county names from Thorndale and Dollarhide (1987), John Long (various), Haines (2004), and current-day FIPS codes. + +GISJOIN - this attribute is the NHGISST code and is a unique identifier for a state/territory in a given decade. Use this attribute to join the shapefile's attribute table to aggregate data tables downloaded from the NHGIS website. Please note that values in GISJOIN begin with "G" in order to retain leading zeros. This update took effect on May 7, 2008. + +GISJOIN2 - this attribute is an alternative linking attribute and is similar to GISJOIN except that its values do not begin with "G". 
GISJOIN2 may be used when linking to an aggregate data file extracted before May 7, 2008. + + + + + + + Minnesota Population Center + + + physical address +
50 Willey Hall
+
225 19th Ave S
+ Minneapolis + MN + 55455 +
+
+
+ Downloadable Data + + + + 0.026 + 0.000 + + +
+ + 20061010 + As necessary + + + + Minnesota Population Center + Petra Noble + + + REQUIRED: The mailing and/or physical address for the organization or individual. +
50 Willey Hall
+
225 19th Avenue South
+ Minneapolis + Minnesota + 55455 + USA +
+ 612-625-7375 + nhgis@pop.umn.edu +
+
+ FGDC Content Standards for Digital Geospatial Metadata + FGDC-STD-001-1998 + local time + + http://www.esri.com/metadata/esriprof80.html + ESRI Metadata Profile + + + http://www.esri.com/metadata/esriprof80.html + ESRI Metadata Profile + + + http://www.esri.com/metadata/esriprof80.html + ESRI Metadata Profile + + enhttp://www.esri.com/metadata/esriprof80.htmlESRI Metadata Profile
+ 20080424164435002006120610085000FALSE2008042416443500{AF0CEC66-F3DF-4E7B-A13E-B77DBA8BE218}CopyFeatures "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790" "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_CopyFeatures" # 0 0 0SelectData "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea" POPGIS.st1790_GIS_GIS2 "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2"AddField "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2" GISJOINA TEXT # # 3 # NULLABLE NON_REQUIRED # "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2"CalculateField "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2" GISJOINA [GISJOIN] VB # "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2"DeleteField "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2" GISJOIN "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2"AddField "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2" GISJOIN TEXT # # 4 # NULLABLE NON_REQUIRED # "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2"CalculateField "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2" GISJOIN ""G" & [GISJOINA]" VB # "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2"AddField "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2" GISJOIN2 TEXT # # 3 # NULLABLE NON_REQUIRED # "Database 
Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2"CalculateField "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2" GISJOIN2 [GISJOINA] VB # "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2"DeleteField "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2" GISJOINA "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2"CopyFeatures "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790_GIS_GIS2" "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790" # 0 0 0CopyFeatures "Database Connections\sdenhgis.socsci.umn.edu (5153).sde\POPGIS.Contiguous_C_aea\POPGIS.st1790" G:\conflation\shapefiles\packing\gisjoin_gisjoin2_feature_class\state\us_state_1790\US_state_1790.shp # 0 0 0file://\\thalia.socsci.umn.edu\popgis\labpcs\data_projects\istads\shape_processing\state\US_state_1790Local Area Network20080424Microsoft Windows XP Version 5.1 (Build 2600) Service Pack 2; ESRI ArcCatalog 9.2.0.1324US_state_179093886.9405242258224.7963571406222.227617-701250.2549661-95.011232-66.006550.13316528.7173651ISO 19115 Geographic Information - MetadataDIS_ESRI1.0dataset002file://\\thalia.socsci.umn.edu\popgis\labpcs\conflation\shapefiles\packing\gisjoin_gisjoin2_feature_class\state\us_state_1790\US_state_1790.shpLocal Area Network0.000ShapefileUSA_Contiguous_Albers_Equal_Area_Conic0
diff --git a/test/testdata/nhgis0001_shapefile/US_state_1790.shx b/test/testdata/nhgis0001_shapefile/US_state_1790.shx new file mode 100755 index 0000000..04fe868 Binary files /dev/null and b/test/testdata/nhgis0001_shapefile/US_state_1790.shx differ