### Retrieve Census County Centroid Coordinate Data

In [1]:
import zipfile
import urllib.request
import shutil
import geopandas as gpd
from pathlib import Path

from shutil import unpack_archive

#### Create directory path for temp storage of downloads

In [2]:
# Temporary download directory; it is created together with any missing
# parents, and an already existing directory is not treated as an error.
ftpTmp = Path('../data/ftp-temp/')
ftpTmp.mkdir(parents=True, exist_ok=True)

#### Function - Load the shapefile information into GeoPandas

In [12]:
def get_data(filename):
    """Read `filename` with GeoPandas and return whatever `gpd.read_file` produces.

    Parameters
    ----------
    filename : path-like or str
        Location of the file handed to `gpd.read_file`.

    Returns
    -------
    The object returned by `gpd.read_file(filename)`.
    """
    return gpd.read_file(filename)

#### Extract files from compressed file (zip)

In [4]:
def extract_files(zip_path_file, extract_dir=None):
    """Unpack an archive into a target directory.

    Parameters
    ----------
    zip_path_file : path-like or str
        Archive to unpack; it is converted to `str` before being passed to
        `shutil.unpack_archive`.
    extract_dir : path-like or str, optional
        Directory that receives the extracted files. When omitted, the
        notebook-level temp directory `ftpTmp` is used, which is the
        behaviour of the original one-argument call.

    Returns
    -------
    None
    """
    # Resolve the default lazily so the global is only needed when the caller
    # does not supply an explicit destination.
    target_dir = ftpTmp if extract_dir is None else extract_dir
    unpack_archive(str(zip_path_file), extract_dir=str(target_dir))

#### URL for County Shapefile

In [5]:
# Download location of the 2018 TIGER/Line county shapefile archive.
url = 'https://www2.census.gov/geo/tiger/TIGER2018/COUNTY/tl_2018_us_county.zip'
# Local file name is taken from the last URL segment so the two cannot drift
# apart if the URL is pointed at a different archive.
file_name = url.rsplit('/', 1)[-1]

In [6]:
# Full path of the downloaded archive inside the temp download directory.
zip_path_file = ftpTmp / file_name

In [7]:
# Stream the archive from the remote server into the temp download path.
with urllib.request.urlopen(url) as response, open(zip_path_file, 'wb') as out_file:
    shutil.copyfileobj(response, out_file)

# Open the archive only after the download handle has been closed (so every
# byte is flushed to disk) and from the path it was actually saved to, rather
# than a bare file name resolved against the working directory.
# Nothing is extracted here, so no stray files are left in the working
# directory. `zf` stays bound afterwards for any later cell that reads its
# member list.
with zipfile.ZipFile(zip_path_file) as zf:
    # testzip() returns the name of the first member with a bad CRC/header,
    # or None when every member checks out.
    corrupt_member = zf.testzip()

if corrupt_member is not None:
    raise zipfile.BadZipFile(
        'Downloaded archive is corrupt; first bad member: {}'.format(corrupt_member)
    )

In [8]:

# Unpack the downloaded archive into the temp directory.
extract_files(zip_path_file)

# Read the member list directly from the downloaded archive instead of
# depending on a ZipFile object left over from an earlier cell.
with zipfile.ZipFile(zip_path_file) as archive:
    member_names = archive.namelist()

# Select the shapefile by extension rather than by its position in the
# archive, so a change in member order or count cannot bind the wrong file.
shp = next(name for name in member_names if name.lower().endswith('.shp'))
print(shp)

# add a path to the shapefile
shape_file = ftpTmp.joinpath(shp)

# call function to create a geodataframe
county_gdf = get_data(shape_file)

# Remove every extracted member and the zip file, then the temp directory.
# NOTE(review): assumes the archive is flat (files only, no sub-directories),
# as the original per-file cleanup did.
for member_name in member_names:
    ftpTmp.joinpath(member_name).unlink()
zip_path_file.unlink()
ftpTmp.rmdir()

tl_2018_us_county.shp


In [26]:
# Show the column names, non-null counts and dtypes of the loaded frame.
county_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3233 entries, 0 to 3232
Data columns (total 18 columns):
STATEFP     3233 non-null object
COUNTYFP    3233 non-null object
COUNTYNS    3233 non-null object
GEOID       3233 non-null object
NAME        3233 non-null object
NAMELSAD    3233 non-null object
LSAD        3233 non-null object
CLASSFP     3233 non-null object
MTFCC       3233 non-null object
CSAFP       1231 non-null object
CBSAFP      1899 non-null object
METDIVFP    113 non-null object
FUNCSTAT    3233 non-null object
ALAND       3233 non-null int64
AWATER      3233 non-null int64
INTPTLAT    3233 non-null object
INTPTLON    3233 non-null object
geometry    3233 non-null object
dtypes: int64(2), object(16)
memory usage: 454.7+ KB


In [29]:
# Columns removed from the frame; whatever is not listed here is kept.
cols = [
    'STATEFP',
    'COUNTYFP',
    'COUNTYNS',
    'NAME',
    'NAMELSAD',
    'LSAD',
    'CLASSFP',
    'MTFCC',
    'CSAFP',
    'CBSAFP',
    'METDIVFP',
    'FUNCSTAT',
    'ALAND',
    'AWATER',
    'geometry',
]
county_gdf.drop(columns=cols, inplace=True)

In [35]:
# Give the remaining columns county-prefixed names.
county_gdf.rename(
    columns={
        'GEOID': 'county_geoid',
        'INTPTLAT': 'county_lat',
        'INTPTLON': 'county_lon',
    },
    inplace=True,
)

In [36]:
# Re-inspect the frame after the column drop and rename.
county_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3233 entries, 0 to 3232
Data columns (total 3 columns):
county_geoid    3233 non-null object
county_lat      3233 non-null object
county_lon      3233 non-null object
dtypes: object(3)
memory usage: 75.9+ KB


#### Create directory path for output file

In [37]:
# Output directory; it is created together with any missing parents, and an
# already existing directory is not treated as an error.
outputPath = Path('../data/county/')
outputPath.mkdir(parents=True, exist_ok=True)

#### Name the output CSV file

In [38]:
# File name of the CSV written at the end of the notebook.
output_file = 'county_centroids.csv'

#### Create full path to the output CSV file

In [39]:
# Full path of the output file inside the output directory.
output = outputPath / output_file

#### Write dataframe to CSV

In [40]:
# Write the frame to the output path without the row index
# (index=False is the documented boolean form of the previous index=None).
county_gdf.to_csv(output, index=False)
# The frame holds county rows, so the message reports county records.
print('Number of county records written to file:', "{:,}".format(len(county_gdf)))

Number of tract records written to file: 3,233
