Author: Megan Tabbutt

Latest version: 03_18_21

Notes:

Resources: 

- PyPI: https://pypi.org/project/CensusData/
- Documentation: https://jtleider.github.io/censusdata/
- County level data: https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html


Datasets:

    ACS 5-year estimates (2005-2009 to 2015-2019),
    ACS 1-year estimates (2012-2019),
    ACS 3-year estimates (2010-2012 to 2011-2013),
    ACS 1-year supplemental estimates (2014-2019),
    Census 2010 Summary File 1.


## Questions:

- Can we assume it is confined to one state? 
- Will finding the extreme N/S/E/W points be sufficient to check which counties the data covers? 


In [1]:
# Make Jupyter Notebook full screen
# Import from the public IPython.display module; IPython.core.display is the
# deprecated location for display/HTML.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [42]:
import censusdata
import pandas as pd
import geopandas
from shapely.geometry import Point
import matplotlib.pyplot as plt

## Need an interface from the GTFS data to the censusdata package

In [60]:
# Read the shape-point latitude/longitude table from the GTFS export
gtfs_data_path = "/Users/megantabbutt/simulator/data/mmt_gtfs/"
lat_lon_file = 'shapes.csv'
# The directory string already ends with "/", so plain concatenation gives the full path.
lat_lon_df = pd.read_csv(f"{gtfs_data_path}{lat_lon_file}")
lat_lon_df.head(n=3)

Unnamed: 0,shape_id,shape_code,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
0,56620,2F,43.053972,-89.475246,1,0.0
1,56620,2F,43.053967,-89.474976,2,0.013
2,56620,2F,43.053933,-89.474855,3,0.0198


### Build a GeoDataFrame from the shape points so they can be plotted below as a check
# points_from_xy takes x (longitude) first, then y (latitude).
shape_geometry = geopandas.points_from_xy(
    x=lat_lon_df.shape_pt_lon,
    y=lat_lon_df.shape_pt_lat,
)
gdf = geopandas.GeoDataFrame(lat_lon_df, geometry=shape_geometry)
gdf.head(3)

In [62]:
# Turn every row into a shapely Point.
# NOTE: points are built as Point(lat, lon), so .x is latitude and .y is
# longitude -- the reverse of the (lon, lat) order used for the GeoDataFrame
# geometry in this notebook.
ID_points = [
    Point(lat, lon)
    for lat, lon in zip(lat_lon_df['shape_pt_lat'], lat_lon_df['shape_pt_lon'])
]
ID_points[0].coords[:]

[(43.053971999999995, -89.475246)]

In [63]:
# Find the farthest points to check if multiple counties in the data exist.
#
# ID_points were built as Point(lat, lon), so .x is latitude (north/south)
# and .y is longitude (east/west).
#
# The extremes are computed directly from ID_points instead of seeding the
# search from a leftover loop variable `i`, which is undefined on a fresh
# kernel. On ties, max()/min() return the first matching point.
# Raises ValueError if ID_points is empty.
farthest_points = [
    max(ID_points, key=lambda pt: pt.x),  # North: largest latitude
    max(ID_points, key=lambda pt: pt.y),  # East: largest longitude
    min(ID_points, key=lambda pt: pt.x),  # South: smallest latitude
    min(ID_points, key=lambda pt: pt.y),  # West: smallest longitude
]

### check that the points are doing the right things... YES
# farthest_points holds four Points in North, East, South, West order; each was
# built as Point(lat, lon), so .x is the latitude and .y the longitude.
directions = ['North', 'East', 'South', 'West']
far_points = pd.DataFrame({
    'Direction': directions,
    'Latitude': [extreme.x for extreme in farthest_points],
    'Longitude': [extreme.y for extreme in farthest_points],
})

# Geometry is (x=longitude, y=latitude), matching the layout used for gdf.
far_points_geometry = geopandas.points_from_xy(far_points.Longitude, far_points.Latitude)
far_points_gdf = geopandas.GeoDataFrame(far_points, geometry=far_points_geometry)

# Overlay the four extreme points (orange) on all shape points.
fig, ax = plt.subplots(1, 1, figsize=(16, 16))
gdf.plot(ax=ax)
far_points_gdf.plot(ax=ax, color='orange', zorder=1)

In [64]:
# traverse the whole list? or will it be confined to one state?
# Load the county boundary shapefile, then keep only the rows whose
# STATEFP column equals '55' (stored under the name Wisconsin).
countyData = geopandas.read_file("/Users/megantabbutt/Repos/Civil-Hacking-Projects/BusSimProject/cb_2018_us_county_500k/cb_2018_us_county_500k.shp")
in_state_55 = countyData['STATEFP'].eq('55')
Wisconsin = countyData[in_state_55]
Wisconsin

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
497,55,003,01581061,0500000US55003,55003,Ashland,06,2706476264,3230710864,"MULTIPOLYGON (((-90.46546 47.00259, -90.46532 ..."
498,55,043,01581081,0500000US55043,55043,Grant,06,2970403781,94520011,"POLYGON ((-91.15681 42.98817, -91.15302 42.990..."
499,55,113,01581116,0500000US55113,55113,Sawyer,06,3257216619,239883874,"POLYGON ((-91.55095 46.04111, -91.55064 46.055..."
702,55,023,01581071,0500000US55023,55023,Crawford,06,1477970887,73976438,"POLYGON ((-91.21499 43.36801, -91.21336 43.370..."
703,55,037,01581078,0500000US55037,55037,Florence,06,1264232458,24111636,"POLYGON ((-88.68331 46.01414, -88.68309 46.014..."
...,...,...,...,...,...,...,...,...,...,...
2991,55,117,01581118,0500000US55117,55117,Sheboygan,06,1324851063,1967647733,"POLYGON ((-88.16227 43.89151, -88.12217 43.891..."
2992,55,139,01581129,0500000US55139,55139,Winnebago,06,1125846130,372645352,"POLYGON ((-88.88667 44.24262, -88.85030 44.242..."
3107,55,029,01581074,0500000US55029,55029,Door,06,1248277386,4890604620,"MULTIPOLYGON (((-86.95617 45.35549, -86.95463 ..."
3169,55,081,01581101,0500000US55081,55081,Monroe,06,2333330399,18586826,"POLYGON ((-90.97807 44.12853, -90.97728 44.129..."


In [None]:
# Check which Wisconsin county contains the first shape point.
#
# ID_points store (lat, lon) as (x, y), whereas the county polygons use
# (lon, lat), so the coordinates are swapped before testing containment;
# otherwise the point can never fall inside any county.
first_shape_point = ID_points[0]
point = Point(first_shape_point.y, first_shape_point.x)

# GeoSeries.contains tests every county geometry against the single point and
# yields one boolean per county; use it as a mask to show the matching row(s).
Wisconsin[Wisconsin['geometry'].contains(point)]

### Fixes:

In [None]:
import CensusDataWrapper as CDW

In [None]:
# Create a Census object from the project's CensusDataWrapper module with
# 'acs5' and 2015 (presumably survey name and year -- confirm in the wrapper).
acs5_2015 = CDW.Census('acs5', 2015)

In [None]:
# Display whatever the wrapper's getSummary() returns (defined in
# CensusDataWrapper, not shown in this notebook).
acs5_2015.getSummary()

In [None]:
# Call getStateCodes() with no argument -- presumably lists all state codes;
# confirm in CensusDataWrapper.
acs5_2015.getStateCodes()

In [None]:
# Call getStateCodes() with the partial name "Wisco" -- presumably probing how
# the wrapper handles a partial/unknown state name; TODO confirm.
acs5_2015.getStateCodes("Wisco")

In [None]:
# Call getStateCodes() with the full state name "Wisconsin".
acs5_2015.getStateCodes("Wisconsin")

In [None]:
import censusdata

In [None]:
# Look up the geographies for every state ('*') in the ACS 5-year 2015 data,
# then pull out the entry stored under the key 'Wisconsin'.
all_states_geo = censusdata.censusgeo([('state', '*')])
data = censusdata.geographies(all_states_geo, 'acs5', 2015)
data['Wisconsin']

In [None]:
# censusdata.search('survey type to search', 'year', 'type of field to search', 'search term')
sample = censusdata.search(
    'acs5',   # survey type to search
    2019,     # year
    'label',  # type of field to search
    'RACE',   # search term
)

In [None]:
# Display the search results stored in `sample`.
sample

In [None]:
# Fetch the definition of table B02001 from the ACS 5-year 2018 data; its
# entries are later passed to censusdata.download as the variable list.
# NOTE(review): the download cell requests 2019 data -- confirm the 2018
# variable names are valid for that year.
censustable_race = censusdata.censustable('acs5', 2018, 'B02001')

In [None]:
# Display the B02001 table definition.
censustable_race

In [None]:
# Display the geographies for every state ('*') in the ACS 5-year 2015 data.
# Same call as the earlier `data = ...` cell, shown here instead of stored.
censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2015)

In [None]:
# Look up the geography for state '55' in the ACS 5-year 2015 data.
# NOTE(review): this rebinds `Wisconsin`, which earlier held the county
# GeoDataFrame filtered from the shapefile; re-running the point-in-county
# cell after this one will operate on a different kind of object. Consider a
# distinct name.
Wisconsin = censusdata.geographies(censusdata.censusgeo([('state', '55')]), 'acs5', 2015)

In [None]:
# Display the geographies for every county ('*') within state '55'.
censusdata.geographies(censusdata.censusgeo([('state', '55'), ('county', '*')]), 'acs5', 2015)

In [None]:
# Look up the geography for county '025' in state '55' and keep it as DaneCounty.
DaneCounty = censusdata.geographies(censusdata.censusgeo([('state', '55'), ('county', '025')]), 'acs5', 2015)

In [None]:
# Display the DaneCounty geography lookup result.
DaneCounty

In [None]:
# List every block group ('*') in county '025' of state '55' for the ACS
# 5-year 2015 data. The lookup is done once and reused for both the count and
# the display, instead of issuing the identical geographies() call twice.
dane_block_groups = censusdata.geographies(
    censusdata.censusgeo([('state', '55'), ('county', '025'), ('block group', '*')]),
    'acs5', 2015)
print(len(dane_block_groups))
dane_block_groups

In [None]:
# Download ACS 5-year 2019 data for every block group ('*') in county '025' of
# state '55', requesting the entries of censustable_race as the variables.
dane_block_groups_geo = censusdata.censusgeo(
    [('state', '55'), ('county', '025'), ('block group', '*')]
)
race_variables = list(censustable_race)
DaneCountyBG = censusdata.download('acs5', 2019, dane_block_groups_geo, race_variables)

In [None]:
# Display the downloaded block-group data.
DaneCountyBG

In [None]:
# Inspect the type of the object held in censustable_race.
type(censustable_race)

In [None]:
# Replace censustable_race with a plain list of the items it yields when
# iterated, print each item on its own line, then display the list.
censustable_race = list(censustable_race)
for i in censustable_race:
    print(i)
censustable_race