### Aggregate Mobility Data to the PUMA level for interactive visualizations

In [1]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Load one PUMA boundary shapefile per state from folders that sit next to
# this notebook (variable names suggest New York, New Jersey, Illinois and
# California -- confirm against the folder contents).
notebook_dir = os.getcwd()

ny_shapes = gpd.read_file(notebook_dir + '/tl_2018_36_puma10/tl_2018_36_puma10.shp')
nj_shapes = gpd.read_file(notebook_dir + '/tl_2018_34_puma10/tl_2018_34_puma10.shp')
ill_shapes = gpd.read_file(notebook_dir + '/tl_2018_17_puma10/tl_2018_17_puma10.shp')
cali_shapes = gpd.read_file(notebook_dir + '/tl_2018_06_puma10/tl_2018_06_puma10.shp')

In [3]:
#concatenate puma shapefiles
# Stack the four state PUMA layers into a single frame.
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# each layer keeps its own row labels, so the same label appears once per
# state and any label-based lookup on all_shapes becomes ambiguous.
shapefile_list = [ny_shapes,nj_shapes,ill_shapes,cali_shapes]
all_shapes = pd.concat(shapefile_list, ignore_index=True)

In [4]:
#change working directory
# Move into the Data subfolder; the following cells read and write files by
# bare file name relative to it.
# The guard makes the cell safe to re-run: an unconditional
# os.chdir(os.getcwd()+'/Data') looks for Data/Data on the second run and
# raises FileNotFoundError. We only skip the chdir when we are already inside
# a folder named Data that has no nested Data folder of its own.
data_dir = os.path.join(os.getcwd(), 'Data')
if os.path.isdir(data_dir) or os.path.basename(os.getcwd()) != 'Data':
    os.chdir(data_dir)

In [5]:
# Census-tract geometries, read from the current working directory.
all_ct_shape = gpd.read_file('all_cts.shp')

# Tract-level mobility tables, one CSV per city, read in the order listed.
# NOTE(review): later cells merge origin_ct / dest_ct against the shapefile's
# GEOID column -- confirm both sides end up with the same dtype.
nyc, chic, sf, la = map(
    pd.read_csv,
    ['nyc_vis.csv', 'chic_vis.csv', 'sf_vis.csv', 'la_vis.csv'],
)

In [17]:
#spatial join census tracts to puma areas
# Match every tract through a single interior point instead of its polygon.
# With the default 'intersects' test a tract polygon also matches each
# neighbouring PUMA whose boundary it merely touches, which produces several
# rows per tract GEOID; the later merges on GEOID would then duplicate trip
# rows and inflate the summed od_counts.
# representative_point() is guaranteed to fall inside the tract geometry.
# Assumes both layers share the same CRS -- TODO confirm.
tract_points = all_ct_shape.copy()
tract_points[tract_points.geometry.name] = all_ct_shape.geometry.representative_point()

# how='right' keeps the PUMA geometry, yields one row per (PUMA, matched
# tract) pair, and retains PUMAs that matched no tract (tract columns NaN).
merged_shapes = gpd.sjoin(tract_points, all_shapes, how='right')

In [42]:
# Attach PUMA attributes to every trip by matching its destination tract
# (dest_ct) to the tract GEOID in the tract-to-PUMA lookup. Left join: trips
# whose destination tract has no match are kept with missing PUMA columns.
chic_puma, nyc_puma, la_puma, sf_puma = (
    trips.merge(merged_shapes, how='left', left_on='dest_ct', right_on='GEOID')
    for trips in (chic, nyc, la, sf)
)

In [43]:
# Drop everything the merge brought in except the destination PUMA code.
# NOTE(review): PUMACE10 looks like a within-state code; if codes repeat
# across states (e.g. NY vs NJ) the later aggregation would pool distinct
# areas -- confirm, and consider a state-qualified identifier.
dest_stage_cols = ['origin_ct','dest_ct','od_counts','PUMACE10']

chic_puma = chic_puma[dest_stage_cols]
nyc_puma = nyc_puma[dest_stage_cols]
la_puma = la_puma[dest_stage_cols]
sf_puma = sf_puma[dest_stage_cols]

In [44]:
# Label the PUMA code from the destination-tract merge as dest_puma so it
# does not collide with the PUMACE10 column the origin-tract merge adds next.
chic_puma = chic_puma.rename(columns={'PUMACE10':'dest_puma'})
nyc_puma = nyc_puma.rename(columns={'PUMACE10':'dest_puma'})
la_puma = la_puma.rename(columns={'PUMACE10':'dest_puma'})
sf_puma = sf_puma.rename(columns={'PUMACE10':'dest_puma'})

In [45]:
# Second pass over the same tract-to-PUMA lookup, this time keyed on the
# origin tract, so each trip row also carries its origin PUMA attributes.
def _attach_origin_puma(trips):
    """Left-join the tract-to-PUMA lookup onto trips via origin_ct == GEOID."""
    return trips.merge(merged_shapes, how='left', left_on='origin_ct', right_on='GEOID')

chic_puma = _attach_origin_puma(chic_puma)
nyc_puma = _attach_origin_puma(nyc_puma)
la_puma = _attach_origin_puma(la_puma)
sf_puma = _attach_origin_puma(sf_puma)

In [47]:
# Keep the trip fields plus both PUMA codes; the PUMACE10 column added by the
# origin-tract merge is relabelled origin_puma.
od_puma_cols = ['origin_ct','dest_ct','od_counts','dest_puma','PUMACE10']
origin_label = {'PUMACE10':'origin_puma'}

chic_puma = chic_puma[od_puma_cols].rename(columns=origin_label)
nyc_puma = nyc_puma[od_puma_cols].rename(columns=origin_label)
la_puma = la_puma[od_puma_cols].rename(columns=origin_label)
sf_puma = sf_puma[od_puma_cols].rename(columns=origin_label)

In [49]:
# Collapse tract-to-tract flows into PUMA-to-PUMA flows: total od_counts for
# every (origin_puma, dest_puma) pair, returned as a one-column frame indexed
# by that pair. Rows missing either PUMA code fall out under groupby's
# default handling of NaN keys.
puma_pair = ['origin_puma','dest_puma']

chic_puma = chic_puma.groupby(puma_pair)['od_counts'].sum().to_frame()
nyc_puma = nyc_puma.groupby(puma_pair)['od_counts'].sum().to_frame()
la_puma = la_puma.groupby(puma_pair)['od_counts'].sum().to_frame()
sf_puma = sf_puma.groupby(puma_pair)['od_counts'].sum().to_frame()

In [59]:
# Turn the (origin_puma, dest_puma) index levels back into ordinary columns
# so each row is a flat origin/destination/count record.
chic_puma = chic_puma.reset_index()
nyc_puma = nyc_puma.reset_index()
la_puma = la_puma.reset_index()
sf_puma = sf_puma.reset_index()

In [61]:
# Export the combined PUMA boundaries as GeoJSON; the relative path means the
# file lands in the current working directory.
puma_geojson_name = "puma.geojson"
all_shapes.to_file(puma_geojson_name, driver='GeoJSON')

In [62]:
# Write one PUMA-level origin/destination table per city to the current
# working directory. to_csv is called with its defaults, so the row index is
# written as an unnamed leading column.
for csv_name, puma_flows in (
    ('chic_puma.csv', chic_puma),
    ('nyc_puma.csv', nyc_puma),
    ('la_puma.csv', la_puma),
    ('sf_puma.csv', sf_puma),
):
    puma_flows.to_csv(csv_name)