# Merge in crosswalks and do aggregations

In [1]:
import numpy as np
import pandas as pd
import intake
import os

In [2]:
# Open the intake catalog(s) matched by the YAML glob in the sibling
# `catalogs` directory (path is relative to the notebook's working directory).
catalog_glob = '../catalogs/*.yml'
catalog = intake.open_catalog(catalog_glob)

In [3]:
# Load the cleaned census extract and keep only the rows for year 2017
# whose `table` value is 'pop'.
census = (
    pd.read_parquet('s3://hcid-cdbg-project-ita-data/data/raw/raw_census_cleaned.parquet')
    .loc[lambda df: (df['year'] == 2017) & (df['table'] == 'pop')]
)

In [4]:
# Catalog entries holding the tract crosswalks, listed in the same order as
# the variables they are unpacked into below.
crosswalk_entries = (
    'crosswalk_tracts_council_districts',
    'crosswalk_tracts_neighborhood_councils',
    'crosswalk_tracts_zipcodes',
    'crosswalk_tracts_congressional_districts',
    'crosswalk_tracts_neighborhoods',
)

# Read each crosswalk from the catalog, one after another.
(
    council_districts,
    neighborhood_councils,
    zipcodes,
    congressional_districts,
    neighborhoods,
) = (getattr(catalog, entry).read() for entry in crosswalk_entries)

In [5]:
# Crosswalk table for each target geography, keyed by geography name.
boundaries = dict(
    council_districts=council_districts,
    neighborhood_councils=neighborhood_councils,
    zipcodes=zipcodes,
    congressional_districts=congressional_districts,
    neighborhoods=neighborhoods,
)

# Left-join every crosswalk onto the filtered census rows by GEOID.
# `how='left'` keeps census rows that have no crosswalk match (their
# crosswalk columns come back as NaN); `validate='m:1'` makes pandas raise
# if a GEOID appears more than once in a crosswalk.
merged_dfs = {
    boundary_name: census.merge(crosswalk, on='GEOID', how='left', validate='m:1')
    for boundary_name, crosswalk in boundaries.items()
}


In [6]:
# Preview each merged table: show the geography name, then its first rows.
for boundary_name in merged_dfs:
    display(boundary_name, merged_dfs[boundary_name].head())

'council_districts'

Unnamed: 0,GEOID,year,variable,table,main_var,second_var,new_var,num,pct,max_val,ID1,allocate1,ID2,allocate2,ID3,allocate3
0,6037531504,2017,B01003_001E,pop,pop,,pop,4196.0,1.0,,,,,,,
1,6037531503,2017,B01003_001E,pop,pop,,pop,2765.0,1.0,,,,,,,
2,6037206031,2017,B01003_001E,pop,pop,,pop,3589.0,1.0,1.0,14.0,1.0,,,,
3,6037186301,2017,B01003_001E,pop,pop,,pop,3119.0,1.0,1.0,13.0,1.0,,,,
4,6037183104,2017,B01003_001E,pop,pop,,pop,2248.0,1.0,1.0,14.0,1.0,,,,


'neighborhood_councils'

Unnamed: 0,GEOID,year,variable,table,main_var,second_var,new_var,num,pct,max_val,ID1,allocate1,ID2,allocate2,ID3,allocate3,ID4,allocate4,ID5,allocate5
0,6037531504,2017,B01003_001E,pop,pop,,pop,4196.0,1.0,,,,,,,,,,,
1,6037531503,2017,B01003_001E,pop,pop,,pop,2765.0,1.0,,,,,,,,,,,
2,6037206031,2017,B01003_001E,pop,pop,,pop,3589.0,1.0,2.0,14.0,0.612106,37.0,0.381227,,,,,,
3,6037186301,2017,B01003_001E,pop,pop,,pop,3119.0,1.0,1.0,26.0,1.0,,,,,,,,
4,6037183104,2017,B01003_001E,pop,pop,,pop,2248.0,1.0,1.0,39.0,1.0,,,,,,,,


'zipcodes'

Unnamed: 0,GEOID,year,variable,table,main_var,second_var,new_var,num,pct,max_val,ID1,allocate1,ID2,allocate2,ID3,allocate3,ID4,allocate4
0,6037531504,2017,B01003_001E,pop,pop,,pop,4196.0,1.0,,,,,,,,,
1,6037531503,2017,B01003_001E,pop,pop,,pop,2765.0,1.0,,,,,,,,,
2,6037206031,2017,B01003_001E,pop,pop,,pop,3589.0,1.0,4.0,90013.0,0.110205,90012.0,0.15735,90058.0,0.110013,90021.0,0.566114
3,6037186301,2017,B01003_001E,pop,pop,,pop,3119.0,1.0,2.0,90065.0,0.319336,91205.0,0.680664,,,,
4,6037183104,2017,B01003_001E,pop,pop,,pop,2248.0,1.0,1.0,90042.0,1.0,,,,,,


'congressional_districts'

Unnamed: 0,GEOID,year,variable,table,main_var,second_var,new_var,num,pct,max_val,ID1,allocate1,ID2,allocate2
0,6037531504,2017,B01003_001E,pop,pop,,pop,4196.0,1.0,,,,,
1,6037531503,2017,B01003_001E,pop,pop,,pop,2765.0,1.0,,,,,
2,6037206031,2017,B01003_001E,pop,pop,,pop,3589.0,1.0,1.0,634.0,1.0,,
3,6037186301,2017,B01003_001E,pop,pop,,pop,3119.0,1.0,1.0,634.0,1.0,,
4,6037183104,2017,B01003_001E,pop,pop,,pop,2248.0,1.0,1.0,634.0,1.0,,


'neighborhoods'

Unnamed: 0,GEOID,year,variable,table,main_var,second_var,new_var,num,pct,max_val,ID1,allocate1,ID2,allocate2,ID3,allocate3
0,6037531504,2017,B01003_001E,pop,pop,,pop,4196.0,1.0,,,,,,,
1,6037531503,2017,B01003_001E,pop,pop,,pop,2765.0,1.0,,,,,,,
2,6037206031,2017,B01003_001E,pop,pop,,pop,3589.0,1.0,1.0,24.0,1.0,,,,
3,6037186301,2017,B01003_001E,pop,pop,,pop,3119.0,1.0,1.0,35.0,1.0,,,,
4,6037183104,2017,B01003_001E,pop,pop,,pop,2248.0,1.0,1.0,46.0,1.0,,,,
