## Socioeconomic data and TOC entitlements
* Entitlements assigned to census tracts
* Which census tracts (what income levels or median household income) have seen TOC entitlements?

In [1]:
import intake
import numpy as np
import pandas as pd
import geopandas as gpd
import utils

In [2]:
# Name of the S3 bucket; it is interpolated into the s3:// paths used when loading data.
bucket_name = 'city-planning-entitlements'

# Intake catalog built from the YAML files matching ../catalogs/*.yml.
catalog = intake.open_catalog("../catalogs/*.yml")

## Merge parcels with census tracts crosswalk

In [19]:
# Tabular inputs, read directly from the project bucket on S3.

# Cleaned census data; its `table` column labels the topic of each row
# (e.g. commute, race) -- see the value counts further down.
census = pd.read_parquet(
    f's3://{bucket_name}/data/final/census_cleaned.parquet'
)

# Parcel-to-tract crosswalk (joined to parcels on AIN below).
crosswalk_parcels_tracts = pd.read_parquet(
    f's3://{bucket_name}/data/crosswalk_parcels_tracts.parquet'
)

# Tract-to-TOC-tier crosswalk (joined on GEOID and TOC_Tier below).
crosswalk_tracts_tiers = pd.read_parquet(
    f's3://{bucket_name}/data/crosswalk_tracts_tiers.parquet'
)

# Load the TOC-eligible parcels and keep, for every AIN, only the row(s) whose
# TOC_Tier equals that AIN's highest tier. The helper column `max_tier` lives
# only inside this chain and is dropped before the result is bound.
parcels = (
    gpd.read_file(
        f"s3://{bucket_name}/gis/intermediate/toc_eligible_parcels_with_entitlements.geojson"
    )
    .assign(max_tier = lambda gdf: gdf.groupby('AIN')['TOC_Tier'].transform('max'))
    .loc[lambda gdf: gdf.TOC_Tier == gdf.max_tier]
    .drop(columns = 'max_tier')
)

In [9]:
# Attach the TOC-eligible parcel attributes to the parcel-to-tract crosswalk.
# The left join keeps every crosswalk row, so parcels with no match in
# `parcels` stay in the table with missing parcel attributes.
# TODO: decide whether socioeconomic characteristics of tracts that are not
# eligible are actually wanted -- think about this more.
# validate='1:1' makes pandas raise if AIN is duplicated on either side.
m1 = crosswalk_parcels_tracts.merge(
    parcels,
    how = 'left',
    on = 'AIN',
    validate = '1:1',
)

In [16]:
# For each (tract, population, TOC tier) combination: count the parcels (AIN)
# and total the TOC and non-TOC entitlements.
# NOTE(review): groupby drops NaN keys by default, so rows that came out of the
# left join without a TOC_Tier would not appear in m2 -- confirm this is intended.
m2 = (
    m1.groupby(['GEOID', 'pop', 'TOC_Tier'])
    .agg(
        AIN = ('AIN', 'count'),
        num_TOC = ('num_TOC', 'sum'),
        num_nonTOC = ('num_nonTOC', 'sum'),
    )
    .reset_index()
)

In [31]:
# Spot-check one tract: which TOC tiers does the tract-to-tier crosswalk list for it?
geoid = ["06037297110"]
crosswalk_tracts_tiers.loc[crosswalk_tracts_tiers['GEOID'].isin(geoid)]

Unnamed: 0,GEOID,TOC_Tier,parcelsqft,pct
742,6037297110,0,310141.351629,0.102856
743,6037297110,1,6751.078537,0.002239


In [32]:
# Same sample tract (`geoid` is set in the preceding cell), now in the aggregated table.
m2.loc[m2['GEOID'].isin(geoid)]

Unnamed: 0,GEOID,pop,TOC_Tier,AIN,num_TOC,num_nonTOC
475,6037297110,4946,1.0,3,0.0,9.0
476,6037297110,4946,2.0,1,0.0,1.0
477,6037297110,4946,3.0,1,0.0,1.0


In [22]:
# Bring in the tract-to-tier crosswalk columns (parcelsqft, pct) for each
# (GEOID, TOC_Tier) pair. Because this is a left join, rows of m2 without a
# matching pair in the crosswalk keep NaN in those columns.
m3 = m2.merge(
    crosswalk_tracts_tiers,
    how = 'left',
    on = ['GEOID', 'TOC_Tier'],
    validate = '1:1',
)

In [29]:
# Browse the merged table ordered by tract id.
m3.sort_values(by = 'GEOID')

Unnamed: 0,GEOID,pop,TOC_Tier,AIN,num_TOC,num_nonTOC,parcelsqft,pct
0,06037104404,3046,1.0,1,0.0,1.0,,
1,06037113212,3373,1.0,1,0.0,1.0,,
2,06037113237,4163,1.0,2,0.0,2.0,32232.356027,0.0011
3,06037115201,7039,1.0,1,0.0,1.0,,
4,06037115202,5088,1.0,1,0.0,1.0,,
...,...,...,...,...,...,...,...,...
476,06037297110,4946,2.0,1,0.0,1.0,,
477,06037297110,4946,3.0,1,0.0,1.0,,
479,06037297120,3262,2.0,1,0.0,1.0,,
478,06037297120,3262,1.0,2,0.0,2.0,,


In [30]:
# TODO: should this be a 1:1 merge on GEOID and TOC_Tier?
# Re-inspect the sample tract after the merge to see which tiers found a match.
geoid = ["06037297110"]
m3.loc[m3['GEOID'].isin(geoid)]

Unnamed: 0,GEOID,pop,TOC_Tier,AIN,num_TOC,num_nonTOC,parcelsqft,pct
475,6037297110,4946,1.0,3,0.0,9.0,6751.078537,0.002239
476,6037297110,4946,2.0,1,0.0,1.0,,
477,6037297110,4946,3.0,1,0.0,1.0,,


## Merge in commute info

In [25]:
# Number of rows per value of the census `table` column.
census['table'].value_counts()

incomerange    3589380
commute         316710
race            168912
vehicles         42228
tenure           42228
Name: table, dtype: int64

In [27]:
# Keep only the rows of the census data whose `table` value is "commute".
commute = census[census['table'].eq("commute")]