## Socioeconomic data and TOC entitlements
* Entitlements assigned to census tracts
* Which census tracts (what income levels or median household income) have seen TOC entitlements?
* See if tract is composed of mostly TOC-eligible parcels
* Then look at Census characteristics of mostly TOC-eligible tracts vs not

In [1]:
import boto3
import geopandas as gpd
import intake
import numpy as np
import os
import pandas as pd
import pcts_census_utils
import utils

In [2]:
catalog = intake.open_catalog("../catalogs/*.yml")

s3 = boto3.client('s3')
bucket_name = 'city-planning-entitlements'

## Identify this dichotomy of tracts that are mostly TOC-eligible vs not

In [3]:
def toc_tracts_clean_and_aggregate():
    # Import data
    crosswalk_parcels_tracts = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_parcels_tracts.parquet')

    s3.download_file(bucket_name, f'gis/intermediate/TOC_Parcels.parquet', '../gis/TOC_Parcels.parquet')
    toc_parcels = gpd.read_parquet('../gis/TOC_Parcels.parquet')
    os.remove('../gis/TOC_Parcels.parquet')

    df = pd.merge(crosswalk_parcels_tracts, 
         toc_parcels[toc_parcels.TOC_Tier > 0].drop(columns = 'TOC_Tier'), 
         on = 'AIN', how = 'left', validate = '1:1')
    
    # Get rid of duplicate AIN's
    df = df[df.num_AIN == 1]
    
    # Tag if the parcel counts as in TOC tier or not
    def in_tier(row):
        if row.TOC_Tier != 0:
            return 1
        elif row.TOC_Tier == 0:
            return 0

    df = df.assign(
        in_tier = df.apply(in_tier, axis=1)
    )

    # Aggregate by in_tier 
    df = (df[df.in_tier==1]
          .groupby(["GEOID", "pop", "parcel_tot", "in_tier"])
          .agg({"num_AIN": "sum",
               "parcelsqft":"sum"})
          .reset_index()
         )
    
    # Also, count the total of AIN within each tract
    total_AIN = (crosswalk_parcels_tracts[crosswalk_parcels_tracts.num_AIN == 1]
                    .groupby(['GEOID'])
                    .agg({'num_AIN':'sum'})
                    .rename(columns = {'num_AIN':'total_AIN'})
                    .reset_index()
                   )
    
    # Merge together 
    df2 = pd.merge(df, total_AIN, on = 'GEOID', how = 'left', validate = 'm:1')
    
    # Calculate the % of AIN that falls within TOC tiers and % of area within TOC tiers
    df2 = df2.assign(
        pct_AIN = df2.num_AIN / df2.total_AIN,
        pct_area = df2.parcelsqft / df2.parcel_tot,
    )
    
    return df2

In [4]:
df = toc_tracts_clean_and_aggregate()

In [5]:
df.head()

Unnamed: 0,GEOID,pop,parcel_tot,in_tier,num_AIN,parcelsqft,total_AIN,pct_AIN,pct_area
0,6037104320,6211,7666438.0,1,396,2828009.0,901,0.439512,0.368882
1,6037104401,3775,3926756.0,1,166,1334841.0,504,0.329365,0.339935
2,6037104404,3046,4030609.0,1,260,2510833.0,451,0.576497,0.622941
3,6037104610,3501,4530658.0,1,437,3720807.0,531,0.822976,0.821251
4,6037104620,3449,4296846.0,1,230,1753480.0,573,0.401396,0.408085


## Merge in Census data

In [3]:
census = pd.read_parquet(f's3://{bucket_name}/data/final/census_cleaned.parquet')

In [15]:
# Function to transform percent tables with aggregation option
def transform_census_percent(table_name, year, main_var, aggregate_me, aggregated_row_name, numer, denom):
    """
    table_name: str
    year: numeric
    main_var: str, based on main_var column and pick only one for which the processed df is derived from
    aggregate_me: list, a list of new_var groups to aggregate into 1 group
    aggregated_row_name: str, will be new name for this aggregated group
    numer: str, based on new_var column
    denom: str, based on new_var column
    """
    df = pcts_census_utils.grab_census_table(table_name, year, main_var)

    df2 = pcts_census_utils.aggregate_group(df, aggregate_me, name = aggregated_row_name)
    
    cols = [denom, numer]
    df3 = pcts_census_utils.make_wide(df2, cols)
    
    new_var = f"pct_{aggregated_row_name}"

    df3 = (df3.assign(
        new = df3[numer] / df3[denom],
        ).rename(columns = {'new': new_var})
    )
    
    return df3

In [16]:
# Commute mode
commute_modes = ["workers_transit", "workers_walk", "workers_bike"]

commute = transform_census_percent("commute", 2018, "workers", 
                                   commute_modes, "non_car", 
                                   "non_car", "workers_total")

commute.head()

Unnamed: 0,GEOID,non_car,workers_total,pct_non_car
0,6037101110,46,1927,0.023871
1,6037101122,11,1907,0.005768
2,6037101210,171,2770,0.061733
3,6037101220,54,1513,0.035691
4,6037101300,86,2041,0.042136


In [24]:
# Renter occupied
tenure_group = ["pop_renter"]

tenure = transform_census_percent("tenure", 2018, "pop", 
                                  tenure_group, "pop_renter", 
                                  "pop_renter", "pop_total")

tenure.head()

Unnamed: 0,GEOID,pop_renter,pop_total,pct_pop_renter
0,6037101110,2199,4219,0.521214
1,6037101122,577,3234,0.178417
2,6037101210,5247,5987,0.876399
3,6037101220,2110,3497,0.603374
4,6037101300,353,4250,0.083059


In [25]:
# Zero vehicle 
vehicle_group = ["workers_veh0"]

vehicle = transform_census_percent("vehicles", 2018, "workers", 
                                   vehicle_group, "zero_veh_workers",
                                   "workers_veh0", "workers_total")

vehicle.head()

KeyError: 'workers_veh0'

In [35]:
for i in ["income", "commute", "race", "tenure", "vehicles"]:
    df = census[(census.table==i) & (census.year==2018)]
    display(df.var_type.value_counts())

dollar    23460
number    23460
Name: var_type, dtype: int64

percent    28152
number      7038
Name: var_type, dtype: int64

number    18768
Name: var_type, dtype: int64

number    4692
Name: var_type, dtype: int64

number     2346
percent    2346
Name: var_type, dtype: int64

In [36]:
df = census[(census.table=="commute") & (census.year==2018)]
display(df.var_type.value_counts())

percent    28152
number      7038
Name: var_type, dtype: int64

In [37]:
df

Unnamed: 0,GEOID,variable,year,table,main_var,last2,second_var,new_var,var_type,pct,num
120,06037101110,S0801_C01_001,2018,commute,workers,01,total,workers_total,number,1.000,1927.0
121,06037101110,S0801_C01_003,2018,commute,workers,03,car1,workers_car1,percent,0.824,1588.0
122,06037101110,S0801_C01_009,2018,commute,workers,09,transit,workers_transit,percent,0.024,46.0
123,06037101110,S0801_C01_010,2018,commute,workers,10,walk,workers_walk,percent,0.000,0.0
124,06037101110,S0801_C01_011,2018,commute,workers,11,bike,workers_bike,percent,0.000,0.0
...,...,...,...,...,...,...,...,...,...,...,...
4579915,06037990300,S0801_C03_001,2018,commute,female,01,total,female_total,number,0.000,0.0
4579916,06037990300,S0801_C03_003,2018,commute,female,03,car1,female_car1,percent,,
4579917,06037990300,S0801_C03_009,2018,commute,female,09,transit,female_transit,percent,,
4579918,06037990300,S0801_C03_010,2018,commute,female,10,walk,female_walk,percent,,


In [7]:
census.table.value_counts()

incomerange    3589380
income          422280
commute         316710
race            168912
tenure           42228
vehicles         42228
Name: table, dtype: int64