## Socioeconomic data and TOC entitlements
* Entitlements assigned to census tracts
* Which census tracts (what income levels or median household income) have seen TOC entitlements?
* See if tract is composed of mostly TOC-eligible parcels
* Then look at Census characteristics of mostly TOC-eligible tracts vs not

In [1]:
import boto3
import geopandas as gpd
import intake
import numpy as np
import os
import pandas as pd
import pcts_census_utils
import utils

In [2]:
catalog = intake.open_catalog("../catalogs/*.yml")

s3 = boto3.client('s3')
bucket_name = 'city-planning-entitlements'

## Identify this dichotomy of tracts that are mostly TOC-eligible vs not

In [3]:
def toc_tracts_clean_and_aggregate():
    # Import data
    crosswalk_parcels_tracts = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_parcels_tracts.parquet')

    s3.download_file(bucket_name, f'gis/intermediate/TOC_Parcels.parquet', '../gis/TOC_Parcels.parquet')
    toc_parcels = gpd.read_parquet('../gis/TOC_Parcels.parquet')
    os.remove('../gis/TOC_Parcels.parquet')

    df = pd.merge(crosswalk_parcels_tracts, 
         toc_parcels[toc_parcels.TOC_Tier > 0].drop(columns = 'TOC_Tier'), 
         on = 'AIN', how = 'left', validate = '1:1')
    
    # Get rid of duplicate AIN's
    df = df[df.num_AIN == 1]
    
    # Tag if the parcel counts as in TOC tier or not
    def in_tier(row):
        if row.TOC_Tier != 0:
            return 1
        elif row.TOC_Tier == 0:
            return 0

    df = df.assign(
        in_tier = df.apply(in_tier, axis=1)
    )
    
    # Aggregate by in_tier 
    df = (df.groupby(["GEOID", "pop", "parcel_tot", "in_tier"])
          .agg({"num_AIN": "sum",
               "parcelsqft":"sum"})
          .reset_index()
         )
    
    # If GEOID has 2 observations, one in_tier==1 and other in_tier==0, let's keep the in_tier==1
    df["obs"] = df.groupby("GEOID").cumcount() + 1
    df["max_obs"] = df.groupby("GEOID")["obs"].transform("max")
    
    df = (df[(df.max_obs == 1) | 
             ((df.in_tier == 1) & (df.max_obs == 2))]
          .drop(columns = ["obs", "max_obs"])
         )
    
    
    # Also, count the total of AIN within each tract
    total_AIN = (crosswalk_parcels_tracts[crosswalk_parcels_tracts.num_AIN == 1]
                    .groupby(['GEOID'])
                    .agg({'num_AIN':'sum'})
                    .rename(columns = {'num_AIN':'total_AIN'})
                    .reset_index()
                   )
    
    # Merge together 
    df2 = pd.merge(df, total_AIN, on = 'GEOID', how = 'left', validate = 'm:1')
    
    # Calculate the % of AIN that falls within TOC tiers and % of area within TOC tiers
    df2 = (df2.assign(
            pct_AIN = df2.num_AIN / df2.total_AIN,
            pct_area = df2.parcelsqft / df2.parcel_tot,
        ).sort_values("GEOID")
           .reset_index(drop=True)
    )
    
    return df2

In [4]:
df = toc_tracts_clean_and_aggregate()

## Grab Census data

In [5]:
keep_me = ["medincome_total", "medincome_white", "medincome_hisp"]
income = pcts_census_utils.transform_census_income("income", 2018, "medincome", keep_me)

income.head()

Unnamed: 0,GEOID,medincome_hisp,medincome_total,medincome_white
0,6037101110,53625,53077,49596
1,6037101122,89257,88953,88488
2,6037101210,32039,32119,30713
3,6037101220,46393,41728,39330
4,6037101300,187989,86914,85938


In [6]:
# Commute mode
commute_modes = ["workers_transit", "workers_walk", "workers_bike"]

commute = pcts_census_utils.transform_census_percent("commute", 2018, "workers", 
                                   commute_modes, "non_car", 
                                   "non_car", "workers_total")

commute.head()

Unnamed: 0,GEOID,non_car,workers_total,pct_non_car
0,6037101110,46,1927,0.023871
1,6037101122,11,1907,0.005768
2,6037101210,171,2770,0.061733
3,6037101220,54,1513,0.035691
4,6037101300,86,2041,0.042136


In [7]:
# Renter occupied
tenure_group = ["pop_renter"]

tenure = pcts_census_utils.transform_census_percent("tenure", 2018, "pop", 
                                  tenure_group, "pop_renter", 
                                  "pop_renter", "pop_total")

tenure.head()

Unnamed: 0,GEOID,pop_renter,pop_total,pct_pop_renter
0,6037101110,2199,4219,0.521214
1,6037101122,577,3234,0.178417
2,6037101210,5247,5987,0.876399
3,6037101220,2110,3497,0.603374
4,6037101300,353,4250,0.083059


In [8]:
# Zero vehicle 
vehicle_group = ["workers_veh0"]

vehicle = pcts_census_utils.transform_census_percent("vehicles", 2018, "workers", 
                                   vehicle_group, "zero_veh_workers",
                                   "zero_veh_workers", "workers_total")

vehicle.head()

Unnamed: 0,GEOID,workers_total,zero_veh_workers,pct_zero_veh_workers
0,6037101110,1927,0,0.0
1,6037101122,1907,8,0.004195
2,6037101210,2770,114,0.041155
3,6037101220,1513,54,0.035691
4,6037101300,2041,10,0.0049


In [9]:
# Race 
race_group = ["pop_black", "pop_asian", "pop_amerind", "pop_pacis", "pop_race2", "pop_other"]

race = pcts_census_utils.transform_census_percent("race", 2018, "pop", 
                                   race_group, "non_white",
                                   "non_white", "pop_total")

race.head()

Unnamed: 0,GEOID,non_white,pop_total,pct_non_white
0,6037101110,1000,4314,0.231803
1,6037101122,402,3239,0.124112
2,6037101210,1281,6052,0.211666
3,6037101220,1003,3497,0.286817
4,6037101300,592,4297,0.137771


## Merge in Census data

In [10]:
# Grab the geometry for tracts
census_tracts = catalog.census_tracts.read()
census_tracts = (census_tracts[["GEOID10", "geometry"]]
                 .rename(columns = {"GEOID10": "GEOID"})
                 .to_crs("EPSG:4326")
                )

In [11]:
m1 = pd.merge(df, income, on = "GEOID", how = "left", validate = "1:1")
m2 = pd.merge(m1, commute, on = "GEOID", how = "left", validate = "1:1")
m3 = pd.merge(m2, vehicle, on = ["GEOID", "workers_total"], how = "left", validate = "1:1")
m4 = pd.merge(m3, tenure, on = "GEOID", how = "left", validate = "1:1")
m5 = pd.merge(m4, race, on = ["GEOID", "pop_total"], how = "left", validate = "1:1")
m6 = pd.merge(m5, census_tracts, on = "GEOID", how = "left", validate = "1:1")

m6.head()

Unnamed: 0,GEOID,pop,parcel_tot,in_tier,num_AIN,parcelsqft,total_AIN,pct_AIN,pct_area,medincome_hisp,...,workers_total,pct_non_car,zero_veh_workers,pct_zero_veh_workers,pop_renter,pop_total,pct_pop_renter,non_white,pct_non_white,geometry
0,6037101110,4566,8705370.0,0,1159,8618394.0,1159,1.0,0.990009,53625,...,1927,0.023871,0,0.0,2199,4219,0.521214,,,"MULTIPOLYGON (((-118.29792 34.26322, -118.2969..."
1,6037101122,3064,15969060.0,0,1316,15969060.0,1316,1.0,1.0,89257,...,1907,0.005768,8,0.004195,577,3234,0.178417,,,"MULTIPOLYGON (((-118.27742 34.25990, -118.2774..."
2,6037101210,6043,5081114.0,0,504,4625910.0,504,1.0,0.910413,32039,...,2770,0.061733,114,0.041155,5247,5987,0.876399,,,"MULTIPOLYGON (((-118.28592 34.25589, -118.2859..."
3,6037101220,3340,5245595.0,0,688,5189783.0,688,1.0,0.98936,46393,...,1513,0.035691,54,0.035691,2110,3497,0.603374,1003.0,0.286817,"MULTIPOLYGON (((-118.27817 34.25577, -118.2782..."
4,6037101300,4285,14116160.0,0,1403,13701580.0,1403,1.0,0.970631,187989,...,2041,0.042136,10,0.0049,353,4250,0.083059,,,"MULTIPOLYGON (((-118.26526 34.25238, -118.2652..."


## Summary stats
Adjust these later.
Start with unweighted averages, but might think about how to do weighting by population, or whatever the denominator for that table is.

In [12]:
# We will count tract as being a TOC tract if over 50% of its area or 
# over 50% of its parcels are within a TOC Tier.
def set_groups(df):
    cutoff_AIN = 0.5
    cutoff_area = 0.5
    
    def set_cutoffs(row):
        toc_AIN = 0
        toc_area = 0
        
        if (row.in_tier == 1) & (row.pct_AIN >= cutoff_AIN):
            toc_AIN = 1
        if (row.in_tier == 1) & (row.pct_area >= cutoff_area):
            toc_area = 1
        
        return pd.Series([toc_AIN, toc_area], index=['toc_AIN', 'toc_area'])
    
    with_cutoffs = df.apply(set_cutoffs, axis=1)
    
    df = pd.concat([df, with_cutoffs], axis=1)
    
    return df

In [13]:
final = set_groups(m6)

In [14]:
print(f"# tracts where groups differ based on cut-offs: {len(final[final.toc_area != final.toc_AIN])}")
print(final.toc_area.value_counts())
print(final.toc_AIN.value_counts())

# tracts where groups differ based on cut-offs: 30
1    545
0    462
Name: toc_area, dtype: int64
1    549
0    458
Name: toc_AIN, dtype: int64


In [17]:
def summary_stats(df, category_col):
    summary = df.groupby(category_col).agg({
            "medincome_total": "mean",
            "medincome_white": "mean",
            "pct_non_white": "mean",
            "pct_non_car": "mean",
            "pct_zero_veh_workers": "mean",
            "pct_pop_renter": "mean",
    }).reset_index()
    
    return summary

In [18]:
by_AIN = summary_stats(final, "toc_AIN")
by_AIN

Unnamed: 0,toc_AIN,medincome_total,medincome_white,pct_non_white,pct_non_car,pct_zero_veh_workers,pct_pop_renter
0,0,82057.697105,83441.304251,0.370274,0.076774,0.028635,0.451208
1,1,50848.561243,55686.730841,0.518688,0.197169,0.096466,0.72989


In [19]:
by_area = summary_stats(final, "toc_area")
by_area

Unnamed: 0,toc_area,medincome_total,medincome_white,pct_non_white,pct_non_car,pct_zero_veh_workers,pct_pop_renter
0,0,82608.240088,84241.217295,0.366172,0.077704,0.028409,0.44799
1,1,50099.498155,54798.258004,0.526833,0.197267,0.097155,0.734655


In [20]:
writer = pd.ExcelWriter("../outputs/07-toc-census-stats.xlsx", engine="xlsxwriter")

by_AIN.to_excel(writer, sheet_name = "by_pct_AIN")
by_area.to_excel(writer, sheet_name = "by_pct_area")

writer.save()