# Link census tracts to TOC tiers
* For each census tract, find the proportion of parcel area (`parcelsqft / parcel_tot`) that falls within each TOC tier

In [1]:
import intake
import numpy as np
import pandas as pd
import geopandas as gpd
import boto3
import utils

In [2]:
# Name of the S3 bucket used to build the file URLs read later in the notebook.
bucket_name = "city-planning-entitlements"

# S3 client handle (not referenced by the other cells visible in this notebook).
s3 = boto3.client("s3")

# Data catalog assembled from every YAML file matched by ../catalogs/*.yml.
catalog = intake.open_catalog("../catalogs/*.yml")

In [3]:
# EPSG code of the projected CRS that every layer is reprojected to.
# EPSG:2229 is NAD83 / California zone 5, whose linear unit is the US survey
# foot, so `.area` computed below is in square feet.
TARGET_EPSG = 2229

# `to_crs(epsg=...)` is used instead of the `{'init': 'epsg:2229'}` dict:
# the `+init=<authority>:<code>` form is deprecated in pyproj 2+ / GeoPandas
# 0.7+ and emits a FutureWarning on every call.

# Parcel layer with entitlement attributes (GeoJSON stored on S3).
toc_parcels = gpd.read_file(
    f"s3://{bucket_name}/gis/intermediate/toc_eligible_parcels_with_entitlements.geojson"
).to_crs(epsg=TARGET_EPSG)

# Parcel-to-tract crosswalk (zipped file stored on S3).
crosswalk = gpd.read_file(
    f'zip+s3://{bucket_name}/gis/intermediate/toc_parcels_tracts.zip'
).to_crs(epsg=TARGET_EPSG)

# Census tract polygons from the project catalog.
tracts = catalog.census_tracts.read().to_crs(epsg=TARGET_EPSG)

# Reduce tracts to an ID plus its polygon area; the geometry itself is
# dropped once the area has been computed.
tracts = (
    tracts[['GEOID10', 'geometry']]
    .rename(columns={'GEOID10': 'GEOID'})
    .assign(tract_sqft=tracts.geometry.area)
    .drop(columns='geometry')
)

In [4]:
# Attach the TOC entitlement attributes to the parcel-to-tract crosswalk.
# The left join keeps every crosswalk parcel, matched or not, and
# validate='1:1' makes pandas raise if AIN is duplicated on either side.
toc_attributes = toc_parcels.drop(columns = 'geometry')
m1 = pd.merge(
    left = crosswalk,
    right = toc_attributes,
    how = 'left',
    on = 'AIN',
    validate = '1:1',
)

# Attach each tract's area (tract_sqft). `tracts` no longer carries a geometry
# column at this point, so only the area comes across. The inner join drops
# parcels whose GEOID is absent from `tracts`.
m2 = pd.merge(
    left = m1,
    right = tracts,
    how = 'inner',
    on = 'GEOID',
    validate = 'm:1',
)

In [5]:
# Columns removed before the final column selection.
drop_cols = ['x', 'y', 'num_AIN']

# Columns kept in m3, in this order.
col_order = [
    'AIN', 'TOC_Tier', 'zone_class', 'num_TOC', 'num_nonTOC',
    'parcelsqft', 'geometry', 'GEOID', 'pop', 'parcel_tot', 'tract_sqft',
]

# Parcels that don't have entitlements have missing values in these columns;
# they are recoded as integer 0.
fill_zero_cols = ['TOC_Tier', 'num_TOC', 'num_nonTOC']

m3 = (
    m2
    .drop(columns = drop_cols)
    .assign(**{col: m2[col].fillna(0).astype(int) for col in fill_zero_cols})
    .reindex(columns = col_order)
)

In [6]:
# Preview the first two rows of m3 to check the column order and the
# zero-filled TOC_Tier / num_TOC / num_nonTOC columns.
m3.head(2)

Unnamed: 0,AIN,TOC_Tier,zone_class,num_TOC,num_nonTOC,parcelsqft,geometry,GEOID,pop,parcel_tot,tract_sqft
0,2010004040,0,,0,0,58199.046307,"POLYGON ((6378779.435 1908334.491, 6378886.387...",6037113232,4265,16990260.0,22234570.0
1,2024023012,0,,0,0,22112.234435,"POLYGON ((6373354.993 1893658.458, 6373354.443...",6037135102,3987,12757960.0,18075890.0


In [7]:
# A single tract can contain parcels in more than one tier, so list every
# distinct (tract, tier) pairing.
multiple = m3[['GEOID', 'TOC_Tier']].drop_duplicates()

# Example tracts to inspect.
geoid = ['06037265202', '06037207301', '06037181000']
is_example_tract = multiple['GEOID'].isin(geoid)
multiple[is_example_tract]

Unnamed: 0,GEOID,TOC_Tier
7293,6037265202,0
7317,6037265202,3
7319,6037265202,4
13770,6037207301,3
13771,6037207301,0
13782,6037207301,4
22446,6037181000,0
22449,6037181000,2


In [8]:
# Sum parcel area for each tract / tier combination. parcel_tot is one of the
# group keys so that it is still available as the denominator afterwards.
group_cols = ['GEOID', 'tract_sqft', 'parcel_tot', 'TOC_Tier']

# Columns retained in the final table.
keep = ['GEOID', 'TOC_Tier', 'parcelsqft', 'pct']

m4 = (
    m3
    .groupby(group_cols)
    .agg({'parcelsqft': 'sum'})
    .reset_index()
    # pct = summed parcel area in the tier divided by parcel_tot
    .assign(pct = lambda grouped: grouped.parcelsqft / grouped.parcel_tot)
    [keep]
)

In [9]:
# Write the tract-by-tier table (m4) to a local parquet file.
output_path = '../data/crosswalk_tract_tiers.parquet'
m4.to_parquet(output_path)