# Link census tracts to TOC tiers
* Find what proportion of each tract's parcel area falls within each TOC tier
* Make a crosswalk that tells you what % of each tract falls in each tier

In [1]:
import intake
import numpy as np
import pandas as pd
import geopandas as gpd
import boto3
import utils

In [2]:
# Name of the S3 bucket; later cells interpolate it into s3:// paths.
bucket_name = 'city-planning-entitlements'
# boto3 client for S3.
s3 = boto3.client('s3')

# Intake catalog opened from every YAML file in ../catalogs.
catalog = intake.open_catalog("../catalogs/*.yml")

**To do:** figure out where `toc_parcels_tracts.zip` is created (probably in scripts).

What is the universe of tracts? Probably the tracts that touch TOC tiers. Either way, every tract should have one observation for each of tiers 0, 1, 2, 3, and 4; we can then fill in the corresponding % for each tier, and the percentages for a tract have to sum to exactly 1 — no more, no less.

That should be our crosswalk, so that when we merge on GEOID and TOC_Tier we no longer lose observations the way we do now.

In [3]:
# TOC-eligible parcel records with entitlement counts, reprojected to EPSG:2229.
# The CRS is given as an authority string: the older {'init': 'epsg:2229'}
# dict form is deprecated and makes pyproj emit a warning on every call.
toc_parcels = gpd.read_file(
    f"s3://{bucket_name}/gis/intermediate/toc_eligible_parcels_with_entitlements.geojson"
).to_crs("EPSG:2229")

# Parcel-to-tract crosswalk.
crosswalk = pd.read_parquet(
            f's3://{bucket_name}/data/crosswalk_parcels_tracts.parquet')

  return _prepare_from_string(" ".join(pjargs))


In [4]:
# Preview the first rows of the parcel-to-tract crosswalk.
crosswalk.head()

Unnamed: 0,AIN,parcelsqft,num_AIN,parcel_tot,TOC_Tier,GEOID,pop
0,2004001003,9685.584413,1,16990260.0,0,6037113232,4265
1,2004001004,9909.894745,1,16990260.0,0,6037113232,4265
2,2004001005,9641.455082,1,16990260.0,0,6037113232,4265
3,2004001008,11814.429891,1,16990260.0,0,6037113232,4265
4,2004001009,14878.423677,1,16990260.0,0,6037113232,4265


In [6]:
# Preview the first rows of the TOC parcel records.
toc_parcels.head()

Unnamed: 0,AIN,TOC_Tier,zone_class,num_TOC,num_nonTOC,geometry
0,2023007025,2,C1,0,1,POINT (6378341.204 1896637.395)
1,2038040006,1,C2,0,1,POINT (6371948.529 1893038.654)
2,2038040011,2,C2,0,1,POINT (6373036.425 1893025.229)
3,2038040012,1,C2,0,2,POINT (6372758.850 1892764.015)
4,2038040013,1,C2,0,1,POINT (6372539.107 1892565.468)


In [8]:
# Left-join the TOC parcel attributes onto the parcel-to-tract crosswalk, so
# every crosswalk parcel is kept whether or not it has a matching TOC record.
# validate = '1:1' makes pandas raise if the merge keys are duplicated on
# either side.
toc_attributes = toc_parcels.drop(columns = 'geometry')
m1 = crosswalk.merge(
    toc_attributes,
    on = ['AIN', 'TOC_Tier'],
    how = 'left',
    validate = '1:1',
)

In [9]:
# Preview the merged frame; crosswalk parcels with no matching TOC record
# have missing zone_class / num_TOC / num_nonTOC.
m1.head()

Unnamed: 0,AIN,parcelsqft,num_AIN,parcel_tot,TOC_Tier,GEOID,pop,zone_class,num_TOC,num_nonTOC
0,2004001003,9685.584413,1,16990260.0,0,6037113232,4265,,,
1,2004001004,9909.894745,1,16990260.0,0,6037113232,4265,,,
2,2004001005,9641.455082,1,16990260.0,0,6037113232,4265,,,
3,2004001008,11814.429891,1,16990260.0,0,6037113232,4265,,,
4,2004001009,14878.423677,1,16990260.0,0,6037113232,4265,,,


In [None]:
# NOTE(review): `m2` is not created in any cell visible above this one (the
# preceding merge produces `m1`) -- confirm where it comes from before
# relying on a top-to-bottom run.

# Columns that are not carried forward.
drop_cols = ['x', 'y', 'num_AIN']

# Columns kept in the result, in this order.
col_order = ['AIN', 'TOC_Tier', 'zone_class', 'num_TOC', 'num_nonTOC',
            'parcelsqft', 'geometry', 'GEOID', 'pop', 'parcel_tot', 'tract_sqft']


def _zero_filled_int(column):
    """Replace missing values with 0 and cast the column to int."""
    return column.fillna(0).astype(int)


_trimmed = m2.drop(columns = drop_cols)

# Parcels that don't have entitlements get tier 0 and zero counts.
m3 = _trimmed.assign(
    TOC_Tier = _zero_filled_int(m2.TOC_Tier),
    num_TOC = _zero_filled_int(m2.num_TOC),
    num_nonTOC = _zero_filled_int(m2.num_nonTOC),
).reindex(columns = col_order)

In [None]:
# Total parcel square footage for each tract / TOC tier combination.
# parcel_tot is a grouping key so it is still available for the ratio below;
# tract_sqft is also a key but does not appear in the final columns.
group_keys = ['GEOID', 'tract_sqft', 'parcel_tot', 'TOC_Tier']
tier_sqft = m3.groupby(group_keys).agg({'parcelsqft':'sum'}).reset_index()

# pct: the tier's summed parcel square footage divided by parcel_tot.
tier_share = tier_sqft.assign(pct = tier_sqft.parcelsqft / tier_sqft.parcel_tot)

# Within each GEOID, list the tier with the largest share first.
output_cols = ['GEOID', 'TOC_Tier', 'parcelsqft', 'pct']
m4 = tier_share.sort_values(['GEOID', 'pct'], ascending = [True, False])[output_cols]

In [None]:
# Write the tract-to-tier crosswalk to the local data folder and to S3.
for destination in (
    '../data/crosswalk_tracts_tiers.parquet',
    f's3://{bucket_name}/data/crosswalk_tracts_tiers.parquet',
):
    m4.to_parquet(destination)

## crosswalk_tracts_tiers
We want to expand this crosswalk so that every tract has an observation for each of tiers 0-4.

In [None]:
# Reload the tract-to-tier crosswalk from S3.
# NOTE(review): this rebinds `crosswalk`, which earlier in the notebook held
# the parcel-to-tract crosswalk; re-running the earlier merge cell after this
# one would pick up this table instead.
crosswalk = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_tracts_tiers.parquet')

In [None]:
# Build one row per (GEOID, tier) pair: repeat every distinct GEOID five
# times, then number the repeats 0-4 within each GEOID to get the tier.
n_tiers = 5
distinct_geoids = crosswalk[['GEOID']].drop_duplicates().values
just_GEOID = pd.DataFrame(
    np.repeat(distinct_geoids, n_tiers, axis=0),
    columns = ['GEOID'],
)
just_GEOID['TOC_Tier'] = just_GEOID.groupby('GEOID').cumcount()

## toc_parcels_tracts