# Entitlements in TOC-eligible parcels

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import boto3

In [2]:
# S3 bucket name interpolated into the GIS read paths further down.
bucket_name = 'city-planning-entitlements'
s3 = boto3.client('s3')

# Intake catalog(s) through which the PCTS tables are read.
catalog = intake.open_catalog("../catalogs/*.yml")

In [None]:
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)

In [None]:
# Load the zipped parcel layer from S3; it carries a TOC_Tier column.
parcels = gpd.read_file(f'zip+s3://{bucket_name}/gis/intermediate/la_parcels_toc.zip')

# Show the tier distribution before any filtering.
display(parcels.TOC_Tier.value_counts())

# Keep only parcels with TOC_Tier > 0. The explicit copy makes `parcels` an
# independent frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
parcels = parcels[parcels.TOC_Tier > 0].copy()

In [None]:
# Read the four PCTS tables from the intake catalog, in the same order as the
# names on the left-hand side.
cases, app, geo_info, la_prop = (
    getattr(catalog.pcts, table_name).read()
    for table_name in ('tCASE', 'tAPLC', 'tPROP_GEO_INFO', 'tLA_PROP')
)

In [None]:
# Trim each table down to the columns used in the merges below.
cases1 = cases.loc[:, ['CASE_ID', 'APLC_ID', 'CASE_NBR', 'CASE_SEQ_NBR', 'CASE_YR_NBR', 'CASE_ACTION_ID', 'ADM_ACTION_DT']]
app1 = app.loc[:, ['APLC_ID', 'PROJ_DESC_TXT']]
geo_info1 = geo_info.loc[:, ['CASE_ID', 'PROP_ID']]
# Rows with a missing ASSR_PRCL_NBR are excluded.
la_prop1 = la_prop.loc[la_prop['ASSR_PRCL_NBR'].notna(), ['PROP_ID', 'ASSR_PRCL_NBR']]

In [None]:
# Compare the row count with the number of distinct CASE_IDs in cases1.
print(f'# obs in cases1: {cases1.shape[0]}')
print(f'# unique CASE_ID in cases1: {cases1["CASE_ID"].nunique()}')
display(cases1.head(5))

In [None]:
# Compare the row count with the distinct CASE_ID / PROP_ID counts in geo_info1.
print(f'# obs in geo_info1: {geo_info1.shape[0]}')
print(f'# unique CASE_ID in geo_info1: {geo_info1["CASE_ID"].nunique()}')
print(f'# unique PROP_ID in geo_info1: {geo_info1["PROP_ID"].nunique()}')
display(geo_info1.head(5))

# Multiple CASE_IDs can occur on the same PROP_ID. That makes sense: over time, the same parcel might have multiple entitlements.
# It also makes sense for multiple PROP_IDs to appear here.

In [None]:
# Compare the row count with the distinct PROP_ID / ASSR_PRCL_NBR counts in la_prop1.
print(f'# obs in la_prop1: {la_prop1.shape[0]}')
print(f'# unique PROP_ID in la_prop1: {la_prop1["PROP_ID"].nunique()}')
print(f'# unique ASSR_PRCL_NBR in la_prop1: {la_prop1["ASSR_PRCL_NBR"].nunique()}')
display(la_prop1.head(5))

# If PROP_ID is unique, that means every time a parcel has an entitlement, a unique PROP_ID is generated.
# That would explain why the same ASSR_PRCL_NBR has multiple PROP_IDs.

## Only keep cases from 2016 and after

In [None]:
# Restrict to cases whose CASE_YR_NBR is 2016 or later.
cases2 = cases1.loc[cases1['CASE_YR_NBR'].ge(2016)]

In [None]:
# Show how much the year filter removed and how many distinct ids remain.
print(f'# obs in cases1: {cases1.shape[0]}')
print(f'# obs in cases2: {cases2.shape[0]}')
print(f'# unique CASE_ID in cases2: {cases2["CASE_ID"].nunique()}')
print(f'# unique CASE_SEQ_NBR in cases2: {cases2["CASE_SEQ_NBR"].nunique()}')
display(cases2.head(5))

In [None]:
# Spot check: list every row for one sequence number (1169), ordered by year.
cases2.loc[cases2['CASE_SEQ_NBR'].eq(1169)].sort_values(by = ['CASE_SEQ_NBR', 'CASE_YR_NBR'], ascending = True)

## Merge with geo_info, la_prop, parcels to ID the parcels that are TOC-eligible and have entitlements

In [None]:
# Attach PROP_IDs to each case; validate enforces that CASE_ID is unique on the cases side.
m1 = cases2.merge(geo_info1, how = 'inner', on = 'CASE_ID', validate = '1:m')

In [None]:
# Attach the assessor parcel number; validate enforces that PROP_ID is unique in la_prop1.
m2 = m1.merge(la_prop1, how = 'inner', on = 'PROP_ID', validate = 'm:1')

In [None]:
# Row and distinct-key counts after joining cases to properties.
print(f'# obs in m2: {m2.shape[0]}')
print(f'# unique CASE_ID in m2: {m2["CASE_ID"].nunique()}')
print(f'# unique CASE_SEQ_NBR in m2: {m2["CASE_SEQ_NBR"].nunique()}')
print(f'# unique PROP_ID in m2: {m2["PROP_ID"].nunique()}')
print(f'# unique ASSR_PRCL_NBR in m2: {m2["ASSR_PRCL_NBR"].nunique()}')
display(m2.head(5))

In [None]:
# Keep only case rows whose assessor parcel number matches a parcel AIN;
# validate enforces that AIN is unique in parcels.
m3 = m2.merge(parcels, how = 'inner', left_on = 'ASSR_PRCL_NBR', right_on = 'AIN', validate = 'm:1')

In [None]:
# Row and distinct-key counts after restricting to the parcels layer.
print(f'# obs in m3: {m3.shape[0]}')
print(f'# unique CASE_ID in m3: {m3["CASE_ID"].nunique()}')
print(f'# unique CASE_SEQ_NBR in m3: {m3["CASE_SEQ_NBR"].nunique()}')
print(f'# unique PROP_ID in m3: {m3["PROP_ID"].nunique()}')
print(f'# unique ASSR_PRCL_NBR in m3: {m3["ASSR_PRCL_NBR"].nunique()}')
display(m3.head(5))

## Join parcels to zoning
* Subset by eligible zones, see how many TOC-eligible parcels also fall into eligible zones

In [None]:
# Load the parsed zoning layer (GeoJSON) from the project bucket.
zoning = gpd.read_file(filename = f's3://{bucket_name}/gis/raw/parsed_zoning.geojson')

In [None]:
# Zone classes counted as eligible.
eligible_zones = [
    'R2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5',
    'RD1.5', 'RD2', 'RD3', 'RD4', 'RD5', 'RD6',
    'C1', 'C2', 'C4', 'C5',
]

# Keep only zoning rows whose zone_class is in that list.
eligible_zoning = zoning.loc[zoning['zone_class'].isin(eligible_zones)]

In [None]:
# Store each parcel's centroid (computed from the active geometry) in its own column.
parcels['centroid'] = parcels.centroid

In [None]:
# Point version of the parcels: the centroid becomes the active geometry and
# only the id, tier and centroid columns are kept.
parcels2 = parcels.set_geometry('centroid')[['AIN', 'TOC_Tier', 'centroid']]

# Centroid coordinates as plain columns, used for grouping in a later cell.
parcels2['x'], parcels2['y'] = parcels2.centroid.x, parcels2.centroid.y

In [None]:
# Number the parcels that share one (x, y) centroid, starting at 1 ...
parcels2['obs'] = parcels2.groupby(by = ['x', 'y']).cumcount().add(1)
# ... and record how many parcels share that centroid.
parcels2['num_obs'] = parcels2.groupby(by = ['x', 'y'])['obs'].transform('max')

In [None]:
# Write the centroid-based parcel layer out as GeoJSON.
parcels2.to_file('../gis/parcels_with_toc.geojson', driver = 'GeoJSON')

In [None]:
# Spatially join parcel centroids to the eligible zoning polygons; the inner join
# keeps only centroids that intersect an eligible zone.
# NOTE(review): newer geopandas releases take `predicate=` instead of `op=` —
# confirm against the installed version before upgrading.
parcels_with_zoning = gpd.sjoin(
    parcels2, eligible_zoning, how = 'inner', op = 'intersects'
)
# index_right is bookkeeping added by sjoin; it is not needed downstream.
parcels_with_zoning = parcels_with_zoning.drop(columns = 'index_right')

In [None]:
# Row and distinct-AIN counts before and after the zoning join.
print(f'# obs in parcels2: {parcels2.shape[0]}')
print(f'# obs in parcels_with_zoning: {parcels_with_zoning.shape[0]}')
print(f'# unique AIN in parcels2: {parcels2["AIN"].nunique()}')
print(f'# unique AIN in parcels_with_zoning: {parcels_with_zoning["AIN"].nunique()}')
display(parcels_with_zoning.head(5))

In [None]:
# Bring the zoning and TOC attributes of each parcel onto the case rows.
# Column names present on both sides (other than AIN) receive pandas' default
# _x / _y suffixes.
m4 = m3.merge(parcels_with_zoning, how = 'inner', on = 'AIN')

In [None]:
# De-duplicate: show how many rows belong to centroids shared by several parcels,
# then keep only the first parcel (obs == 1) at each centroid.
display(m4['num_obs'].value_counts())
m5 = m4.loc[m4['obs'].eq(1)]

In [None]:
# Add the project description; validate enforces that APLC_ID is unique in app1.
m6 = m5.merge(app1, how = 'inner', on = 'APLC_ID', validate = 'm:1')

In [None]:
# Within each (CASE_SEQ_NBR, CASE_YR_NBR) group, keep only the rows that carry
# the largest CASE_ID.
m6 = m6.assign(
    max_CASE_ID = m6.groupby(['CASE_SEQ_NBR', 'CASE_YR_NBR'])['CASE_ID'].transform('max')
)
m6 = m6.loc[m6['CASE_ID'].eq(m6['max_CASE_ID'])]

In [None]:
# Helper and key columns that are not kept in the final table.
drop = [
    'centroid', 'x', 'y', 'obs', 'num_obs',
    'CASE_ID', 'max_CASE_ID', 'APLC_ID', 'ASSR_PRCL_NBR', 'PROP_ID',
    'TOC_Tier_y',
]

# With TOC_Tier_y removed, TOC_Tier_x goes back to the plain column name.
m6 = m6.drop(columns = drop).rename(columns = {'TOC_Tier_x':'TOC_Tier'})

In [None]:
# Save m6 as parquet, leaving the geometry column out of the written file.
m6.drop('geometry', axis = 1).to_parquet('../gis/m6.parquet')

In [None]:
# Final size of m6 and a peek at the first ten rows in sequence/year order.
print(f'# obs in m6: {m6.shape[0]}')
print(f'# unique CASE_SEQ_NBR in m6: {m6["CASE_SEQ_NBR"].nunique()}')
display(m6.sort_values(by = ['CASE_SEQ_NBR', 'CASE_YR_NBR']).head(10))

In [None]:
import pcts_parser

# Column labels of the Series returned by parse_pcts.
parsed_col_names = ['suffix']

def parse_pcts(row):
    """Extract the parsed suffix of a row's CASE_NBR.

    Parameters
    ----------
    row : object with a ``CASE_NBR`` attribute
        Typically one DataFrame row, as passed by ``DataFrame.apply(axis = 1)``.

    Returns
    -------
    pd.Series
        A single-element Series indexed by ``parsed_col_names``. The element is
        the ``suffix`` attribute of ``pcts_parser.PCTSCaseNumber(row.CASE_NBR)``,
        or an empty list when the case number cannot be parsed.
    """
    try:
        suffix = pcts_parser.PCTSCaseNumber(row.CASE_NBR).suffix
    except ValueError:
        # The parser rejected this case number, so there is no parsed object to
        # read from. Fall back to an empty suffix list so the row is kept.
        suffix = []
    return pd.Series([suffix], index = parsed_col_names)

# Parse every row's case number, one row at a time.
parsed = m6.apply(parse_pcts, axis = 'columns')

# Put the parsed columns next to the original m6 columns.
m7 = pd.concat([m6, parsed], axis = 'columns')

In [None]:
# Save m7 as parquet, leaving the geometry column out of the written file.
m7.drop('geometry', axis = 1).to_parquet('../gis/m7.parquet')

In [3]:
# Reload the saved table from the local parquet file.
m7 = pd.read_parquet(path = '../gis/m7.parquet')

In [4]:
# Peek at a few suffix values (positions 1 through 4).
m7['suffix'].iloc[1:5]

1    [CE]
2    [DA]
3    [DA]
4    [DA]
Name: suffix, dtype: object

In [12]:
# Expand each suffix list into one column per position, then give the first
# seven positional columns word names.
split = m7['suffix'].apply(pd.Series).rename(
    columns = dict(enumerate(['one', 'two', 'three', 'four', 'five', 'six', 'seven']))
)

In [15]:
# Positions a case number does not use come out missing; show them as ''.
suffix_cols = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']
split[suffix_cols] = split[suffix_cols].fillna('')

In [17]:
# Rows whose case number has a value in the seventh suffix position.
split.loc[split['seven'].ne('')]

Unnamed: 0,one,two,three,four,five,six,seven
79,SN,TDR,CUB,ZV,WDI,SPR,MSC
82,SN,TDR,CUB,ZV,WDI,SPR,MSC
85,SN,TDR,CUB,ZV,WDI,SPR,MSC
88,SN,TDR,CUB,ZV,WDI,SPR,MSC
91,SN,TDR,CUB,ZV,WDI,SPR,MSC
...,...,...,...,...,...,...,...
33444,CU,DB,SPE,SPP,SPR,MCUP,DD
41617,GPAJ,VZC,HD,SP,DRB,SPP,SPR
48990,CU,DB,CDP,CDO,SPP,MEL,WDI
48992,CU,DB,CDP,CDO,SPP,MEL,WDI


In [18]:
# Append the per-position suffix columns to m7. Columns of the same name left
# behind by an earlier run of this cell are dropped first, so re-running the
# cell does not pile up duplicate columns.
m7 = pd.concat([m7.drop(columns = split.columns, errors = 'ignore'), split], axis = 1)

In [20]:
# Narrow view: case identifiers plus the seven suffix columns.
just_suffix = m7.loc[:, ['CASE_SEQ_NBR', 'CASE_YR_NBR', 'one', 'two', 'three', 'four', 'five', 'six', 'seven']]

In [40]:
# Split on whether the case number text contains 'TOC'.
has_toc = m7['CASE_NBR'].str.contains('TOC')
toc_cases = m7[has_toc]
non_toc_cases = m7[~has_toc]

In [37]:
# Number of rows whose case number contains 'TOC'.
toc_cases.shape[0]

6439

In [41]:
# Number of rows whose case number does not contain 'TOC'.
non_toc_cases.shape[0]

43825

In [42]:
# Zone-class breakdown of the TOC rows.
toc_cases['zone_class'].value_counts()

C2       3011
R3       1550
R4        960
C4        521
RD1.5     149
R5         79
RD2        66
RAS4       36
RAS3       31
C1         18
R2         18
Name: zone_class, dtype: int64

In [43]:
# Zone-class breakdown of the non-TOC rows.
non_toc_cases['zone_class'].value_counts()

C2       20307
C4        6166
R3        4092
RD1.5     3903
R2        3843
RD2       1647
R4        1197
C5         797
R5         736
C1         522
RD3        285
RAS4       163
RD5        112
RAS3        50
RD4          5
Name: zone_class, dtype: int64