# Entitlements in TOC-eligible parcels

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import boto3

In [2]:
# S3 bucket holding the project's GIS inputs, plus a client for it.
bucket_name = 'city-planning-entitlements'
s3 = boto3.client('s3')

# Intake catalog assembled from every YAML file in the sibling catalogs directory.
catalog = intake.open_catalog("../catalogs/*.yml")

In [3]:
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)

In [3]:
# Load the parcel layer (carrying a TOC_Tier column) from the zipped shapefile on S3.
parcels = gpd.read_file(f'zip+s3://{bucket_name}/gis/intermediate/la_parcels_toc.zip')

# Show the tier distribution before filtering.
display(parcels.TOC_Tier.value_counts())

# Keep only parcels with a positive TOC tier. Take an explicit copy: a column is
# assigned onto `parcels` later in the notebook, and writing into a boolean-filtered
# slice triggers pandas' SettingWithCopyWarning.
parcels = parcels[parcels.TOC_Tier > 0].copy()

0    442562
1    148994
3    110153
2     65653
4      7844
Name: TOC_Tier, dtype: int64

In [5]:
# Read the four PCTS tables used below from the intake catalog.
pcts = catalog.pcts
cases = pcts.tCASE.read()
app = pcts.tAPLC.read()
geo_info = pcts.tPROP_GEO_INFO.read()
la_prop = pcts.tLA_PROP.read()

In [11]:
# Trim each PCTS table to the columns needed for the joins below.
case_cols = ['CASE_ID', 'APLC_ID', 'CASE_NBR', 'CASE_SEQ_NBR', 'CASE_YR_NBR', 'CASE_ACTION_ID', 'ADM_ACTION_DT']
cases1 = cases[case_cols]
app1 = app[['APLC_ID', 'PROJ_DESC_TXT']]
geo_info1 = geo_info[['CASE_ID', 'PROP_ID']]

# Rows without an assessor parcel number are excluded up front.
has_parcel_nbr = la_prop['ASSR_PRCL_NBR'].notna()
la_prop1 = la_prop.loc[has_parcel_nbr, ['PROP_ID', 'ASSR_PRCL_NBR']]

In [12]:
# Compare the row count with the number of distinct CASE_IDs, then preview the table.
n_cases = len(cases1)
n_case_ids = cases1['CASE_ID'].nunique()
print(f'# obs in cases1: {n_cases}')
print(f'# unique CASE_ID in cases1: {n_case_ids}')
display(cases1.head())

# obs in cases1: 211262
# unique CASE_ID in cases1: 211262


Unnamed: 0,CASE_ID,APLC_ID,CASE_NBR,CASE_SEQ_NBR,CASE_YR_NBR,CASE_ACTION_ID,ADM_ACTION_DT
0,66.0,66.0,PC-1987-764-PC,764.0,1987.0,,NaT
1,67.0,67.0,PC-1987-761-PC,761.0,1987.0,,NaT
2,68.0,68.0,CPC-1987-640-ZC,640.0,1987.0,,NaT
3,70.0,70.0,PC-1987-758-PC,758.0,1987.0,,NaT
4,72.0,72.0,CPC-1987-630-BL,630.0,1987.0,,NaT


In [13]:
# Row count and key cardinalities for the case-to-property link table.
print(f'# obs in geo_info1: {len(geo_info1)}')
for key in ['CASE_ID', 'PROP_ID']:
    print(f'# unique {key} in geo_info1: {geo_info1[key].nunique()}')
display(geo_info1.head())

# Several CASE_IDs can sit on the same PROP_ID. That makes sense: over time,
# the same parcel might receive more than one entitlement.
# It also makes sense for multiple PROP_IDs to appear here.

# obs in geo_info1: 878947
# unique CASE_ID in geo_info1: 143251
# unique PROP_ID in geo_info1: 314716


Unnamed: 0,CASE_ID,PROP_ID
0,99538.0,34237.0
1,99539.0,34237.0
2,99666.0,34306.0
3,99667.0,34306.0
4,99722.0,34323.0


In [14]:
# Row count and key cardinalities for the property table.
print(f'# obs in la_prop1: {len(la_prop1)}')
for key in ['PROP_ID', 'ASSR_PRCL_NBR']:
    print(f'# unique {key} in la_prop1: {la_prop1[key].nunique()}')
display(la_prop1.head())

# If PROP_ID is unique, then presumably a new PROP_ID is generated every time a
# parcel has an entitlement.
# That would explain why the same ASSR_PRCL_NBR has multiple PROP_IDs.

# obs in la_prop1: 509706
# unique PROP_ID in la_prop1: 509706
# unique ASSR_PRCL_NBR in la_prop1: 364273


Unnamed: 0,PROP_ID,ASSR_PRCL_NBR
0,16.0,6058008014
8,24.0,4129037906
27,43.0,5548010021
37,53.0,7456029026
43,59.0,7351031031


## Only keep cases from 2016 and after

In [15]:
# Restrict to cases whose CASE_YR_NBR is 2016 or later.
from_2016 = cases1['CASE_YR_NBR'] >= 2016
cases2 = cases1.loc[from_2016]

In [16]:
# Row counts before and after the year filter, then key cardinalities for the filtered table.
for label, frame in [('cases1', cases1), ('cases2', cases2)]:
    print(f'# obs in {label}: {len(frame)}')
for key in ['CASE_ID', 'CASE_SEQ_NBR']:
    print(f'# unique {key} in cases2: {cases2[key].nunique()}')
display(cases2.head())

# obs in cases1: 211262
# obs in cases2: 26374
# unique CASE_ID in cases2: 26374
# unique CASE_SEQ_NBR in cases2: 8297


Unnamed: 0,CASE_ID,APLC_ID,CASE_NBR,CASE_SEQ_NBR,CASE_YR_NBR,CASE_ACTION_ID,ADM_ACTION_DT
2086,216134.0,178573.0,ZA-2017-3841-CUB,3841.0,2017.0,2.0,NaT
2323,210277.0,174669.0,DIR-2016-3730-VSO,3730.0,2016.0,1.0,NaT
2586,213779.0,177008.0,DIR-2017-1719-SPP,1719.0,2017.0,2.0,NaT
2587,213782.0,177009.0,ZA-2017-1722-ZV,1722.0,2017.0,2.0,NaT
3630,222419.0,183093.0,ADM-2018-3967-OVR-MSP,3967.0,2018.0,1.0,2018-07-06


In [20]:
# Spot-check a single sequence number, ordered by sequence number and then year.
seq_1169 = cases2.loc[cases2['CASE_SEQ_NBR'] == 1169]
seq_1169.sort_values(['CASE_SEQ_NBR', 'CASE_YR_NBR'])

Unnamed: 0,CASE_ID,APLC_ID,CASE_NBR,CASE_SEQ_NBR,CASE_YR_NBR,CASE_ACTION_ID,ADM_ACTION_DT
184316,207294.0,172695.0,ENV-2016-1169-MND,1169.0,2016.0,,NaT
204052,229173.0,188252.0,ENV-2016-1169-MND-REC1,1169.0,2016.0,,NaT
190643,213127.0,176566.0,ZA-2017-1169-CUB-CUX,1169.0,2017.0,2.0,NaT
195192,218650.0,180303.0,ZA-2017-1169-CUB-CUX-1A,1169.0,2017.0,,NaT
207862,230043.0,188934.0,ZA-2017-1169-CUB-CUX-1A-PA1,1169.0,2017.0,,NaT
198205,219336.0,180805.0,ZA-2018-1169-CU,1169.0,2018.0,,NaT
205074,227662.0,187079.0,ADM-2019-1169-TOC,1169.0,2019.0,13.0,2019-03-20


## Merge with geo_info, la_prop, and parcels to identify the parcels that are TOC-eligible and have entitlements

In [21]:
# Attach property links to cases; validation enforces that CASE_ID is unique on the case side.
m1 = cases2.merge(geo_info1, how = 'inner', on = 'CASE_ID', validate = '1:m')

In [22]:
# Look up the assessor parcel number for each property; PROP_ID must be unique in la_prop1.
m2 = m1.merge(la_prop1, how = 'inner', on = 'PROP_ID', validate = 'm:1')

In [24]:
# Row count and key cardinalities after joining cases to properties and parcel numbers.
print(f'# obs in m2: {len(m2)}')
for key in ['CASE_ID', 'CASE_SEQ_NBR', 'PROP_ID', 'ASSR_PRCL_NBR']:
    print(f'# unique {key} in m2: {m2[key].nunique()}')
display(m2.head())

# obs in m2: 312581
# unique CASE_ID in m2: 22981
# unique CASE_SEQ_NBR in m2: 8172
# unique PROP_ID in m2: 162365
# unique ASSR_PRCL_NBR in m2: 118743


Unnamed: 0,CASE_ID,APLC_ID,CASE_NBR,CASE_SEQ_NBR,CASE_YR_NBR,CASE_ACTION_ID,ADM_ACTION_DT,PROP_ID,ASSR_PRCL_NBR
0,216134.0,178573.0,ZA-2017-3841-CUB,3841.0,2017.0,2.0,NaT,59123158.0,5032002058
1,210221.0,171491.0,CPC-2016-3681-DA,3681.0,2016.0,2.0,NaT,59123158.0,5032002058
2,216135.0,178573.0,ENV-2017-3842-CE,3842.0,2017.0,,NaT,59123158.0,5032002058
3,216364.0,117096.0,ENV-2017-4038-CE,4038.0,2017.0,,NaT,59123158.0,5032002058
4,210277.0,174669.0,DIR-2016-3730-VSO,3730.0,2016.0,1.0,NaT,59037920.0,4240011032


In [26]:
# Keep only rows whose assessor parcel number matches the AIN of a TOC parcel;
# validation enforces that AIN is unique in `parcels`.
m3 = m2.merge(parcels, how = 'inner', left_on = 'ASSR_PRCL_NBR', right_on = 'AIN', validate = 'm:1')

In [27]:
# Row count and key cardinalities after restricting to TOC parcels.
print(f'# obs in m3: {len(m3)}')
for key in ['CASE_ID', 'CASE_SEQ_NBR', 'PROP_ID', 'ASSR_PRCL_NBR']:
    print(f'# unique {key} in m3: {m3[key].nunique()}')
display(m3.head())

# obs in m3: 80186
# unique CASE_ID in m3: 12064
# unique CASE_SEQ_NBR in m3: 6898
# unique PROP_ID in m3: 40430
# unique ASSR_PRCL_NBR in m3: 23047


Unnamed: 0,CASE_ID,APLC_ID,CASE_NBR,CASE_SEQ_NBR,CASE_YR_NBR,CASE_ACTION_ID,ADM_ACTION_DT,PROP_ID,ASSR_PRCL_NBR,AIN,TOC_Tier,geometry
0,216134.0,178573.0,ZA-2017-3841-CUB,3841.0,2017.0,2.0,NaT,59123158.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."
1,210221.0,171491.0,CPC-2016-3681-DA,3681.0,2016.0,2.0,NaT,59123158.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."
2,216135.0,178573.0,ENV-2017-3842-CE,3842.0,2017.0,,NaT,59123158.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."
3,216364.0,117096.0,ENV-2017-4038-CE,4038.0,2017.0,,NaT,59123158.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."
4,210221.0,171491.0,CPC-2016-3681-DA,3681.0,2016.0,2.0,NaT,59088025.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."


## Join parcels to zoning
* Subset zoning to the eligible zone classes, then see how many TOC-eligible parcels also fall within those zones

In [29]:
# Read the parsed zoning layer from the project bucket.
zoning_uri = f's3://{bucket_name}/gis/raw/parsed_zoning.geojson'
zoning = gpd.read_file(zoning_uri)

In [40]:
# Zone classes counted as eligible in this analysis.
eligible_zones = [
    'R2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5',
    'RD1.5', 'RD2', 'RD3', 'RD4', 'RD5', 'RD6',
    'C1', 'C2', 'C4', 'C5',
]

# Keep only the zoning polygons whose zone_class is on that list.
in_eligible_zone = zoning['zone_class'].isin(eligible_zones)
eligible_zoning = zoning.loc[in_eligible_zone]

In [75]:
# Add each parcel's centroid as an extra column on `parcels` (modifies the frame in place).
parcels['centroid'] = parcels.geometry.centroid

In [87]:
# Build a point layer: make the centroid column the active geometry and keep only the
# identifying columns. The explicit copy makes the column assignments below write to an
# independent frame rather than to a column-subset slice.
parcels2 = parcels.set_geometry('centroid')[['AIN', 'TOC_Tier', 'centroid']].copy()

# Read coordinates from the active geometry. Attribute access `parcels2.centroid` would
# resolve to GeoPandas' `centroid` property, not the column of the same name; for point
# geometry the values coincide, but going through `.geometry` states the intent.
parcels2['x'] = parcels2.geometry.x
parcels2['y'] = parcels2.geometry.y

In [89]:
# Number the parcels that share identical centroid coordinates (1, 2, ...), then
# record the highest number in each group, i.e. how many parcels share that point.
xy = ['x', 'y']
parcels2['obs'] = parcels2.groupby(xy).cumcount().add(1)
parcels2['num_obs'] = parcels2.groupby(xy)['obs'].transform('max')

In [99]:
# Write the centroid layer to a local GeoJSON file.
parcels2.to_file('../gis/parcels_with_toc.geojson', driver = 'GeoJSON')

In [103]:
# Spatial join: keep parcel centroids that intersect an eligible zoning polygon, then
# discard the join's bookkeeping column.
# NOTE(review): newer geopandas releases renamed `op=` to `predicate=` — confirm the
# installed version before upgrading.
centroid_zoning = gpd.sjoin(parcels2, eligible_zoning, how = 'inner', op = 'intersects')
parcels_with_zoning = centroid_zoning.drop(columns = ['index_right'])

In [104]:
# Compare row counts and distinct AINs before and after the spatial join.
# More rows than distinct AINs afterwards means some AINs appear more than once.
layers = [('parcels2', parcels2), ('parcels_with_zoning', parcels_with_zoning)]
for label, frame in layers:
    print(f'# obs in {label}: {len(frame)}')
for label, frame in layers:
    print(f'# unique AIN in {label}: {frame.AIN.nunique()}')
display(parcels_with_zoning.head())

# obs in parcels2: 332644
# obs in parcels_with_zoning: 195008
# unique AIN in parcels2: 332644
# unique AIN in parcels_with_zoning: 187017


Unnamed: 0,AIN,TOC_Tier,centroid,x,y,obs,num_obs,ZONE_CMPLT,ZONE_CLASS1,ZONE_SMRY,...,zone_class,specific_plan,height_district,D,o1,o2,o3,o1_descrip,o2_descrip,o3_descrip
1379,2010004040,1,POINT (6378795.418 1908220.195),6378795.0,1908220.0,1,1,R3-1,R3,RESIDENTIAL,...,R3,,1,0,,,,,,
1380,2010004045,1,POINT (6378721.467 1908496.944),6378721.0,1908497.0,1,1,C4-1,C4,COMMERCIAL,...,C4,,1,0,,,,,,
1381,2010004047,1,POINT (6378828.013 1908358.350),6378828.0,1908358.0,1,1,C4-1,C4,COMMERCIAL,...,C4,,1,0,,,,,,
6177,2021012016,1,POINT (6373060.999 1898646.494),6373061.0,1898646.0,1,1,C2-1VL,C2,COMMERCIAL,...,C2,,1VL,0,,,,,,
6178,2021012017,1,POINT (6373015.575 1898424.105),6373016.0,1898424.0,1,1,C2-1VL,C2,COMMERCIAL,...,C2,,1VL,0,,,,,,


In [110]:
# Bring each parcel's zoning attributes onto the case/parcel rows.
# Both inputs carry TOC_Tier, so the result holds TOC_Tier_x and TOC_Tier_y.
m4 = m3.merge(parcels_with_zoning, how = 'inner', on = 'AIN')

In [113]:
# num_obs > 1 marks parcels whose centroid coordinates coincide with another parcel's.
display(m4['num_obs'].value_counts())

# Within each group of coincident centroids, keep only the first parcel.
first_of_group = m4['obs'] == 1
m5 = m4.loc[first_of_group]

1     50200
2        91
44        6
Name: num_obs, dtype: int64

In [116]:
# Attach the application's project description; APLC_ID must be unique in app1.
m6 = m5.merge(app1, how = 'inner', on = 'APLC_ID', validate = 'm:1')

In [119]:
# Within each (CASE_SEQ_NBR, CASE_YR_NBR) pair, keep only the rows carrying the
# highest CASE_ID.
seq_year = ['CASE_SEQ_NBR', 'CASE_YR_NBR']
m6['max_CASE_ID'] = m6.groupby(seq_year)['CASE_ID'].transform('max')
is_max_case = m6['CASE_ID'] == m6['max_CASE_ID']
m6 = m6.loc[is_max_case]

In [122]:
# Helper columns, join keys and the duplicate tier column to remove from the output.
drop = [
    'centroid', 'x', 'y', 'obs', 'num_obs',
    'CASE_ID', 'max_CASE_ID', 'APLC_ID', 'ASSR_PRCL_NBR', 'PROP_ID',
    'TOC_Tier_y',
]

# Remove them and give the surviving tier column its plain name back.
m6 = m6.drop(columns = drop).rename(columns = {'TOC_Tier_x':'TOC_Tier'})

In [129]:
# Save the tabular result; the geometry column is left out of the parquet file.
m6_no_geom = m6.drop(columns = 'geometry')
m6_no_geom.to_parquet('../gis/m6.parquet')

In [125]:
# Row count and distinct sequence numbers in the final table, then the first ten
# rows ordered by sequence number and year.
n_rows = len(m6)
n_seq = m6['CASE_SEQ_NBR'].nunique()
print(f'# obs in m6: {n_rows}')
print(f'# unique CASE_SEQ_NBR in m6: {n_seq}')
m6_sorted = m6.sort_values(['CASE_SEQ_NBR', 'CASE_YR_NBR'])
display(m6_sorted.head(10))

Unnamed: 0,CASE_NBR,CASE_SEQ_NBR,CASE_YR_NBR,CASE_ACTION_ID,ADM_ACTION_DT,AIN,TOC_Tier,geometry,ZONE_CMPLT,ZONE_CLASS1,...,specific_plan,height_district,D,o1,o2,o3,o1_descrip,o2_descrip,o3_descrip,PROJ_DESC_TXT
13193,AA-2016-1-PMLA-SL,1.0,2016.0,2.0,NaT,2419003028,1,"POLYGON ((6450159.881 1881744.943, 6450159.781...",[Q]R3-1,R3,...,,1,0,,,,,,,PRELIMINARY PARCEL MAP FOR A 3-LOT SMALL LOT S...
45222,ADM-2019-1-TOC,1.0,2019.0,13.0,2019-01-17,2335009020,1,"POLYGON ((6444717.071 1891295.974, 6444716.841...",R3-1,R3,...,,1,0,,,,,,,TIER VERIFICATION
45223,ADM-2019-1-TOC,1.0,2019.0,13.0,2019-01-17,2335009010,1,"POLYGON ((6444567.461 1891396.414, 6444567.261...",C2-1VL,C2,...,,1VL,0,,,,,,,TIER VERIFICATION
45224,ADM-2019-1-TOC,1.0,2019.0,13.0,2019-01-17,2335009010,1,"POLYGON ((6444567.461 1891396.414, 6444567.261...",C2-1VL,C2,...,,1VL,0,,,,,,,TIER VERIFICATION
45225,ADM-2019-1-TOC,1.0,2019.0,13.0,2019-01-17,2335009010,1,"POLYGON ((6444567.461 1891396.414, 6444567.261...",C2-1VL,C2,...,,1VL,0,,,,,,,TIER VERIFICATION
45226,ADM-2019-1-TOC,1.0,2019.0,13.0,2019-01-17,2335009010,1,"POLYGON ((6444567.461 1891396.414, 6444567.261...",C2-1VL,C2,...,,1VL,0,,,,,,,TIER VERIFICATION
13194,ENV-2016-2-CE,2.0,2016.0,,NaT,2419003028,1,"POLYGON ((6450159.881 1881744.943, 6450159.781...",[Q]R3-1,R3,...,,1,0,,,,,,,PRELIMINARY PARCEL MAP FOR A 3-LOT SMALL LOT S...
43362,PAR-2019-2-CM,2.0,2019.0,14.0,2019-01-30,5414017024,3,"POLYGON ((6490135.624 1846988.237, 6490169.904...",C2-2D,C2,...,,2,1,,,,,,,PRE-DEVELOPMENT MEETING REQUEST
43363,PAR-2019-2-CM,2.0,2019.0,14.0,2019-01-30,5414017024,3,"POLYGON ((6490135.624 1846988.237, 6490169.904...",C2-2D,C2,...,,2,1,,,,,,,PRE-DEVELOPMENT MEETING REQUEST
43364,PAR-2019-2-CM,2.0,2019.0,14.0,2019-01-30,5414017024,3,"POLYGON ((6490135.624 1846988.237, 6490169.904...",C2-2D,C2,...,,2,1,,,,,,,PRE-DEVELOPMENT MEETING REQUEST


In [130]:
import pcts_parser

# Column labels for the values that parse_pcts extracts from each case number.
parsed_col_names = ['suffix']

def parse_pcts(row):
    """Parse one row's CASE_NBR and return its suffix value.

    Parameters
    ----------
    row : object exposing a ``CASE_NBR`` attribute, such as the row passed by
        ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    pandas.Series
        Indexed by ``parsed_col_names``. Holds the ``suffix`` attribute of
        ``pcts_parser.PCTSCaseNumber(row.CASE_NBR)`` when the case number parses,
        or ``None`` when the parser raises ``ValueError``.
    """
    try:
        z = pcts_parser.PCTSCaseNumber(row.CASE_NBR)
    except ValueError:
        # The parser rejected this case number, so no parsed object exists. Return a
        # missing value; reading `z.suffix` here would fail because `z` was never bound.
        return pd.Series([None], index = parsed_col_names)
    return pd.Series([z.suffix], index = parsed_col_names)

# Run the parser over every row, then append the parsed column(s) to the case table.
parsed = m6.apply(parse_pcts, axis = 'columns')

m7 = pd.concat([m6, parsed], axis = 'columns')

In [134]:
# Save the parsed table; the geometry column is left out of the parquet file.
m7_no_geom = m7.drop(columns = 'geometry')
m7_no_geom.to_parquet('../gis/m7.parquet')

In [144]:
# Reload the saved table; from here on `m7` is the parquet version, without geometry.
m7_path = '../gis/m7.parquet'
m7 = pd.read_parquet(m7_path)

In [149]:
# Peek at a few parsed suffix values.
suffix_col = m7['suffix']
suffix_col[1:5]

1    [CE]
2    [DA]
3    [DA]
4    [DA]
Name: suffix, dtype: object