# Entitlements in TOC-eligible parcels

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import boto3

In [2]:
catalog = intake.open_catalog("../catalogs/*.yml")

s3 = boto3.client('s3')
bucket_name = 'city-planning-entitlements'

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [4]:
parcels = gpd.read_file(f'zip+s3://{bucket_name}/gis/intermediate/la_parcels_toc.zip')

display(parcels.TOC_Tier.value_counts())
parcels = parcels[parcels.TOC_Tier > 0]

0    442562
1    148994
3    110153
2     65653
4      7844
Name: TOC_Tier, dtype: int64

In [5]:
parcels.head()

Unnamed: 0,AIN,TOC_Tier,geometry
1356,2010004010,1,"POLYGON ((6378513.268 1908559.821, 6378530.745..."
1357,2010004011,1,"POLYGON ((6378509.405 1908588.880, 6378520.245..."
1368,2010004024,1,"POLYGON ((6378629.302 1908076.203, 6378628.535..."
1369,2010004025,1,"POLYGON ((6378660.195 1908117.450, 6378711.685..."
1370,2010004026,1,"POLYGON ((6378852.685 1908116.570, 6378759.926..."


In [6]:
parcels.dtypes

AIN           object
TOC_Tier       int64
geometry    geometry
dtype: object

In [7]:
cases = catalog.pcts.tCASE.read()
geo_info = catalog.pcts.tPROP_GEO_INFO.read()
la_prop = catalog.pcts.tLA_PROP.read()

In [8]:
cases1 = cases[['CASE_ID', 'CASE_NBR']]
geo_info1 = geo_info[['CASE_ID', 'PROP_ID']]
la_prop1 = la_prop[la_prop.ASSR_PRCL_NBR.notna()][['PROP_ID', 'ASSR_PRCL_NBR']]

In [9]:
print(f'# obs in cases1: {len(cases1)}')
print(f'# unique CASE_ID in cases1: {cases1.CASE_ID.nunique()}')
display(cases1.head())

# obs in cases1: 211262
# unique CASE_ID in cases1: 211262


Unnamed: 0,CASE_ID,CASE_NBR
0,66.0,PC-1987-764-PC
1,67.0,PC-1987-761-PC
2,68.0,CPC-1987-640-ZC
3,70.0,PC-1987-758-PC
4,72.0,CPC-1987-630-BL


In [10]:
print(f'# obs in geo_info1: {len(geo_info1)}')
print(f'# unique CASE_ID in geo_info1: {geo_info1.CASE_ID.nunique()}')
print(f'# unique PROP_ID in geo_info1: {geo_info1.PROP_ID.nunique()}')
display(geo_info1.head())

# There can be multiple CASE_IDs that take place on same PROP_ID. Makes sense, over time, the same parcel might have multiple entitlements
# Also makes sense for there to be multiple PROP_IDs that appear here

# obs in geo_info1: 878947
# unique CASE_ID in geo_info1: 143251
# unique PROP_ID in geo_info1: 314716


Unnamed: 0,CASE_ID,PROP_ID
0,99538.0,34237.0
1,99539.0,34237.0
2,99666.0,34306.0
3,99667.0,34306.0
4,99722.0,34323.0


In [11]:
print(f'# obs in la_prop1: {len(la_prop1)}')
print(f'# unique PROP_ID in la_prop1: {la_prop1.PROP_ID.nunique()}')
print(f'# unique ASSR_PRCL_NBR in la_prop1: {la_prop1.ASSR_PRCL_NBR.nunique()}')
display(la_prop1.head())

# If PROP_ID is unique...that means every time a parcel has an entitlemnet, a unique PROP_ID is generated
# That would explain why the same ASSR_PRCL_NBR has multiple PROP_IDs

# obs in la_prop1: 509706
# unique PROP_ID in la_prop1: 509706
# unique ASSR_PRCL_NBR in la_prop1: 364273


Unnamed: 0,PROP_ID,ASSR_PRCL_NBR
0,16.0,6058008014
8,24.0,4129037906
27,43.0,5548010021
37,53.0,7456029026
43,59.0,7351031031


## Parse PCTS first, and only keep cases from 2016 and after

In [12]:
import pcts_parser

parsed_col_names = ['prefix', 'year', 'pcts_case_id', 'suffix', 'invalid_prefix']

def parse_pcts(row):
    try:
        z = pcts_parser.PCTSCaseNumber(row.CASE_NBR)
        return pd.Series([z.prefix, z.year, z.pcts_case_id, z.suffix, z.invalid_prefix], 
                         index = parsed_col_names)
    except ValueError:
        return pd.Series(['failed', 'failed', 'failed', 'failed', 'failed'], 
                         index = parsed_col_names)

    
parsed = cases1.apply(parse_pcts, axis = 1)

cases1 = pd.concat([cases1, parsed], axis = 1)

cases1.head()

Unnamed: 0,CASE_ID,CASE_NBR,prefix,year,pcts_case_id,suffix,invalid_prefix
0,66.0,PC-1987-764-PC,invalid,1987,764,[PC],PC
1,67.0,PC-1987-761-PC,invalid,1987,761,[PC],PC
2,68.0,CPC-1987-640-ZC,CPC,1987,640,[ZC],
3,70.0,PC-1987-758-PC,invalid,1987,758,[PC],PC
4,72.0,CPC-1987-630-BL,CPC,1987,630,[BL],


In [13]:
cases2 = cases1[cases1.year.str.contains('20')]
cases2.year = cases2.year.astype(int)

cases2 = cases2[cases2.year >= 2016]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [14]:
print(f'# obs in cases1: {len(cases1)}')
print(f'# obs in cases2: {len(cases2)}')
print(f'# unique CASE_ID in cases2: {cases2.CASE_ID.nunique()}')
print(f'# unique pcts_case_id in cases2: {cases2.pcts_case_id.nunique()}')
display(cases2.head())

# obs in cases1: 211262
# obs in cases2: 25672
# unique CASE_ID in cases2: 25672
# unique pcts_case_id in cases2: 7768


Unnamed: 0,CASE_ID,CASE_NBR,prefix,year,pcts_case_id,suffix,invalid_prefix
2086,216134.0,ZA-2017-3841-CUB,ZA,2017,3841,[CUB],
2323,210277.0,DIR-2016-3730-VSO,DIR,2016,3730,[VSO],
2586,213779.0,DIR-2017-1719-SPP,DIR,2017,1719,[SPP],
2587,213782.0,ZA-2017-1722-ZV,ZA,2017,1722,[ZV],
3630,222419.0,ADM-2018-3967-OVR-MSP,ADM,2018,3967,"[OVR, MSP]",


In [15]:
cases2[cases2.pcts_case_id=='1169'].sort_values(['pcts_case_id', 'year'], ascending = [True, True])

Unnamed: 0,CASE_ID,CASE_NBR,prefix,year,pcts_case_id,suffix,invalid_prefix
184316,207294.0,ENV-2016-1169-MND,ENV,2016,1169,[MND],
204052,229173.0,ENV-2016-1169-MND-REC1,ENV,2016,1169,"[MND, REC1]",
190643,213127.0,ZA-2017-1169-CUB-CUX,ZA,2017,1169,"[CUB, CUX]",
195192,218650.0,ZA-2017-1169-CUB-CUX-1A,ZA,2017,1169,"[CUB, CUX, 1A]",
207862,230043.0,ZA-2017-1169-CUB-CUX-1A-PA1,ZA,2017,1169,"[CUB, CUX, 1A, PA1]",
198205,219336.0,ZA-2018-1169-CU,ZA,2018,1169,[CU],
205074,227662.0,ADM-2019-1169-TOC,ADM,2019,1169,[TOC],


In [16]:
cases2.sort_values(['pcts_case_id', 'CASE_ID'], ascending = [True, True]).head()

Unnamed: 0,CASE_ID,CASE_NBR,prefix,year,pcts_case_id,suffix,invalid_prefix
184146,205918.0,AA-2016-1-PMLA-SL,AA,2016,1,"[PMLA, SL]",
188332,211774.0,DIR-2017-1-CWC,DIR,2017,1,[CWC],
204655,226430.0,ADM-2019-1-TOC,ADM,2019,1,[TOC],
210342,234694.0,DIR-2020-1-CLQ,DIR,2020,1,[CLQ],
183292,205928.0,DIR-2016-10-CEX,DIR,2016,10,[CEX],


## Merge with geo_info, la_prop, parcels to ID the parcels that are TOC-eligible and have entitlements

In [17]:
m1 = pd.merge(cases2, geo_info1, on = 'CASE_ID', how = 'inner', validate = '1:m')

In [18]:
m2 = pd.merge(m1, la_prop1, on = 'PROP_ID', how = 'inner', validate = 'm:1')

In [19]:
print(f'# obs in m2: {len(m2)}')
print(f'# unique CASE_ID in m2: {m2.CASE_ID.nunique()}')
print(f'# unique pcts_case_id in m2: {m2.pcts_case_id.nunique()}')
print(f'# unique PROP_ID in m2: {m2.PROP_ID.nunique()}')
print(f'# unique ASSR_PRCL_NBR in m2: {m2.ASSR_PRCL_NBR.nunique()}')
display(m2.head())

# obs in m2: 309812
# unique CASE_ID in m2: 22522
# unique pcts_case_id in m2: 7734
# unique PROP_ID in m2: 161919
# unique ASSR_PRCL_NBR in m2: 118635


Unnamed: 0,CASE_ID,CASE_NBR,prefix,year,pcts_case_id,suffix,invalid_prefix,PROP_ID,ASSR_PRCL_NBR
0,216134.0,ZA-2017-3841-CUB,ZA,2017,3841,[CUB],,59123158.0,5032002058
1,210221.0,CPC-2016-3681-DA,CPC,2016,3681,[DA],,59123158.0,5032002058
2,216135.0,ENV-2017-3842-CE,ENV,2017,3842,[CE],,59123158.0,5032002058
3,216364.0,ENV-2017-4038-CE,ENV,2017,4038,[CE],,59123158.0,5032002058
4,210277.0,DIR-2016-3730-VSO,DIR,2016,3730,[VSO],,59037920.0,4240011032


In [21]:
m3 = pd.merge(m2, parcels, left_on = 'ASSR_PRCL_NBR', right_on = 'AIN', how = 'inner', validate = 'm:1')

In [22]:
print(f'# obs in m3: {len(m3)}')
print(f'# unique CASE_ID in m3: {m3.CASE_ID.nunique()}')
print(f'# unique pcts_case_id in m3: {m3.pcts_case_id.nunique()}')
print(f'# unique PROP_ID in m3: {m3.PROP_ID.nunique()}')
print(f'# unique ASSR_PRCL_NBR in m3: {m3.ASSR_PRCL_NBR.nunique()}')
display(m3.head())

# obs in m3: 78753
# unique CASE_ID in m3: 11808
# unique pcts_case_id in m3: 6646
# unique PROP_ID in m3: 40297
# unique ASSR_PRCL_NBR in m3: 22994


Unnamed: 0,CASE_ID,CASE_NBR,prefix,year,pcts_case_id,suffix,invalid_prefix,PROP_ID,ASSR_PRCL_NBR,AIN,TOC_Tier,geometry
0,216134.0,ZA-2017-3841-CUB,ZA,2017,3841,[CUB],,59123158.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."
1,210221.0,CPC-2016-3681-DA,CPC,2016,3681,[DA],,59123158.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."
2,216135.0,ENV-2017-3842-CE,ENV,2017,3842,[CE],,59123158.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."
3,216364.0,ENV-2017-4038-CE,ENV,2017,4038,[CE],,59123158.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."
4,210221.0,CPC-2016-3681-DA,CPC,2016,3681,[DA],,59088025.0,5032002058,5032002058,3,"POLYGON ((6459868.129 1827840.456, 6459903.635..."


In [23]:
m3.TOC_Tier.value_counts()

1    34533
3    24920
2    13240
4     6060
Name: TOC_Tier, dtype: int64