# Entitlements in TOC-eligible parcels

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import boto3
import utils

In [2]:
# S3 bucket and client used for every read/upload in this notebook.
bucket_name = 'city-planning-entitlements'
s3 = boto3.client('s3')

# Intake catalog(s); the PCTS tables are read from `catalog.pcts` further down.
catalog = intake.open_catalog("../catalogs/*.yml")

## Parcels
* Figure out how many are duplicates
* We don't know which AINs are used in PCTS, so keep all of them, but keep a way to identify how many observations to drop later on

In [3]:
# Read the zipped parcel layer directly from S3.
parcels_path = f'zip+s3://{bucket_name}/gis/intermediate/la_parcels_toc.zip'
parcels = gpd.read_file(parcels_path)

# Show the number of parcels per TOC_Tier value before filtering.
tier_counts = parcels['TOC_Tier'].value_counts()
display(tier_counts)

# Keep only parcels with a positive TOC_Tier.
in_toc_tier = parcels['TOC_Tier'] > 0
parcels = parcels[in_toc_tier]

0    442562
1    148994
3    110153
2     65653
4      7844
Name: TOC_Tier, dtype: int64

In [4]:
# Write the TOC-tier parcels to a local GeoJSON, then upload that same file to S3.
# The S3 key mirrors the local path relative to the repository root.
toc_geojson_key = 'gis/intermediate/toc_eligible_parcels.geojson'
toc_geojson_local = f'../{toc_geojson_key}'

parcels.to_file(toc_geojson_local, driver = 'GeoJSON')
s3.upload_file(toc_geojson_local, bucket_name, toc_geojson_key)

In [5]:
# Grab the centroids and count number of duplicate obs
# NOTE(review): utils.get_centroid is a project helper that is not visible here.
# Later cells use 'centroid', 'x', 'y', 'obs' and 'num_obs' columns that presumably
# come from it. The m3 merge uses `parcels` (not `parcels2`) while the m4 merge joins
# on 'centroid', so the helper may also add columns to `parcels` in place -- confirm.
parcels2 = utils.get_centroid(parcels)

## Grab tables from PCTS

In [6]:
# Load the four PCTS tables used below from the intake catalog.
# Columns referenced later in this notebook:
#   tCASE          -> CASE_ID, APLC_ID, CASE_NBR, CASE_SEQ_NBR, CASE_YR_NBR, CASE_ACTION_ID, ADM_ACTION_DT
#   tAPLC          -> APLC_ID, PROJ_DESC_TXT
#   tPROP_GEO_INFO -> CASE_ID, PROP_ID
#   tLA_PROP       -> PROP_ID, ASSR_PRCL_NBR
cases = catalog.pcts.tCASE.read()
app = catalog.pcts.tAPLC.read()
geo_info = catalog.pcts.tPROP_GEO_INFO.read()
la_prop = catalog.pcts.tLA_PROP.read()

In [7]:
# Trim each PCTS table down to the columns needed for the joins below.
case_cols = [
    'CASE_ID', 'APLC_ID', 'CASE_NBR', 'CASE_SEQ_NBR',
    'CASE_YR_NBR', 'CASE_ACTION_ID', 'ADM_ACTION_DT',
]
cases1 = cases[case_cols]
app1 = app[['APLC_ID', 'PROJ_DESC_TXT']]
geo_info1 = geo_info[['CASE_ID', 'PROP_ID']]

# Rows without an assessor parcel number cannot be joined to parcels on AIN, so drop them.
has_parcel_nbr = la_prop['ASSR_PRCL_NBR'].notna()
la_prop1 = la_prop.loc[has_parcel_nbr, ['PROP_ID', 'ASSR_PRCL_NBR']]

### Subset cases, keep 2016 and after

In [8]:
# Keep cases whose case-year number is 2016 or later.
is_2016_or_later = cases1['CASE_YR_NBR'] >= 2016
cases2 = cases1.loc[is_2016_or_later]

## Parent cases only

In [9]:
parents = pd.read_parquet('../data/parent_cases.parquet')

# The inner join keeps only cases present in the parent-case file.
# validate='1:1' raises if CASE_ID is duplicated on either side.
cases3 = cases2.merge(parents, how = 'inner', on = 'CASE_ID', validate = '1:1')

## Merge with geo_info, la_prop, and parcels to identify the parcels that have entitlements (2016 and after)

In [10]:
# Attach property ids to each case; a case may map to many properties (1:m).
m1 = cases3.merge(geo_info1, how = 'inner', on = 'CASE_ID', validate = '1:m')

In [11]:
# Attach the assessor parcel number for each property; PROP_ID must be unique in la_prop1 (m:1).
m2 = m1.merge(la_prop1, how = 'inner', on = 'PROP_ID', validate = 'm:1')

In [12]:
# Match case properties to TOC-tier parcels via assessor parcel number == AIN.
# validate='m:1' raises if AIN is duplicated in `parcels`.
m3 = m2.merge(
    parcels,
    how = 'inner',
    left_on = 'ASSR_PRCL_NBR',
    right_on = 'AIN',
    validate = 'm:1',
)

## Join parcels to zoning and subset to eligible zones
* Subset by eligible zones, see how many TOC-eligible parcels also fall into eligible zones

In [13]:
# Read the parsed zoning layer from the project S3 bucket; the next cell filters it on `zone_class`.
zoning = gpd.read_file(f's3://{bucket_name}/gis/raw/parsed_zoning.geojson')

In [14]:
# Zone classes treated as eligible in this analysis.
eligible_zones = [
    'R2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5',
    'RD1.5', 'RD2', 'RD3', 'RD4', 'RD5', 'RD6',
    'C1', 'C2', 'C4', 'C5',
]

in_eligible_zone = zoning['zone_class'].isin(eligible_zones)
eligible_zoning = zoning.loc[in_eligible_zone]

In [15]:
# Spatial inner join: keep rows of parcels2 whose geometry intersects an eligible zoning polygon.
parcels_with_zoning = gpd.sjoin(parcels2, eligible_zoning, how = 'inner', op = 'intersects')

# 'index_right' is bookkeeping added by sjoin and is not needed afterwards.
parcels_with_zoning = parcels_with_zoning.drop(columns = ['index_right'])

In [16]:
# Bring the zoning attributes onto the case-parcel rows.
# The inner join also drops parcels that did not land in an eligible zone.
m4 = m3.merge(
    parcels_with_zoning,
    how = 'inner',
    on = ['AIN', 'centroid', 'TOC_Tier'],
)

In [17]:
# Add the project description from the application table (APLC_ID is unique there, m:1),
# then remove rows that are exact duplicates across all columns.
m5 = (
    m4.merge(app1, how = 'inner', on = 'APLC_ID', validate = 'm:1')
      .drop_duplicates()
)

## Parse the PCTS string and grab prefix
* Drop duplicates
* Drop ENV, ADM, PAR cases
* Subset by CASE_ACTION_ID

In [18]:
parsed_col_names = ['prefix']

def parse_pcts(row):
    """
    Extract the prefix of a PCTS case number.

    Parameters
    ----------
    row : object with a ``CASE_NBR`` attribute (e.g. a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    pandas.Series
        Indexed by ``parsed_col_names``. Holds the parsed prefix, or NaN when
        ``utils.PCTSCaseNumber`` raises ``ValueError`` for the case number.
    """
    try:
        z = utils.PCTSCaseNumber(row.CASE_NBR)
    except ValueError:
        # `z` is never bound when the constructor raises, so there is no prefix
        # to report. Return a missing value instead of touching `z` (which
        # would raise UnboundLocalError and abort the whole apply).
        return pd.Series([np.nan], index = parsed_col_names)
    return pd.Series([z.prefix], index = parsed_col_names)

# Parse every row's case number; `parsed` has one column per entry in parsed_col_names.
parsed = m5.apply(parse_pcts, axis = 'columns')

# Append the parsed column(s) to the case-parcel table, aligned on the row index.
m6 = pd.concat((m5, parsed), axis = 'columns')

In [19]:
# Columns that are no longer needed once the joins are done.
drop = [
    'centroid', 'x', 'y', 'obs', 'num_obs',
    'CASE_ID', 'APLC_ID', 'ASSR_PRCL_NBR', 'PROP_ID',
    'o1', 'o2', 'o3', 'o1_descrip', 'o2_descrip', 'o3_descrip',
]
m6 = m6.drop(columns = drop)

# Create new id variable that is just seq number and year. Need a way to get rid of duplicate cases.
seq_txt = m6['CASE_SEQ_NBR'].astype(int).astype(str)
year_txt = m6['CASE_YR_NBR'].astype(int).astype(str)
m6['id'] = seq_txt + '_' + year_txt

In [20]:
# Two rows count as the same observation when they agree on all of these columns.
cols_we_need = [
    'id', 'CASE_ACTION_ID', 'ADM_ACTION_DT',
    'AIN', 'TOC_Tier', 'ZONE_CMPLT', 'PROJ_DESC_TXT',
]

# Keep the first occurrence of each combination.
is_repeat = m6.duplicated(subset = cols_we_need)
m7 = m6.loc[~is_repeat]

In [21]:
# Subset by prefix: drop ENV, ADM and PAR cases.
drop_prefix = ['ENV', 'ADM', 'PAR']

# Build the mask from m7 itself. The original built it from m6 and relied on
# implicit index alignment between the two frames, which breaks as soon as
# m7's index stops being a subset of m6's.
m7 = m7.loc[~m7.prefix.isin(drop_prefix)]

In [22]:
# Show the distribution of CASE_ACTION_ID before subsetting on it.
action_counts = m7['CASE_ACTION_ID'].value_counts()
display(action_counts)

# CASE_ACTION_ID values kept as "approved".
# NOTE(review): the meaning of codes 1, 2 and 11 is not documented in this notebook -- confirm against PCTS.
approved_cases = [1, 2, 11]

is_approved = m7['CASE_ACTION_ID'].isin(approved_cases)
m8 = m7.loc[is_approved]

2.0     2506
1.0      895
4.0      146
6.0      110
3.0       52
11.0      18
7.0        3
9.0        2
5.0        1
8.0        1
10.0       1
Name: CASE_ACTION_ID, dtype: int64

#### At this point, no more duplicates by PARENT_CASE-AIN

## Additional subsetting for TOC-eligible parcels

In [23]:
# Tag case as TOC or not: 1 when the case number contains 'TOC', else 0.
# m8 is a filtered slice of m7, so setting a column on it directly triggers
# SettingWithCopyWarning. assign() returns a new frame and avoids that.
m8 = m8.assign(is_TOC = m8.CASE_NBR.str.contains('TOC').astype(int))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [24]:
# Columns carried into the parcel-level aggregation.
keep_col = [
    'CASE_NBR', 'id', 'CASE_ACTION_ID', 'ADM_ACTION_DT',
    'AIN', 'TOC_Tier', 'zone_class', 'is_TOC',
]

m9 = m8.loc[:, keep_col]

## Make into parcel level df

In [25]:
# Count case ids per parcel, split by whether the case is TOC or not.
parcel_keys = ['AIN', 'TOC_Tier', 'zone_class', 'is_TOC']
m10 = m9.groupby(parcel_keys)[['id']].count().reset_index()

In [26]:
# Make wide: copy the per-row case count (`id`) into num_TOC on TOC rows and
# into num_nonTOC on non-TOC rows; the other column is left as NaN.
# Series.where is vectorised and, unlike a row-wise apply, also works on an
# empty frame.
m10['num_TOC'] = m10['id'].where(m10['is_TOC'] == 1)
m10['num_nonTOC'] = m10['id'].where(m10['is_TOC'] == 0)

In [27]:
# When an AIN has several rows, fill each missing count with the largest value of that
# column within the same AIN, so the rows become identical and can be de-duplicated next.
count_cols = ['num_TOC', 'num_nonTOC']
max_per_ain = m10.groupby('AIN')[count_cols].transform('max')
m10[count_cols] = m10[count_cols].fillna(max_per_ain)

In [28]:
# Collapse to one row per AIN / TOC_Tier / zone_class / count combination.
# The explicit copy makes m11 an independent frame, so the column assignment
# below no longer raises SettingWithCopyWarning.
m11 = m10.drop_duplicates(
    subset = ['AIN', 'TOC_Tier', 'zone_class', 'num_TOC', 'num_nonTOC']
).copy()

# Counts still missing mean the parcel has no cases of that kind: store 0 as an integer.
count_cols = ['num_TOC', 'num_nonTOC']
m11[count_cols] = m11[count_cols].fillna(0).astype(int)

# is_TOC and id only served to build the two count columns.
m11 = m11.drop(columns = ['is_TOC', 'id'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [29]:
# Parcel subsets by the kind of entitlement they have.
has_toc = m11['num_TOC'] > 0
has_non_toc = m11['num_nonTOC'] > 0

toc_parcels = m11[has_toc]
non_toc_parcels = m11[has_non_toc]
have_both_parcels = m11[has_toc & has_non_toc]

print(f'# parcels: {len(m11)}')
print(f'# parcels with TOC entitlements: {len(toc_parcels)}')
print(f'# parcels with non TOC entitlements: {len(non_toc_parcels)}')
print(f'# parcels with both TOC and non TOC entitlements: {len(have_both_parcels)}')
# Inclusion-exclusion check: TOC + non-TOC - both should equal the total.
print(f'double check sum: {len(toc_parcels) + len(non_toc_parcels) - len(have_both_parcels)}')

# parcels: 2796
# parcels with TOC entitlements: 228
# parcels with non TOC entitlements: 2604
# parcels with both TOC and non TOC entitlements: 36
double check sum: 2796


In [30]:
# Share of all parcels in m11 that fall into each subset (printed as a fraction, not x100).
n_parcels = len(m11)
subsets = [
    ('TOC', toc_parcels),
    ('non TOC', non_toc_parcels),
    ('both', have_both_parcels),
]
for label, subset in subsets:
    print(f'% parcels with {label} entitlements: {len(subset) / n_parcels}')

% parcels with TOC entitlements: 0.0815450643776824
% parcels with non TOC entitlements: 0.9313304721030042
% parcels with both entitlements: 0.012875536480686695


In [31]:
# Number of parcels with entitlements in each TOC tier.
m11['TOC_Tier'].value_counts()

3    1407
1     669
2     525
4     195
Name: TOC_Tier, dtype: int64

In [32]:
# Zone classes of parcels that have at least one TOC entitlement.
toc_parcels['zone_class'].value_counts()

C2       87
R3       67
R4       50
C4       13
RD1.5     6
RAS4      2
R2        1
R5        1
RD2       1
Name: zone_class, dtype: int64

In [33]:
# Zone classes of parcels that have at least one non-TOC entitlement.
non_toc_parcels['zone_class'].value_counts()

C2       862
R2       439
C4       308
RD1.5    273
R3       259
RD2      189
R4       105
R5        40
C5        39
RD3       32
C1        28
RAS4      21
RD5        7
RD4        1
RAS3       1
Name: zone_class, dtype: int64

In [34]:
# Re-attach the parcel columns from parcels2 (including 'centroid') to the parcel-level counts,
# then drop the helper columns that are not needed in the export.
m12 = m11.merge(parcels2, how = 'inner', on = ['AIN', 'TOC_Tier'])
m12 = m12.drop(columns = ['x', 'y', 'obs', 'num_obs'])

In [35]:
# Use the centroid as the geometry column and promote the table to a GeoDataFrame.
m12 = gpd.GeoDataFrame(m12.rename(columns = {'centroid': 'geometry'}))

# Tag the coordinates as EPSG:2229.
m12.crs = {'init':'epsg:2229'}

In [36]:
# Write the parcel-level entitlement counts to a local GeoJSON, then upload that same file to S3.
# The S3 key mirrors the local path relative to the repository root.
entitlements_key = 'gis/intermediate/toc_eligible_parcels_with_entitlements.geojson'
entitlements_local = f'../{entitlements_key}'

m12.to_file(entitlements_local, driver = 'GeoJSON')
s3.upload_file(entitlements_local, bucket_name, entitlements_key)

## Breakdown by TOC Tiers

In [37]:
# Per TOC tier: number of parcels (AIN count) and total TOC / non-TOC entitlements.
m13 = (
    m12.groupby('TOC_Tier')
       .agg({'AIN':'count', 'num_TOC':'sum', 'num_nonTOC':'sum'})
       .reset_index()
)

# Share of the tier's entitlements that are TOC vs non-TOC.
tier_entitlements = m13['num_TOC'] + m13['num_nonTOC']
m13['pct_TOC'] = m13['num_TOC'] / tier_entitlements
m13['pct_nonTOC'] = m13['num_nonTOC'] / tier_entitlements

# Share of all parcels that fall in the tier.
m13['all_AIN'] = m13['AIN'].sum()
m13['pct_AIN'] = m13['AIN'] / m13['all_AIN']

m13

Unnamed: 0,TOC_Tier,AIN,num_TOC,num_nonTOC,pct_TOC,pct_nonTOC,all_AIN,pct_AIN
0,1,669,30,781,0.036991,0.963009,2796,0.23927
1,2,525,51,554,0.084298,0.915702,2796,0.187768
2,3,1407,137,1553,0.081065,0.918935,2796,0.503219
3,4,195,11,302,0.035144,0.964856,2796,0.069742


## Breakdown by Zone Class

In [38]:
# Per zone class: number of parcels (AIN count) and total TOC / non-TOC entitlements.
m14 = (
    m12.groupby('zone_class')
       .agg({'AIN': 'count', 'num_TOC': 'sum', 'num_nonTOC': 'sum'})
       .reset_index()
)

# Share of the zone class's entitlements that are TOC vs non-TOC.
zone_entitlements = m14['num_TOC'] + m14['num_nonTOC']
m14['pct_TOC'] = m14['num_TOC'] / zone_entitlements
m14['pct_nonTOC'] = m14['num_nonTOC'] / zone_entitlements

# Share of all parcels that fall in the zone class.
m14['all_AIN'] = m14['AIN'].sum()
m14['pct_AIN'] = m14['AIN'] / m14['all_AIN']

m14

Unnamed: 0,zone_class,AIN,num_TOC,num_nonTOC,pct_TOC,pct_nonTOC,all_AIN,pct_AIN
0,C1,28,0,31,0.0,1.0,2796,0.010014
1,C2,930,88,1142,0.071545,0.928455,2796,0.332618
2,C4,316,13,421,0.029954,0.970046,2796,0.113019
3,C5,39,0,49,0.0,1.0,2796,0.013948
4,R2,439,1,485,0.002058,0.997942,2796,0.15701
5,R3,324,67,277,0.194767,0.805233,2796,0.11588
6,R4,150,50,108,0.316456,0.683544,2796,0.053648
7,R5,41,1,54,0.018182,0.981818,2796,0.014664
8,RAS3,1,0,1,0.0,1.0,2796,0.000358
9,RAS4,21,2,23,0.08,0.92,2796,0.007511


In [39]:
# Export both summary tables to one workbook, one sheet each.
# The context manager saves and closes the file even if a to_excel call raises.
# It also replaces writer.save(), which was removed in pandas 2.0.
with pd.ExcelWriter('../outputs/toc_charts.xlsx', engine = 'xlsxwriter') as writer:
    m13.to_excel(writer, sheet_name = 'entitlements_by_tier')
    m14.to_excel(writer, sheet_name = 'entitlements_by_zone')