# Entitlements in TOC-eligible parcels

In [1]:
import boto3
import geopandas as gpd
import intake
import numpy as np
import os
import pandas as pd
import utils
import laplan

In [2]:
# S3 bucket used for the GIS inputs and outputs in this notebook,
# and the client used to download from / upload to it.
bucket_name = 'city-planning-entitlements'
s3 = boto3.client('s3')

# Intake catalog(s) from the repo's catalogs folder; PCTS is read from here.
catalog = intake.open_catalog("../catalogs/*.yml")

## Process PCTS
* We can't know in advance which AINs appear in PCTS, so keep every CASE_NBR-AIN pair for now, but have a way to identify how many observations to drop later on.
* Join parcels to the TOC-eligible zones.
* We want all the entitlements (TOC or non-TOC) after 10/2017 on TOC-eligible parcels, so that we can see what activity has occurred.

In [3]:
def subset_pcts():
    """Assemble PCTS cases that sit on TOC parcels in eligible zones.

    Steps, in order:
      1. Read PCTS from the catalog and subset it with
         ``laplan.pcts.subset_pcts`` (``start_date="2017-10-01"``, every
         valid prefix except ENV / PAR / ADM, ``get_dummies=True``).
      2. Apply ``laplan.pcts.drop_child_cases`` (keeping child
         entitlements) and drop the columns named after the prefixes and
         suffixes.
      3. Read the TOC parcels from S3 and pass them through
         ``utils.get_centroid``.
      4. Download the parsed zoning parquet, read it, and keep only the
         zone classes in ``eligible_zones``.
      5. Inner-merge parcels with PCTS on ``AIN`` (validated as 1:m), then
         inner spatial join against the eligible zoning polygons.

    Returns:
        The spatially joined table with ``index_right`` removed and exact
        duplicate rows dropped.

    Side effects:
        Temporarily writes ``../gis/parsed_zoning.parquet``; the file is
        removed again even if reading it fails.
    """
    # Import PCTS - use function to subset
    pcts = catalog.pcts2.read()

    excluded_prefixes = ["ENV", "PAR", "ADM"]
    prefix = [
        x for x in laplan.pcts.VALID_PCTS_PREFIX if x not in excluded_prefixes
    ]

    pcts = laplan.pcts.subset_pcts(
        pcts,
        start_date="2017-10-01",
        prefix_list=prefix,
        get_dummies=True,
    )

    pcts = laplan.pcts.drop_child_cases(pcts, keep_child_entitlements=True)

    # Presumably the dummy columns created by get_dummies=True are named
    # after the prefixes/suffixes; they are not needed downstream.
    dropme = prefix + list(laplan.pcts.VALID_PCTS_SUFFIX)
    pcts = pcts.drop(columns=dropme)

    # Import parcels
    parcels = gpd.read_file(
        f'zip+s3://{bucket_name}/gis/intermediate/TOC_Parcels.zip'
    )

    # Grab the centroids (per the original note, the helper also counts
    # the number of duplicate obs)
    parcels2 = utils.get_centroid(parcels)

    # geoparquets can't be read from S3 directly. Download and read locally.
    zoning_file = "parsed_zoning.parquet"
    local_zoning_path = f'../gis/{zoning_file}'
    s3.download_file(bucket_name, f'gis/raw/{zoning_file}', local_zoning_path)
    try:
        zoning = gpd.read_parquet(local_zoning_path)
    finally:
        # Always clean up the local copy, even when the read fails.
        os.remove(local_zoning_path)

    eligible_zones = ['R2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5',
                      'RD1.5', 'RD2', 'RD3', 'RD4', 'RD5', 'RD6',
                      'C1', 'C2', 'C4', 'C5']

    eligible_zoning = zoning[zoning.zone_class.isin(eligible_zones)]

    # Merge PCTS with parcel info to see which TOC Tier it falls within
    m1 = pd.merge(parcels2, pcts, on='AIN', how='inner', validate='1:m')

    # Spatial join with eligible zones and attach the zoning info
    # NOTE(review): newer geopandas releases rename `op` to `predicate`;
    # confirm against the geopandas version pinned for this project.
    m2 = gpd.sjoin(
        m1, eligible_zoning, how='inner', op='intersects'
    ).drop(columns=['index_right'])

    m2 = m2.drop_duplicates()

    return m2

In [4]:
def more_pcts_processing(df):
    """Flag TOC cases and trim the table to the columns used downstream.

    Args:
        df: Table with a string ``CASE_NBR`` column plus every column
            listed in ``keep`` below (other than ``is_TOC``, which is
            created here).

    Returns:
        A copy of ``df`` restricted to the ``keep`` columns, where
        ``is_TOC`` is 1 when ``CASE_NBR`` contains the literal text "TOC"
        and 0 otherwise (including when ``CASE_NBR`` is missing).
    """
    # We care about TOC vs non-TOC entitlements.
    # na=False: a missing CASE_NBR counts as non-TOC instead of breaking
    # the integer cast; regex=False: "TOC" is matched as plain text.
    df = df.assign(
        is_TOC=df.CASE_NBR.str.contains("TOC", regex=False, na=False).astype(int),
    )

    # Subset by CASE_ACTION_ID -- let's use all cases for now (but approved cases are 1, 2, 11)
    # We have some NaN CASE_ACTION_IDs, so we won't subset at all

    # At this point, no more duplicates by PARENT_CASE - AIN combination

    # Subset and keep columns we need
    keep = ['CASE_ID', 'AIN', 'TOC_Tier',
            'CASE_NBR', 'CASE_SEQ_NBR', 'CASE_YR_NBR', 'id',
            'CASE_ACTION_ID', 'CASE_FILE_RCV_DT', 'CASE_FILE_DATE',
            'PARENT_CASE', 'PROJ_DESC_TXT',
            'zone_class', 'centroid', 'is_TOC']

    return df[keep]

In [5]:
def tag_toc_entitlements(df):
    """Collapse case-level rows into per-parcel TOC / non-TOC counts.

    Args:
        df: Case-level table with at least the columns ``AIN``,
            ``centroid``, ``TOC_Tier``, ``zone_class``, ``is_TOC`` (0/1)
            and ``id``.

    Returns:
        One row per distinct (AIN, TOC_Tier, zone_class, num_TOC,
        num_nonTOC) combination with columns ``AIN``, ``centroid``,
        ``TOC_Tier``, ``zone_class``, ``num_TOC`` and ``num_nonTOC``
        (integer counts, 0 when the parcel has none of that kind).
    """
    # Save the geometry of the parcels, just use centroids
    parcel_centroids = df[['AIN', 'centroid']].drop_duplicates()

    group_cols = ['AIN', 'TOC_Tier', 'zone_class', 'is_TOC']

    # Make into parcel-level df: count cases per parcel and TOC flag
    counts = (df[group_cols + ['id']]
              .groupby(group_cols)
              .agg({'id': 'count'})
              .reset_index())

    # Make wide: each row carries its count in the column matching its
    # is_TOC flag and NaN in the other one (vectorized, no row-wise apply).
    counts = counts.assign(
        num_TOC=counts['id'].where(counts.is_TOC == 1),
        num_nonTOC=counts['id'].where(counts.is_TOC == 0),
    )

    # If there are multiple obs for the same AIN, fill the NaNs with the
    # max from the AIN's other rows. Then, drop duplicates.
    by_ain = counts.groupby('AIN')
    counts = counts.assign(
        num_TOC=counts.num_TOC.fillna(by_ain['num_TOC'].transform('max')),
        num_nonTOC=counts.num_nonTOC.fillna(by_ain['num_nonTOC'].transform('max')),
    )

    parcel_counts = counts.drop_duplicates(
        subset=['AIN', 'TOC_Tier', 'zone_class', 'num_TOC', 'num_nonTOC']
    )

    # Anything still NaN means the parcel has no cases of that kind.
    parcel_counts = (
        parcel_counts.assign(
            num_TOC=parcel_counts.num_TOC.fillna(0).astype(int),
            num_nonTOC=parcel_counts.num_nonTOC.fillna(0).astype(int),
        ).drop(columns=['is_TOC', 'id'])
    )

    # Merge geometry back on
    return pd.merge(parcel_centroids, parcel_counts,
                    on='AIN', how='inner', validate='1:m')

In [6]:
# Run the three processing steps in order:
# PCTS joined to parcels/zones -> trimmed columns with is_TOC flag -> parcel-level counts
df1 = subset_pcts()   
df2 = more_pcts_processing(df1)
df = tag_toc_entitlements(df2)



In [7]:
# Preview the first rows of the parcel-level table
df.head()

Unnamed: 0,AIN,centroid,TOC_Tier,zone_class,num_TOC,num_nonTOC
0,2010004040,POINT (6378795.418 1908220.195),1,R3,0,1
1,2010004045,POINT (6378721.467 1908496.944),1,C4,0,1
2,2010004047,POINT (6378828.013 1908358.350),1,C4,0,1
3,2021012016,POINT (6373060.999 1898646.494),1,C2,0,1
4,2021012017,POINT (6373015.575 1898424.105),1,C2,0,1


## Summary stats

In [8]:
# Split the parcel-level table by the kind of entitlement counted on each row.
# The "both" subset is taken from the TOC rows that also have non-TOC cases.
toc_parcels = df.loc[df.num_TOC > 0]
non_toc_parcels = df.loc[df.num_nonTOC > 0]
have_both_parcels = toc_parcels.loc[toc_parcels.num_nonTOC > 0]

# Row counts for each subset (one row of df is reported as one parcel).
print(f'# parcels: {len(df)}')
print(f'# parcels with TOC entitlements: {len(toc_parcels)}')
print(f'# parcels with non TOC entitlements: {len(non_toc_parcels)}')
print(f'# parcels with both TOC and non TOC entitlements: {len(have_both_parcels)}')
# Inclusion-exclusion: TOC + non-TOC - both, to compare against the total above.
print(f'double check sum: {len(toc_parcels) + len(non_toc_parcels) - len(have_both_parcels)}')

# parcels: 6971
# parcels with TOC entitlements: 472
# parcels with non TOC entitlements: 6528
# parcels with both TOC and non TOC entitlements: 29
double check sum: 6971


In [9]:
# Share of rows in df that fall in each subset.
# Note: values are printed as fractions of 1 (not multiplied by 100), despite the '%' label.
print(f'% parcels with TOC entitlements: {len(toc_parcels) / len(df)}')
print(f'% parcels with non TOC entitlements: {len(non_toc_parcels) / len(df)}')
print(f'% parcels with both entitlements: {len(have_both_parcels) / len(df)}')

% parcels with TOC entitlements: 0.06770908047625879
% parcels with non TOC entitlements: 0.9364510113326638
% parcels with both entitlements: 0.00416009180892268


In [10]:
# Zone-class breakdown of rows with at least one TOC entitlement
toc_parcels.zone_class.value_counts()

C2       182
R3       169
R4        82
C4        26
RD1.5      7
R5         3
RAS4       2
RD2        1
Name: zone_class, dtype: int64

In [11]:
# Zone-class breakdown of rows with at least one non-TOC entitlement
non_toc_parcels.zone_class.value_counts()

RD1.5    1635
R2       1608
C2       1523
R3        872
C4        307
RD2       215
C1        102
R4         92
RD3        55
RD5        35
C5         28
RAS4       25
R5         25
RAS3        5
RD4         1
Name: zone_class, dtype: int64

In [12]:
# Write the parcel-level table to a local GeoJSON file ...
df.to_file(
    filename = '../gis/intermediate/toc_eligible_parcels_with_entitlements.geojson',
    driver = 'GeoJSON',
)

# ... then upload that file to the project bucket under the same relative key.
s3.upload_file(
    Filename = '../gis/intermediate/toc_eligible_parcels_with_entitlements.geojson',
    Bucket = bucket_name,
    Key = 'gis/intermediate/toc_eligible_parcels_with_entitlements.geojson',
)

## Breakdown by TOC Tiers

In [13]:
def summarize_by_tiers(df):
    """Summarize parcel rows and entitlement counts for each TOC tier.

    Returns one row per ``TOC_Tier`` with:
      * ``AIN``: number of rows in the tier
      * ``num_TOC`` / ``num_nonTOC``: summed entitlement counts
      * ``pct_TOC`` / ``pct_nonTOC``: each count as a fraction of the
        tier's combined TOC + non-TOC total
      * ``all_AIN``: row count across all tiers
      * ``pct_AIN``: the tier's fraction of ``all_AIN``
    """
    summary = (
        df.groupby('TOC_Tier')
        .agg({'AIN': 'count', 'num_TOC': 'sum', 'num_nonTOC': 'sum'})
        .reset_index()
    )

    # Both shares use the same denominator: all entitlements in the tier.
    tier_total = summary.num_TOC + summary.num_nonTOC
    summary['pct_TOC'] = summary['num_TOC'] / tier_total
    summary['pct_nonTOC'] = summary['num_nonTOC'] / tier_total

    summary['all_AIN'] = summary.AIN.sum()
    summary['pct_AIN'] = summary.AIN / summary.all_AIN

    return summary

# Build the per-tier summary table and display it
by_tiers = summarize_by_tiers(df)
by_tiers

Unnamed: 0,TOC_Tier,AIN,num_TOC,num_nonTOC,pct_TOC,pct_nonTOC,all_AIN,pct_AIN
0,0,119,3,121,0.024194,0.975806,6971,0.017071
1,1,4035,75,4131,0.017832,0.982168,6971,0.578827
2,2,1545,102,1506,0.063433,0.936567,6971,0.221632
3,3,1094,257,952,0.212572,0.787428,6971,0.156936
4,4,178,37,185,0.166667,0.833333,6971,0.025534


## Breakdown by Zone Class

In [14]:
def summarize_by_zones(df):
    """Summarize parcel rows and entitlement counts for each zone class.

    Returns one row per ``zone_class`` with:
      * ``AIN``: number of rows in the zone class
      * ``num_TOC`` / ``num_nonTOC``: summed entitlement counts
      * ``pct_TOC`` / ``pct_nonTOC``: each count as a fraction of the
        zone's combined TOC + non-TOC total
      * ``all_AIN``: row count across all zone classes
      * ``pct_AIN``: the zone's fraction of ``all_AIN``
    """
    summary = (
        df.groupby('zone_class')
        .agg({'AIN': 'count', 'num_TOC': 'sum', 'num_nonTOC': 'sum'})
        .reset_index()
    )

    # Both shares use the same denominator: all entitlements in the zone.
    zone_total = summary.num_TOC + summary.num_nonTOC
    summary['pct_TOC'] = summary['num_TOC'] / zone_total
    summary['pct_nonTOC'] = summary['num_nonTOC'] / zone_total

    summary['all_AIN'] = summary.AIN.sum()
    summary['pct_AIN'] = summary.AIN / summary.all_AIN

    return summary

# Build the per-zone-class summary table and display it
by_zones = summarize_by_zones(df)
by_zones

Unnamed: 0,zone_class,AIN,num_TOC,num_nonTOC,pct_TOC,pct_nonTOC,all_AIN,pct_AIN
0,C1,102,0,110,0.0,1.0,6971,0.014632
1,C2,1692,184,1679,0.098765,0.901235,6971,0.24272
2,C4,330,26,352,0.068783,0.931217,6971,0.047339
3,C5,28,0,34,0.0,1.0,6971,0.004017
4,R2,1608,0,1658,0.0,1.0,6971,0.23067
5,R3,1031,169,890,0.159585,0.840415,6971,0.147898
6,R4,172,82,96,0.460674,0.539326,6971,0.024674
7,R5,27,3,28,0.096774,0.903226,6971,0.003873
8,RAS3,5,0,5,0.0,1.0,6971,0.000717
9,RAS4,27,2,30,0.0625,0.9375,6971,0.003873


In [15]:
# Write both summary tables into one workbook, one sheet per table.
# Using ExcelWriter as a context manager saves and closes the file on exit;
# the explicit writer.save() call is deprecated/removed in newer pandas.
with pd.ExcelWriter('../outputs/toc_charts.xlsx', engine = 'xlsxwriter') as writer:
    by_tiers.to_excel(writer, sheet_name = 'entitlements_by_tier')
    by_zones.to_excel(writer, sheet_name = 'entitlements_by_zone')