# Entitlements in TOC-eligible parcels

In [2]:
import boto3
import geopandas as gpd
import intake
import numpy as np
import os
import pandas as pd
import utils
import laplan

In [3]:
# Open the intake catalog described by the YAML files under ../catalogs
catalog = intake.open_catalog("../catalogs/*.yml")

# S3 client and bucket name used by later cells to read inputs and upload outputs
s3 = boto3.client('s3')
bucket_name = 'city-planning-entitlements'

## Process PCTS
* We don't know in advance which AINs are used in PCTS, so keep every CASE_NBR-AIN pair for now, but keep a way to identify how many observations need to be dropped later on
* Join parcels to zoning eligible zones
* Want all the entitlements (TOC or non-TOC) after 10/2017 in the TOC-eligible parcels, and then we can see what activity has occurred

In [11]:
def subset_pcts():
    """Assemble PCTS cases on TOC parcels that fall in eligible zones.

    Steps:
      1. Read PCTS from the catalog and subset it with
         ``laplan.pcts.subset_pcts`` (start date 2017-10-01, every valid
         prefix except ENV/PAR/ADM, suffix list ``["TOC"]``, dummies on).
      2. Read the TOC parcels shapefile from S3 and run it through
         ``utils.get_centroid``.
      3. Download the parsed zoning geoparquet from S3, read it, delete the
         local copy, and keep only the eligible zone classes.
      4. Inner-merge parcels with PCTS on ``AIN`` (one parcel to many
         cases), then spatially join the result to the eligible zoning.

    Returns:
        The spatial-join result with ``index_right`` dropped and exact
        duplicate rows removed.
    """
    # Import PCTS - use function to subset
    pcts = catalog.pcts2.read()

    excluded_prefixes = ["ENV", "PAR", "ADM"]
    prefixes = [
        p for p in laplan.pcts.VALID_PCTS_PREFIX if p not in excluded_prefixes
    ]
    # NOTE(review): the notes for this section say we want TOC *and* non-TOC
    # entitlements, but suffix_list=["TOC"] presumably restricts the extract
    # to TOC-suffixed cases. Confirm against laplan.pcts.subset_pcts.
    suffixes = ["TOC"]

    pcts = laplan.pcts.subset_pcts(
        pcts,
        start_date="2017-10-01",
        prefix_list=prefixes,
        suffix_list=suffixes,
        get_dummies=True,
    )

    # Import parcels
    parcels = gpd.read_file(
        f'zip+s3://{bucket_name}/gis/intermediate/TOC_Parcels.zip'
    )

    # Grab the centroids and count number of duplicate obs
    # (behavior lives in utils.get_centroid, which is not visible here)
    parcels2 = utils.get_centroid(parcels)

    # geoparquets can't be read from S3 directly. Download and read locally.
    zoning_file = "parsed_zoning.parquet"
    local_zoning_path = f'../gis/{zoning_file}'
    s3.download_file(bucket_name, f'gis/raw/{zoning_file}', local_zoning_path)
    try:
        zoning = gpd.read_parquet(local_zoning_path)
    finally:
        # Always clean up the local copy, even if reading it fails.
        os.remove(local_zoning_path)

    eligible_zones = [
        'R2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5',
        'RD1.5', 'RD2', 'RD3', 'RD4', 'RD5', 'RD6',
        'C1', 'C2', 'C4', 'C5',
    ]
    eligible_zoning = zoning[zoning.zone_class.isin(eligible_zones)]

    # Merge PCTS with parcel info to see which TOC Tier it falls within
    m1 = pd.merge(parcels2, pcts, on='AIN', how='inner', validate='1:m')

    # Spatial join with eligible zones and attach the zoning info.
    # 'intersects' is sjoin's default test; the keyword is omitted because
    # its name changed from `op` to `predicate` between geopandas releases.
    m2 = gpd.sjoin(m1, eligible_zoning, how='inner').drop(
        columns=['index_right']
    )

    return m2.drop_duplicates()

In [12]:
def more_pcts_processing(df):
    """Return ``df`` restricted to the columns needed downstream.

    No rows are filtered here: the original notes say approved cases have
    CASE_ACTION_ID 1, 2 or 11, but some CASE_ACTION_IDs are NaN, so all
    cases are kept for now. The original notes also state that by this
    point there are no duplicates by PARENT_CASE - AIN combination.

    Raises KeyError if any of the expected columns is missing.
    """
    case_id_columns = ('CASE_ID', 'AIN', 'TOC_Tier')
    case_number_columns = ('CASE_NBR', 'CASE_SEQ_NBR', 'CASE_YR_NBR', 'id')
    case_date_columns = ('CASE_ACTION_ID', 'CASE_FILE_RCV_DT', 'CASE_FILE_DATE')
    description_columns = ('PARENT_CASE', 'PROJ_DESC_TXT')
    derived_columns = ('prefix', 'zone_class', 'centroid', 'TOC')

    # Concatenate the groups in the same order the output columns should have.
    wanted = [
        *case_id_columns,
        *case_number_columns,
        *case_date_columns,
        *description_columns,
        *derived_columns,
    ]
    return df[wanted]

In [13]:
def tag_toc_entitlements(df):
    # Save the geometry of the parcels, just use centroids
    parcel_centroids = df[['AIN', 'centroid']].drop_duplicates()
    
    keep_col = ['CASE_NBR', 'id', 'CASE_ACTION_ID', 'CASE_FILE_DATE', 
                'AIN', 'TOC_Tier', 'zone_class', 'TOC']
    
    df = (df[keep_col]
          .assign(
              is_TOC = df.TOC.astype(int)
          ).drop(columns = ["TOC"])
         )
    
    # Make into parcel-level df
    df2 = (df.groupby(['AIN', 'TOC_Tier', 'zone_class', 'is_TOC'])
           .agg({'id':'count'})
           .reset_index()) 

    # Make wide
    df2 = df2.assign(
        num_TOC = df2.apply(lambda row: row.id if row.is_TOC == 1 else np.nan, axis = 1),
        num_nonTOC = df2.apply(lambda row: row.id if row.is_TOC == 0 else np.nan, axis = 1)
    )
    
    
    # If there are multiple obs for the same AIN, fill the NaNs with the max from the other column 
    # Then, drop duplicates
    df2 = df2.assign(
        num_TOC = df2.num_TOC.fillna(df2.groupby('AIN')['num_TOC'].transform('max')),
        num_nonTOC = df2.num_nonTOC.fillna(df2.groupby('AIN')['num_nonTOC'].transform('max'))
    )
    
    df3 = df2.drop_duplicates(subset = ['AIN', 'TOC_Tier', 'zone_class', 'num_TOC', 'num_nonTOC'])

    df3 = (df3.assign(
            num_TOC = df3.num_TOC.fillna(0).astype(int),
            num_nonTOC = df3.num_nonTOC.fillna(0).astype(int)
        ).drop(columns = ['is_TOC', 'id'])
    )

    # Merge geometry back on
    df4 = pd.merge(parcel_centroids, df3, on = 'AIN', how = 'inner', validate = '1:m')
    
    return df4

In [None]:
# Run the pipeline: build the case/parcel table, trim it to the needed
# columns, then aggregate to parcel-level TOC / non-TOC counts.
df1 = subset_pcts()   
df2 = more_pcts_processing(df1)
df = tag_toc_entitlements(df2)



In [None]:
# Preview the first rows of the parcel-level table
df.head()

## Summary stats

In [8]:
# Boolean masks for parcels with at least one TOC / non-TOC entitlement
has_toc = df.num_TOC > 0
has_non_toc = df.num_nonTOC > 0

toc_parcels = df[has_toc]
non_toc_parcels = df[has_non_toc]
have_both_parcels = df[has_toc & has_non_toc]

# Inclusion-exclusion: TOC + non-TOC - both should equal the parcel total
n_covered = len(toc_parcels) + len(non_toc_parcels) - len(have_both_parcels)

print(f'# parcels: {len(df)}')
print(f'# parcels with TOC entitlements: {len(toc_parcels)}')
print(f'# parcels with non TOC entitlements: {len(non_toc_parcels)}')
print(f'# parcels with both TOC and non TOC entitlements: {len(have_both_parcels)}')
print(f'double check sum: {n_covered}')

# parcels: 472
# parcels with TOC entitlements: 472
# parcels with non TOC entitlements: 0
# parcels with both TOC and non TOC entitlements: 0
double check sum: 472


In [9]:
# Shares of all parcels (printed as fractions between 0 and 1)
n_parcels = len(df)
share_toc = len(toc_parcels) / n_parcels
share_non_toc = len(non_toc_parcels) / n_parcels
share_both = len(have_both_parcels) / n_parcels

print(f'% parcels with TOC entitlements: {share_toc}')
print(f'% parcels with non TOC entitlements: {share_non_toc}')
print(f'% parcels with both entitlements: {share_both}')

% parcels with TOC entitlements: 1.0
% parcels with non TOC entitlements: 0.0
% parcels with both entitlements: 0.0


In [10]:
# Number of parcels with TOC entitlements in each zone class
toc_parcels.zone_class.value_counts()

C2       182
R3       169
R4        82
C4        26
RD1.5      7
R5         3
RAS4       2
RD2        1
Name: zone_class, dtype: int64

In [11]:
# Number of parcels with non-TOC entitlements in each zone class
non_toc_parcels.zone_class.value_counts()

Series([], Name: zone_class, dtype: int64)

In [12]:
# Write the parcel-level table locally as GeoJSON, then upload the same file
# to S3 under the matching key.
geojson_key = 'gis/intermediate/toc_eligible_parcels_with_entitlements.geojson'
local_geojson = f'../{geojson_key}'

df.to_file(local_geojson, driver='GeoJSON')
s3.upload_file(local_geojson, bucket_name, geojson_key)

## Breakdown by TOC Tiers

In [13]:
def summarize_by_tiers(df):
    """Aggregate the parcel-level table by ``TOC_Tier``.

    Returns one row per tier with:
      * ``AIN``: number of rows in the tier
      * ``num_TOC`` / ``num_nonTOC``: summed entitlement counts
      * ``pct_TOC`` / ``pct_nonTOC``: each count as a share of their sum
      * ``all_AIN``: total row count across all tiers (same on every row)
      * ``pct_AIN``: the tier's share of ``all_AIN``
    """
    tiers = (
        df.groupby('TOC_Tier')
        .agg({'AIN': 'count', 'num_TOC': 'sum', 'num_nonTOC': 'sum'})
        .reset_index()
    )

    # Shared denominator: all entitlements (TOC + non-TOC) in the tier
    entitlements = tiers['num_TOC'] + tiers['num_nonTOC']
    tiers['pct_TOC'] = tiers['num_TOC'] / entitlements
    tiers['pct_nonTOC'] = tiers['num_nonTOC'] / entitlements

    tiers['all_AIN'] = tiers['AIN'].sum()
    tiers['pct_AIN'] = tiers['AIN'] / tiers['all_AIN']

    return tiers

# Summarize by TOC tier; the bare name displays the table in the notebook
by_tiers = summarize_by_tiers(df)
by_tiers

Unnamed: 0,TOC_Tier,AIN,num_TOC,num_nonTOC,pct_TOC,pct_nonTOC,all_AIN,pct_AIN
0,0,3,3,0,1.0,0.0,472,0.006356
1,1,74,75,0,1.0,0.0,472,0.15678
2,2,101,102,0,1.0,0.0,472,0.213983
3,3,257,257,0,1.0,0.0,472,0.544492
4,4,37,37,0,1.0,0.0,472,0.07839


## Breakdown by Zone Class

In [14]:
def summarize_by_zones(df):
    """Aggregate the parcel-level table by ``zone_class``.

    Returns one row per zone class with the row count (``AIN``), summed
    ``num_TOC`` / ``num_nonTOC``, each of those as a share of their sum
    (``pct_TOC`` / ``pct_nonTOC``), the overall row count (``all_AIN``) and
    the zone's share of it (``pct_AIN``).
    """
    aggregations = {'AIN': 'count', 'num_TOC': 'sum', 'num_nonTOC': 'sum'}
    zones = df.groupby('zone_class').agg(aggregations).reset_index()

    # All entitlements (TOC + non-TOC) per zone class
    total_entitlements = zones['num_TOC'] + zones['num_nonTOC']

    return zones.assign(
        pct_TOC=zones['num_TOC'] / total_entitlements,
        pct_nonTOC=zones['num_nonTOC'] / total_entitlements,
        all_AIN=zones['AIN'].sum(),
        pct_AIN=lambda z: z['AIN'] / z['all_AIN'],
    )

# Summarize by zone class; the bare name displays the table in the notebook
by_zones = summarize_by_zones(df)
by_zones

Unnamed: 0,zone_class,AIN,num_TOC,num_nonTOC,pct_TOC,pct_nonTOC,all_AIN,pct_AIN
0,C2,182,184,0,1.0,0.0,472,0.385593
1,C4,26,26,0,1.0,0.0,472,0.055085
2,R3,169,169,0,1.0,0.0,472,0.358051
3,R4,82,82,0,1.0,0.0,472,0.173729
4,R5,3,3,0,1.0,0.0,472,0.006356
5,RAS4,2,2,0,1.0,0.0,472,0.004237
6,RD1.5,7,7,0,1.0,0.0,472,0.014831
7,RD2,1,1,0,1.0,0.0,472,0.002119


In [15]:
# Write both summary tables to one workbook, one sheet each.
# The context manager saves and closes the file on exit (and closes it if a
# write fails); ExcelWriter.save() is deprecated and absent in pandas 2.x.
with pd.ExcelWriter('../outputs/toc_charts.xlsx', engine='xlsxwriter') as writer:
    by_tiers.to_excel(writer, sheet_name='entitlements_by_tier')
    by_zones.to_excel(writer, sheet_name='entitlements_by_zone')