# Entitlements in TOC-eligible parcels

In [1]:
import boto3
import intake
import numpy as np
import geopandas as gpd
import pandas as pd
import pcts_parser
import utils

In [2]:
# Name of the S3 bucket that every s3:// path and upload in this notebook points at
bucket_name = 'city-planning-entitlements'

# boto3 S3 client; the notebook uses it for its upload_file calls
s3 = boto3.client('s3')

# Intake catalog built from every YAML file found under ../catalogs
catalog = intake.open_catalog("../catalogs/*.yml")

## Parcels
* Won't know which AINs are used in PCTS, so keep all of them, but have a way to identify how many obs to drop later on
* 5/26: when we switched out our spatially joined TOC-eligible parcels for the crosswalk of TOC-eligible parcels, the total number of eligible parcels dropped by a lot. The crosswalk is about 12% the size of the original one (which, admittedly, would have shrunk once we accounted for zoning). But once parent cases are attached to parcels and a parcel-level df is constructed, we went from 200 parcels to about 50 parcels.
* 5/28: let's just use our file first, until we get confirmation from Planning. But, simply cleaning up this notebook and using functions...we are able to reproduce the results we had before.

In [3]:
""" NEW METHOD
parcels = gpd.read_file(f'zip+s3://{bucket_name}/gis/raw/la_parcels.zip')

toc_parcels = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_toc2017_parcels.parquet')

parcels = pd.merge(parcels, toc_parcels, on = 'AIN', how = 'inner', validate = '1:1').to_crs({'init':'epsg:2229'})
display(parcels.TOC_Tier.value_counts())

# Upload just the parcels in TOC Tiers into S3
parcels.to_file(driver = 'GeoJSON', filename = '../gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson')

s3.upload_file('../gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson', 
               f'{bucket_name}', 'gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson')
"""

" NEW METHOD\nparcels = gpd.read_file(f'zip+s3://{bucket_name}/gis/raw/la_parcels.zip')\n\ntoc_parcels = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_toc2017_parcels.parquet')\n\nparcels = pd.merge(parcels, toc_parcels, on = 'AIN', how = 'inner', validate = '1:1').to_crs({'init':'epsg:2229'})\ndisplay(parcels.TOC_Tier.value_counts())\n\n# Upload just the parcels in TOC Tiers into S3\nparcels.to_file(driver = 'GeoJSON', filename = '../gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson')\n\ns3.upload_file('../gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson', \n               f'{bucket_name}', 'gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson')\n"

In [4]:
""" NEW METHOD
parcels = gpd.read_file(f's3://{bucket_name}/gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson')

# Grab the centroids and count number of duplicate obs
parcels2 = utils.get_centroid(parcels)
"""

" NEW METHOD\nparcels = gpd.read_file(f's3://{bucket_name}/gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson')\n\n# Grab the centroids and count number of duplicate obs\nparcels2 = utils.get_centroid(parcels)\n"

In [5]:
""" ORIGINAL METHOD
parcels = gpd.read_file(f'zip+s3://{bucket_name}/gis/intermediate/la_parcels_toc.zip')

parcels = parcels[parcels.TOC_Tier > 0]

# Upload just the parcels in TOC Tiers into S3
parcels.to_file(driver = 'GeoJSON', filename = '../gis/intermediate/toc_eligible_parcels.geojson')

s3.upload_file('../gis/intermediate/toc_eligible_parcels.geojson', f'{bucket_name}', 
               'gis/intermediate/toc_eligible_parcels.geojson')
"""

" ORIGINAL METHOD\nparcels = gpd.read_file(f'zip+s3://{bucket_name}/gis/intermediate/la_parcels_toc.zip')\n\nparcels = parcels[parcels.TOC_Tier > 0]\n\n# Upload just the parcels in TOC Tiers into S3\nparcels.to_file(driver = 'GeoJSON', filename = '../gis/intermediate/toc_eligible_parcels.geojson')\n\ns3.upload_file('../gis/intermediate/toc_eligible_parcels.geojson', f'{bucket_name}', \n               'gis/intermediate/toc_eligible_parcels.geojson')\n"

## Import files

In [6]:
# Read the master PCTS table and, in the same expression, keep only cases that
# were filed on or after '2017-10' and that are their own parent case
# (CASE_ID equal to PARENT_CASE).
pcts = (
    pd.read_parquet(f's3://{bucket_name}/data/final/master_pcts.parquet')
    .loc[lambda cases: (cases.CASE_FILE_DATE >= '2017-10')
                       & (cases.CASE_ID == cases.PARENT_CASE)]
)

In [7]:
# TOC-eligible parcels, read from the intermediate GeoJSON kept in the S3 bucket
parcels = gpd.read_file(
    f's3://{bucket_name}/gis/intermediate/toc_eligible_parcels.geojson'
)

# Centroid version of the parcels; per the project's utils helper this step also
# counts duplicate observations
parcels2 = utils.get_centroid(parcels)

  return _prepare_from_string(" ".join(pjargs))


In [8]:
# Load the parsed zoning layer, keep only the zone classes listed below, and
# spatially join it to parcels2 to find TOC-eligible parcels in an eligible zone
zoning = gpd.read_file(f's3://{bucket_name}/gis/raw/parsed_zoning.geojson')

eligible_zones = [
    'R2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5',
    'RD1.5', 'RD2', 'RD3', 'RD4', 'RD5', 'RD6',
    'C1', 'C2', 'C4', 'C5',
]

eligible_zoning = zoning.loc[zoning.zone_class.isin(eligible_zones)]

# Inner join: only parcels that intersect an eligible zoning geometry survive;
# the join's bookkeeping column index_right is dropped afterwards
parcels_with_zoning = (
    gpd.sjoin(parcels2, eligible_zoning, how = 'inner', op = 'intersects')
    .drop(columns = ['index_right'])
)

## Process PCTS
* Join parcels to zoning
* Subset for eligible zones and eligible PCTS prefixes to see how many TOC-eligible parcels fall into eligible zones

In [9]:
def zoning_pcts_processing(df, parcels_with_zoning):
    """
    Attach parcel zoning / TOC attributes to PCTS cases and filter the result.

    Parameters
    ----------
    df : pandas.DataFrame
        PCTS cases. Must contain the columns AIN, CASE_NBR and CASE_ACTION_ID.
    parcels_with_zoning : pandas.DataFrame or geopandas.GeoDataFrame
        Parcel attributes keyed by AIN (in this notebook, the result of the
        parcel / eligible-zoning spatial join).

    Returns
    -------
    pandas.DataFrame
        The inner merge of both inputs on AIN, de-duplicated, with an extra
        `prefix` column parsed from CASE_NBR. Rows whose prefix is ENV, ADM or
        PAR are removed, as are rows whose CASE_ACTION_ID is not in 1..14.
        Case numbers that pcts_parser rejects with ValueError are kept, with a
        missing (None) prefix.
    """
    # Merge in zoning and TOC info about the parcel, then drop exact duplicate rows
    merged = pd.merge(df, parcels_with_zoning, on = ['AIN'], how = 'inner').drop_duplicates()

    def parse_prefix(case_nbr):
        # The previous except-branch returned `z.prefix`, but `z` is never bound
        # when PCTSCaseNumber raises, so an unparseable case number crashed the
        # whole apply with UnboundLocalError. Return a missing prefix instead.
        try:
            return pcts_parser.PCTSCaseNumber(case_nbr).prefix
        except ValueError:
            return None

    # Column-wise apply (rather than row-wise apply + concat) also keeps the
    # `prefix` column present when the merge produced no rows
    with_prefix = merged.assign(prefix = merged.CASE_NBR.apply(parse_prefix))

    # Subset by PCTS prefix, drop ENV/ADM/PAR cases
    drop_prefix = ['ENV', 'ADM', 'PAR']
    without_dropped = with_prefix.loc[~with_prefix['prefix'].isin(drop_prefix)]

    # Subset by CASE_ACTION_ID -- all action ids 1..14 are kept for now
    # (per the original note, the approved ones are 1, 2 and 11)
    keep_action_ids = list(range(1, 15))
    result = without_dropped.loc[without_dropped.CASE_ACTION_ID.isin(keep_action_ids)]

    # Original author's note: at this point there are no more duplicates by
    # PARENT_CASE - AIN combination (not enforced by this function)
    return result

In [10]:
def tag_toc_entitlements(df, centroids = None):
    """
    Collapse case-level rows into one row per parcel with TOC / non-TOC counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Case-level rows containing CASE_NBR, id, CASE_ACTION_ID, CASE_FILE_DATE,
        AIN, TOC_Tier and zone_class.
    centroids : DataFrame-like, optional
        Frame with AIN, TOC_Tier, x, y, obs, num_obs and centroid columns.
        Defaults to the notebook-level `parcels2`, which is what this function
        always used before the parameter existed.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per AIN / TOC_Tier / zone_class with integer `num_TOC` and
        `num_nonTOC` counts; the `centroid` column becomes the `geometry` column.
    """
    if centroids is None:
        # Backward-compatible default: fall back to the notebook global
        centroids = parcels2

    keep_col = ['CASE_NBR', 'id', 'CASE_ACTION_ID', 'CASE_FILE_DATE',
                'AIN', 'TOC_Tier', 'zone_class']

    # A case is tagged as TOC when its case number contains the text 'TOC'
    cases = df[keep_col].assign(
        is_TOC = df.CASE_NBR.str.contains('TOC').astype(int)
    )

    # Make into parcel-level df: count case ids per parcel and TOC flag
    counts = (cases.groupby(['AIN', 'TOC_Tier', 'zone_class', 'is_TOC'])
              .agg({'id': 'count'})
              .reset_index())

    # Make wide: the count lands in num_TOC or num_nonTOC depending on the flag,
    # NaN in the other column (vectorised replacement for the row-wise apply)
    counts = counts.assign(
        num_TOC = counts['id'].where(counts.is_TOC == 1),
        num_nonTOC = counts['id'].where(counts.is_TOC == 0),
    )

    # If there are multiple obs for the same AIN, fill the NaNs with the max from
    # the AIN's other rows, so those rows become identical and can be de-duplicated
    counts = counts.assign(
        num_TOC = counts.num_TOC.fillna(counts.groupby('AIN')['num_TOC'].transform('max')),
        num_nonTOC = counts.num_nonTOC.fillna(counts.groupby('AIN')['num_nonTOC'].transform('max')),
    )

    per_parcel = counts.drop_duplicates(
        subset = ['AIN', 'TOC_Tier', 'zone_class', 'num_TOC', 'num_nonTOC'])

    # Parcels with only one kind of entitlement still have NaN in the other column
    per_parcel = (per_parcel.assign(
            num_TOC = per_parcel.num_TOC.fillna(0).astype(int),
            num_nonTOC = per_parcel.num_nonTOC.fillna(0).astype(int)
        ).drop(columns = ['is_TOC', 'id'])
    )

    # Merge in centroids for these parcels (much easier to plot)
    with_centroids = pd.merge(
        per_parcel, centroids, on = ['AIN', 'TOC_Tier'], how = 'inner'
    ).drop(columns = ['x', 'y', 'obs', 'num_obs'])

    with_centroids = with_centroids.rename(columns = {'centroid': 'geometry'})
    result = gpd.GeoDataFrame(with_centroids)
    # NOTE(review): this {'init': ...} CRS form looks like the source of the pyproj
    # warning line echoed in the notebook output; consider 'EPSG:2229' once the
    # installed geopandas / pyproj versions are confirmed.
    result.crs = {'init':'epsg:2229'}

    return result

In [11]:
# Merge the filtered PCTS cases with the zoned parcels and apply the prefix / action-id filters
pcts2 = zoning_pcts_processing(pcts, parcels_with_zoning)

In [12]:
# Collapse the case-level rows into a parcel-level GeoDataFrame with num_TOC / num_nonTOC counts
df = tag_toc_entitlements(pcts2)

  return _prepare_from_string(" ".join(pjargs))


## Summary stats

In [13]:
# Split the parcel-level frame by the kind of entitlements each parcel has
toc_parcels = df.loc[df.num_TOC > 0]
non_toc_parcels = df.loc[df.num_nonTOC > 0]
have_both_parcels = df.loc[(df.num_nonTOC > 0) & (df.num_TOC > 0)]

# One print call, one line per statistic. The last line re-derives the parcel
# total by inclusion-exclusion as a sanity check against the first line.
print(
    f'# parcels: {len(df)}',
    f'# parcels with TOC entitlements: {len(toc_parcels)}',
    f'# parcels with non TOC entitlements: {len(non_toc_parcels)}',
    f'# parcels with both TOC and non TOC entitlements: {len(have_both_parcels)}',
    f'double check sum: {len(toc_parcels) + len(non_toc_parcels) - len(have_both_parcels)}',
    sep = '\n',
)

# parcels: 1219
# parcels with TOC entitlements: 234
# parcels with non TOC entitlements: 1004
# parcels with both TOC and non TOC entitlements: 19
double check sum: 1219


In [14]:
# Share of all parcels in each group. The values are printed as plain fractions
# of len(df) (e.g. 0.19), not multiplied by 100, despite the '%' in the labels.
print(
    f'% parcels with TOC entitlements: {len(toc_parcels) / len(df)}',
    f'% parcels with non TOC entitlements: {len(non_toc_parcels) / len(df)}',
    f'% parcels with both entitlements: {len(have_both_parcels) / len(df)}',
    sep = '\n',
)

% parcels with TOC entitlements: 0.19196062346185397
% parcels with non TOC entitlements: 0.8236259228876128
% parcels with both entitlements: 0.015586546349466776


In [15]:
# Number of parcels with at least one TOC entitlement, per zone class
toc_parcels.zone_class.value_counts()

C2       86
R3       73
R4       50
C4       13
RD1.5     6
R5        2
RAS4      2
RD2       1
R2        1
Name: zone_class, dtype: int64

In [16]:
# Number of parcels with at least one non-TOC entitlement, per zone class
non_toc_parcels.zone_class.value_counts()

C2       420
C4       166
R3       107
RD1.5     91
R2        74
RD2       60
R4        30
C5        17
C1        13
RD3       10
R5         9
RAS4       6
RD5        1
Name: zone_class, dtype: int64

In [17]:
# Save the parcel-level GeoDataFrame locally as GeoJSON, then upload that same
# file to the project bucket under the matching key
df.to_file(
    '../gis/intermediate/toc_eligible_parcels_with_entitlements.geojson',
    driver = 'GeoJSON',
)

s3.upload_file(
    Filename = '../gis/intermediate/toc_eligible_parcels_with_entitlements.geojson',
    Bucket = f'{bucket_name}',
    Key = 'gis/intermediate/toc_eligible_parcels_with_entitlements.geojson',
)

## Breakdown by TOC Tiers

In [18]:
def summarize_by_tiers(df):
    """
    Summarise parcels and entitlement counts for each TOC tier.

    Returns one row per TOC_Tier with these columns, in order:
    TOC_Tier, AIN (number of rows in the tier), num_TOC and num_nonTOC (column
    sums), pct_TOC and pct_nonTOC (each sum divided by num_TOC + num_nonTOC),
    all_AIN (the AIN count summed over all tiers) and pct_AIN (AIN / all_AIN).
    """
    per_tier = (
        df.groupby('TOC_Tier')
        .agg({'AIN': 'count', 'num_TOC': 'sum', 'num_nonTOC': 'sum'})
        .reset_index()
    )

    # assign() adds the derived columns left to right, so later ones can read earlier ones
    return per_tier.assign(
        pct_TOC = lambda t: t.num_TOC / (t.num_TOC + t.num_nonTOC),
        pct_nonTOC = lambda t: t.num_nonTOC / (t.num_TOC + t.num_nonTOC),
        all_AIN = lambda t: t.AIN.sum(),
        pct_AIN = lambda t: t.AIN / t.all_AIN,
    )

# Build the per-tier summary and show it as the cell output
by_tiers = summarize_by_tiers(df)
by_tiers

Unnamed: 0,TOC_Tier,AIN,num_TOC,num_nonTOC,pct_TOC,pct_nonTOC,all_AIN,pct_AIN
0,1,280,31,279,0.1,0.9,1219,0.229696
1,2,249,55,203,0.213178,0.786822,1219,0.204266
2,3,591,138,520,0.209726,0.790274,1219,0.484824
3,4,99,11,137,0.074324,0.925676,1219,0.081214


## Breakdown by Zone Class

In [19]:
def summarize_by_zones(df):
    """
    Summarise parcels and entitlement counts for each zone class.

    Returns one row per zone_class with these columns, in order:
    zone_class, AIN (number of rows in the zone class), num_TOC and num_nonTOC
    (column sums), pct_TOC and pct_nonTOC (each sum divided by
    num_TOC + num_nonTOC), all_AIN (the AIN count summed over all zone classes)
    and pct_AIN (AIN / all_AIN).
    """
    per_zone = (
        df.groupby('zone_class')
        .agg({'AIN': 'count', 'num_TOC': 'sum', 'num_nonTOC': 'sum'})
        .reset_index()
    )

    # assign() adds the derived columns left to right, so later ones can read earlier ones
    return per_zone.assign(
        pct_TOC = lambda z: z.num_TOC / (z.num_TOC + z.num_nonTOC),
        pct_nonTOC = lambda z: z.num_nonTOC / (z.num_TOC + z.num_nonTOC),
        all_AIN = lambda z: z.AIN.sum(),
        pct_AIN = lambda z: z.AIN / z.all_AIN,
    )

# Build the per-zone-class summary and show it as the cell output
by_zones = summarize_by_zones(df)
by_zones

Unnamed: 0,zone_class,AIN,num_TOC,num_nonTOC,pct_TOC,pct_nonTOC,all_AIN,pct_AIN
0,C1,13,0,14,0.0,1.0,1219,0.010664
1,C2,495,87,481,0.153169,0.846831,1219,0.406071
2,C4,177,13,211,0.058036,0.941964,1219,0.145201
3,C5,17,0,20,0.0,1.0,1219,0.013946
4,R2,74,1,78,0.012658,0.987342,1219,0.060705
5,R3,177,73,114,0.390374,0.609626,1219,0.145201
6,R4,79,50,31,0.617284,0.382716,1219,0.064807
7,R5,11,2,10,0.166667,0.833333,1219,0.009024
8,RAS4,8,2,7,0.222222,0.777778,1219,0.006563
9,RD1.5,96,6,99,0.057143,0.942857,1219,0.078753


In [20]:
# Write both summary tables to one workbook, one sheet each.
# ExcelWriter is used as a context manager so the workbook is saved and closed
# when the block exits, even if a to_excel call raises; this also avoids
# writer.save(), which is deprecated and no longer available in newer pandas.
with pd.ExcelWriter('../outputs/toc_charts.xlsx', engine = 'xlsxwriter') as writer:
    by_tiers.to_excel(writer, sheet_name = 'entitlements_by_tier')
    by_zones.to_excel(writer, sheet_name = 'entitlements_by_zone')