# Entitlements in TOC-eligible parcels

In [1]:
import boto3
import intake
import numpy as np
import pandas as pd
import geopandas as gpd
import utils

In [2]:
catalog = intake.open_catalog("../catalogs/*.yml")

s3 = boto3.client('s3')
bucket_name = 'city-planning-entitlements'

## Compare methods 1 and 2 to see why parcels with entitlements differ so much

In [None]:
# Original method
"""
parcels = gpd.read_file(f'zip+s3://{bucket_name}/gis/intermediate/la_parcels_toc.zip')

display(parcels.TOC_Tier.value_counts())
parcels = parcels[parcels.TOC_Tier > 0]

# Upload just the parcels in TOC Tiers into S3
parcels.to_file(driver = 'GeoJSON', filename = '../gis/intermediate/toc_eligible_parcels.geojson')

s3.upload_file('../gis/intermediate/toc_eligible_parcels.geojson', f'{bucket_name}', 
               'gis/intermediate/toc_eligible_parcels.geojson')
"""

## Parcels
* Figure out how many are duplicates
* Won't know which AINs are used in PCTS, so keep all of them, but have a way to identify how many obs to drop later on

In [None]:
"""
parcels = gpd.read_file(f'zip+s3://{bucket_name}/gis/raw/la_parcels.zip')

toc_parcels = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_toc2017_parcels.parquet')

parcels = pd.merge(parcels, toc_parcels, on = 'AIN', how = 'inner', validate = '1:1').to_crs({'init':'epsg:2229'})
display(parcels.TOC_Tier.value_counts())

# Upload just the parcels in TOC Tiers into S3
parcels.to_file(driver = 'GeoJSON', filename = '../gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson')

s3.upload_file('../gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson', 
               f'{bucket_name}', 'gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson')
"""

In [None]:
parcels_withcrosswalk = gpd.read_file(f's3://{bucket_name}/gis/intermediate/toc_eligible_parcels_withcrosswalk.geojson')

# Grab the centroids and count number of duplicate obs
parcels2_withcrosswalk = utils.get_centroid(parcels_withcrosswalk)

In [None]:
parcels = gpd.read_file(f's3://{bucket_name}/gis/intermediate/toc_eligible_parcels.geojson')

# Grab the centroids and count number of duplicate obs
parcels2 = utils.get_centroid(parcels)

In [None]:
# Subset to eligible zones and see which TOC-eligible parcels also fall in eligible zones
zoning = gpd.read_file(f's3://{bucket_name}/gis/raw/parsed_zoning.geojson')

eligible_zones = ['R2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5', 
              'RD1.5', 'RD2', 'RD3', 'RD4', 'RD5', 'RD6', 
              'C1', 'C2', 'C4', 'C5']

eligible_zoning = zoning[zoning.zone_class.isin(eligible_zones)]

In [None]:
parcels_with_zoning = gpd.sjoin(parcels2, eligible_zoning, 
                            how = 'inner', op = 'intersects').drop(columns = ['index_right'])

parcels_with_zoning.to_file(driver = 'GeoJSON', filename = '../gis/parcels_with_zoning.geojson')

In [None]:
parcels_with_zoning_withcrosswalk = gpd.sjoin(parcels2_withcrosswalk, eligible_zoning, 
                                 how = 'inner', op = 'intersects').drop(columns = ['index_right'])

parcels_with_zoning_withcrosswalk.to_file(driver = 'GeoJSON', 
                                          filename = '../gis/parcels_with_zoning_withcrosswalk.geojson')


In [8]:
parcels_with_zoning = gpd.read_file('../gis/parcels_with_zoning.geojson').drop_duplicates()
parcels_with_zoning_withcrosswalk = gpd.read_file('../gis/parcels_with_zoning_withcrosswalk.geojson').drop_duplicates()

In [31]:
parcels_with_zoning.AIN.nunique()

187017

In [33]:
parcels_with_zoning_withcrosswalk.AIN.nunique()

23350

In [34]:
parcels_with_zoning_withcrosswalk.head()

Unnamed: 0,AIN,TOC_Tier,x,y,obs,num_obs,ZONE_CMPLT,ZONE_CLASS1,ZONE_SMRY,Q,...,specific_plan,height_district,D,o1,o2,o3,o1_descrip,o2_descrip,o3_descrip,geometry
0,2010004040,1,6378795.0,1908220.0,1,1,R3-1,R3,RESIDENTIAL,0,...,,1,0,,,,,,,POINT (6378795.418 1908220.195)
1,2024023012,2,6373280.0,1893585.0,1,1,C4-1VL,C4,COMMERCIAL,0,...,,1VL,0,,,,,,,POINT (6373279.994 1893585.043)
2,2026008042,2,6372778.0,1893589.0,1,1,(Q)RD1.5-1,RD1.5,RESIDENTIAL,1,...,,1,0,,,,,,,POINT (6372778.488 1893589.192)
3,2102002038,1,6399730.0,1901704.0,1,1,R3-1,R3,RESIDENTIAL,0,...,,1,0,,,,,,,POINT (6399730.096 1901704.165)
4,2102015002,1,6399758.0,1900887.0,1,1,R3-1,R3,RESIDENTIAL,0,...,,1,0,,,,,,,POINT (6399758.157 1900886.658)


## Process PCTS
* Subset and merge together tables within PCTS
* Join parcels to zoning
* Subset for eligible zones and eligible PCTS prefixes to see how many TOC-eligible parcels fall into eligible zones

In [9]:
def merge_pcts():
    # Grab tables from PCTS
    cases = pd.read_parquet('../data/tCASE.parquet')
    app = pd.read_parquet('../data/tAPLC.parquet')
    geo_info = pd.read_parquet('../data/tPROP_GEO_INFO.parquet')
    la_prop = pd.read_parquet('../data/tLA_PROP.parquet')
    #cases = catalog.pcts.tCASE.read()
    #app = catalog.pcts.tAPLC.read()
    #geo_info = catalog.pcts.tPROP_GEO_INFO.read()
    #la_prop = catalog.pcts.tLA_PROP.read()
    
    # Subset dataframes before merging
    cases = cases.assign(
        # Grab the year-month from received date
        CASE_FILE_DATE = pd.to_datetime(cases['CASE_FILE_RCV_DT']).dt.to_period('M'),
    )
    # Subset to Oct 2017 and after    
    cases = cases[cases.CASE_FILE_DATE >= '2017-10'][['CASE_ID', 'APLC_ID', 'CASE_NBR', 
                                                      'CASE_SEQ_NBR', 'CASE_YR_NBR', 'CASE_ACTION_ID', 
                                                      'CASE_FILE_RCV_DT', 'CASE_FILE_DATE']]    
    
    app = app[['APLC_ID', 'PROJ_DESC_TXT']]
    geo_info = geo_info[['CASE_ID', 'PROP_ID']]
    la_prop = la_prop[la_prop.ASSR_PRCL_NBR.notna()][['PROP_ID', 'ASSR_PRCL_NBR']]
    parents = pd.read_parquet(f's3://{bucket_name}/data/intermediate/parent_cases.parquet')

    # Merge in parent cases
    cases2 = pd.merge(cases, parents, on = 'CASE_ID', how = 'inner', validate = '1:1')
    
    # Merge with geo_info, la_prop, parcels to ID the parcels that have entitlements (10/2017 and after)
    m1 = pd.merge(cases2, geo_info, on = 'CASE_ID', how = 'inner', validate = '1:m')
    m2 = pd.merge(m1, la_prop, on = 'PROP_ID', how = 'inner', validate = 'm:1')
    m3 = pd.merge(m2, parcels, left_on = 'ASSR_PRCL_NBR', right_on = 'AIN', how = 'inner', validate = 'm:1')
    m4 = pd.merge(m3, app, on = 'APLC_ID', how = 'left', validate = 'm:1')
    
    m4 = m4.assign(
        id = m4.CASE_SEQ_NBR.astype(int).astype(str) + '_' + m4.CASE_YR_NBR.astype(int).astype(str)
    )  
  
    m4 = m4.drop_duplicates(subset = ['id', 'CASE_FILE_DATE', 'AIN', 'TOC_Tier', 'PROJ_DESC_TXT'])

    return m4

In [26]:
def zoning_pcts_processing(df, parcels_with_zoning): 
    # Merge in zoning and TOC info about the parcel
    m1 = pd.merge(df, parcels_with_zoning, on = ['AIN', 'centroid', 'TOC_Tier'], how = 'inner')
    
    # Drop duplicates
    m1 = m1.drop_duplicates()

    # Parse PCTS string and grab prefix
    parsed_col_names = ['prefix']

    def parse_pcts(row):
        try:
            z = utils.PCTSCaseNumber(row.CASE_NBR)
            return pd.Series([z.prefix], index = parsed_col_names)
        except ValueError:
            return pd.Series([z.prefix], index = parsed_col_names)

    parsed = m1.apply(parse_pcts, axis = 1)
    m2 = pd.concat([m1, parsed], axis = 1)
    
   
    # Subset by PCTS prefix, drop ENV/ADM/PAR cases
    drop_prefix = ['ENV', 'ADM', 'PAR']
    m3 = m2.loc[~m2.prefix.isin(drop_prefix)]
    
    # Subset by CASE_ACTION_ID -- let's use all cases for now (but approved cases are 1, 2, 11)

    # At this point, no more duplicates by PARENT_CASE - AIN combination
    return m3

In [None]:
def tag_toc_entitlements(df):
    keep_col = ['CASE_NBR', 'id', 'CASE_ACTION_ID', 'CASE_FILE_DATE', 
            'AIN', 'TOC_Tier', 'zone_class']
    
    df = (df[keep_col]
          .assign(is_TOC = df.CASE_NBR.str.contains('TOC').astype(int))
         )
    
    # Make into parcel-level df
    df2 = (df.groupby(['AIN', 'TOC_Tier', 'zone_class', 'is_TOC'])
           .agg({'id':'count'})
           .reset_index()) 

    # Make wide
    df2 = df2.assign(
        num_TOC = df2.apply(lambda row: row.id if row.is_TOC == 1 else np.nan, axis = 1),
        num_nonTOC = df2.apply(lambda row: row.id if row.is_TOC == 0 else np.nan, axis = 1)
    )
    
    
    # If there are multiple obs for the same AIN, fill the NaNs with the max from the other column 
    # Then, drop duplicates
    df2 = df2.assign(
        num_TOC = df2.num_TOC.fillna(df2.groupby('AIN')['num_TOC'].transform('max')),
        num_nonTOC = df2.num_nonTOC.fillna(df2.groupby('AIN')['num_nonTOC'].transform('max'))
    )
    
    df3 = df2.drop_duplicates(subset = ['AIN', 'TOC_Tier', 'zone_class', 'num_TOC', 'num_nonTOC'])

    df3 = (df3.assign(
            num_TOC = df3.num_TOC.fillna(0).astype(int),
            num_nonTOC = df3.num_nonTOC.fillna(0).astype(int)
        ).drop(columns = ['is_TOC', 'id'])
    )
    
    # Merge in centroids for these parcels (much easier to plot)
    df4 = pd.merge(df3, parcels2, on = ['AIN', 'TOC_Tier'], how = 'inner').drop(
                    columns = ['x', 'y', 'obs', 'num_obs'])
    
    df4.rename(columns = {'centroid':'geometry'}, inplace = True)
    df4 = gpd.GeoDataFrame(df4)
    df4.crs = {'init':'epsg:2229'}

    return df4

In [10]:
pcts1 = merge_pcts(parcels_with_zoning)

In [11]:
pcts11 = merge_pcts(parcels_with_zoning_withcrosswalk)

In [27]:
pcts2 = zoning_pcts_processing(pcts1, parcels_with_zoning)

In [28]:
pcts22 = zoning_pcts_processing(pcts2, parcels_with_zoning_withcrosswalk)

ValueError: cannot reindex from a duplicate axis

###### 

In [None]:
pcts3 = tag_toc_entitlements(pcts2)

In [None]:
pcts33 = tag_toc_entitlements(pcts22)

In [None]:
toc_parcels = pcts3[pcts3.num_TOC > 0]
non_toc_parcels = pcts3[pcts3.num_nonTOC > 0]
have_both_parcels = pcts3[(pcts3.num_TOC > 0) & (pcts3.num_nonTOC > 0)]

print(f'# parcels: {len(pcts3)}')
print(f'# parcels with TOC entitlements: {len(toc_parcels)}')
print(f'# parcels with non TOC entitlements: {len(non_toc_parcels)}')
print(f'# parcels with both TOC and non TOC entitlements: {len(have_both_parcels)}')
print(f'double check sum: {len(toc_parcels) + len(non_toc_parcels) - len(have_both_parcels)}')

In [None]:
toc_parcels = pcts33[pcts33.num_TOC > 0]
non_toc_parcels = pcts33[pcts33.num_nonTOC > 0]
have_both_parcels = pcts33[(pcts33.num_TOC > 0) & (pcts33.num_nonTOC > 0)]

print(f'# parcels: {len(pcts33)}')
print(f'# parcels with TOC entitlements: {len(toc_parcels)}')
print(f'# parcels with non TOC entitlements: {len(non_toc_parcels)}')
print(f'# parcels with both TOC and non TOC entitlements: {len(have_both_parcels)}')
print(f'double check sum: {len(toc_parcels) + len(non_toc_parcels) - len(have_both_parcels)}')

In [None]:
toc_parcels = df[df.num_TOC > 0]
non_toc_parcels = df[df.num_nonTOC > 0]
have_both_parcels = df[(df.num_TOC > 0) & (df.num_nonTOC > 0)]

print(f'# parcels: {len(df)}')
print(f'# parcels with TOC entitlements: {len(toc_parcels)}')
print(f'# parcels with non TOC entitlements: {len(non_toc_parcels)}')
print(f'# parcels with both TOC and non TOC entitlements: {len(have_both_parcels)}')
print(f'double check sum: {len(toc_parcels) + len(non_toc_parcels) - len(have_both_parcels)}')

In [None]:
print(f'% parcels with TOC entitlements: {len(toc_parcels) / len(df)}')
print(f'% parcels with non TOC entitlements: {len(non_toc_parcels) / len(df)}')
print(f'% parcels with both entitlements: {len(have_both_parcels) / len(df)}')

In [None]:
df.TOC_Tier.value_counts()

In [None]:
toc_parcels.zone_class.value_counts()

In [None]:
non_toc_parcels.zone_class.value_counts()

In [None]:
"""
m12.to_file(driver = 'GeoJSON', filename = '../gis/intermediate/toc_eligible_parcels_with_entitlements.geojson')

s3.upload_file('../gis/intermediate/toc_eligible_parcels_with_entitlements.geojson', 
               f'{bucket_name}', 'gis/intermediate/toc_eligible_parcels_with_entitlements.geojson')
"""

## Breakdown by TOC Tiers

In [None]:
""" 
m13 = m12.groupby('TOC_Tier').agg({'AIN':'count', 'num_TOC':'sum', 'num_nonTOC':'sum'}).reset_index()

for i in ['TOC', 'nonTOC']:
    new_col = f'pct_{i}'
    numerator = f'num_{i}'
    m13[new_col] = m13[numerator] / (m13.num_TOC + m13.num_nonTOC)
    
m13['all_AIN'] = m13.AIN.sum()
m13['pct_AIN'] = m13.AIN / m13.all_AIN

m13
"""

## Breakdown by Zone Class

In [None]:
"""
m14 = m12.groupby('zone_class').agg({'AIN': 'count', 'num_TOC': 'sum', 'num_nonTOC': 'sum'}).reset_index()

for i in ['TOC', 'nonTOC']:
    new_col = f'pct_{i}'
    numerator = f'num_{i}'
    m14[new_col] = m14[numerator] / (m14.num_TOC + m14.num_nonTOC)
    
m14['all_AIN'] = m14.AIN.sum()
m14['pct_AIN'] = m14.AIN / m14.all_AIN

m14
"""

In [None]:
"""
writer = pd.ExcelWriter('../outputs/toc_charts.xlsx', engine = 'xlsxwriter')

m13.to_excel(writer, sheet_name = 'entitlements_by_tier')
m14.to_excel(writer, sheet_name = 'entitlements_by_zone')

writer.save()
"""