# Clean ZIMAS / zoning file
* Use parser in `laplan.zoning` to parse ZONE_CMPLT
* Use the parsed components to build a crosswalk of height, density, and other restrictions

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import boto3
import laplan

In [2]:
# S3 bucket used by this notebook, plus a client for talking to it.
bucket_name = 'city-planning-entitlements'
s3 = boto3.client('s3')

# Data catalog assembled from the files matching ../catalogs/*.yml.
catalog = intake.open_catalog("../catalogs/*.yml")

In [3]:
# Print at most 25 rows when a DataFrame is displayed.
# Setting this option to None instead would display every row.
pd.set_option('display.max_rows', 25)

In [4]:
# Read the zoning layer and keep one row per distinct ZONE_CMPLT string.
df = catalog.zoning.read()[['ZONE_CMPLT']].drop_duplicates()

## Parse zoning string

In [5]:
# Output columns; each name is also the ZoningInfo attribute it is read from.
parsed_col_names = [
    'Q',
    'T',
    'zone_class',
    'specific_plan',
    'height_district',
    'D',
    'overlay',
]


def parse_zoning(row):
    """Parse ``row.ZONE_CMPLT`` into its zoning components.

    Returns a Series indexed by ``parsed_col_names``. If parsing raises
    ``ValueError``, every component is the string 'failed' except
    ``overlay``, which is an empty string.
    """
    try:
        info = laplan.zoning.ZoningInfo(row.ZONE_CMPLT)
        components = [getattr(info, name) for name in parsed_col_names]
        return pd.Series(components, index=parsed_col_names)
    except ValueError:
        # Mark the row so it can be identified as a parse failure afterwards.
        placeholders = ['failed'] * 6 + ['']
        return pd.Series(placeholders, index=parsed_col_names)

    
# Run the parser on every distinct zoning string, one row at a time.
parsed = df.apply(parse_zoning, axis='columns')

# Put the parsed components next to the original ZONE_CMPLT column.
df = pd.concat((df, parsed), axis='columns')

df.head()

Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,specific_plan,height_district,D,overlay
0,C2-1-SP,False,False,C2,,1,False,[SP]
1,C1.5-1-SP,False,False,C1.5,,1,False,[SP]
4,[Q]C1.5-1,True,False,C1.5,,1,False,[]
10,[Q]C1.5-1-RIO,True,False,C1.5,,1,False,[RIO]
12,[Q]C1.5-1-O,True,False,C1.5,,1,False,[O]


## Fix parse fails

In [6]:
# Crosswalk of zoning strings flagged as parse failures, stored on S3.
fails_crosswalk = pd.read_parquet(
    path=f's3://{bucket_name}/data/crosswalk_zone_parse_fails.parquet',
)

print(f'# obs in fails_crosswalk: {len(fails_crosswalk)}')

# obs in fails_crosswalk: 43


In [7]:
# Select every row of df whose ZONE_CMPLT appears in fails_crosswalk, even
# when the parser did not raise on it: some strings were added to the
# crosswalk because their zone class was not valid.
in_crosswalk = df['ZONE_CMPLT'].isin(fails_crosswalk['ZONE_CMPLT'])
fails = df.loc[in_crosswalk]
print(f'# obs in fails: {len(fails)}')

# obs in fails: 43


In [8]:
# Turn the overlay column from a string into a list: drop the first and last
# character (presumably the surrounding brackets) and split on commas.
overlay_tokens = fails_crosswalk['overlay'].str.slice(1, -1).str.split(',')

# Anything that did not come out as a list (e.g. a missing value) becomes [].
fails_crosswalk['overlay'] = [
    tokens if isinstance(tokens, list) else [] for tokens in overlay_tokens
]

In [9]:
# Rows whose zoning string is NOT covered by the fails crosswalk.
df1 = df[~df.ZONE_CMPLT.isin(fails_crosswalk.ZONE_CMPLT)]

# Stack the remaining parsed rows on top of the crosswalk rows.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; like append, it keeps each frame's index labels and
# does not sort the columns.
df2 = pd.concat([df1, fails_crosswalk])

In [10]:
# Give the combined frame consistent column dtypes again:
# text components become str, the Q/T/D flags become int.
target_dtypes = {
    'zone_class': str,
    'specific_plan': str,
    'height_district': str,
    'Q': int,
    'T': int,
    'D': int,
}

for name, dtype in target_dtypes.items():
    df2[name] = df2[name].astype(dtype)

In [11]:
# Compare row counts before and after swapping in the crosswalk rows.
print(
    f'# obs in df: {len(df)}',
    f'# obs in df2: {len(df2)}',
    sep='\n',
)

# obs in df: 1934
# obs in df2: 1934


## Need to do something about overlays and specific plans...
* Leave them as lists? Then split (ZONE_CMPLT, geometry) from the rest, so the GeoJSON and the tabular data can be saved separately.
* GeoJSON can't store lists. Convert them to strings for export, then turn them back into lists later?

In [12]:
# Replace missing overlay values with '' so they can be filtered out below.
df2['overlay'] = df2['overlay'].fillna('')

# Keep only the rows that have an overlay value. Select with a single .loc
# call and take an explicit copy: just_overlay gets new columns below, and
# writing to a chained-indexing slice of df2 raises SettingWithCopyWarning.
just_overlay = df2.loc[df2['overlay'] != '', ['ZONE_CMPLT', 'overlay']].copy()

# Flatten each overlay list into one comma-separated string.
just_overlay['no_brackets'] = just_overlay['overlay'].apply(', '.join)

In [13]:
# Spread the comma-separated overlays over one column per position;
# positions a row does not use are filled with ''.
split = (
    just_overlay['no_brackets']
    .str.split(',', expand=True)
    .fillna('')
    .rename(columns={0: 'o1', 1: 'o2', 2: 'o3'})
)

just_overlay = pd.concat((just_overlay, split), axis='columns')

In [14]:
# Crosswalk tables stored on S3: supplemental use overlays and specific plans.
supplemental_use = pd.read_parquet(
    path=f's3://{bucket_name}/data/crosswalk_supplemental_use_overlay.parquet',
)
specific_plan = pd.read_parquet(
    path=f's3://{bucket_name}/data/crosswalk_specific_plan.parquet',
)

In [15]:
# Lookup tables mapping each code to its description.
supplemental_use_dict = (
    supplemental_use
    .set_index('supplemental_use')['supplemental_use_description']
    .to_dict()
)
specific_plan_dict = (
    specific_plan
    .set_index('specific_plan')['specific_plan_description']
    .to_dict()
)

In [16]:
# Mapping across all the columns at once was troublesome, so handle the
# overlay columns one at a time.
for overlay_col in ('o1', 'o2', 'o3'):
    # Strip the whitespace left over from splitting on ','.
    cleaned = just_overlay[overlay_col].str.strip()
    just_overlay[overlay_col] = cleaned

    # Look up each overlay's description; overlays without a match get ''.
    descriptions = cleaned.map(supplemental_use_dict).fillna('')
    just_overlay[f'{overlay_col}_descrip'] = descriptions

In [17]:
# Re-attach the overlay columns to the full parsed table. validate='1:1'
# makes the merge fail if ZONE_CMPLT is duplicated on either side.
df3 = df2.merge(
    just_overlay,
    how='left',
    on='ZONE_CMPLT',
    validate='1:1',
)
df3.head()

Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,specific_plan,height_district,D,overlay_x,overlay_y,no_brackets,o1,o2,o3,o1_descrip,o2_descrip,o3_descrip
0,C2-1-SP,0,0,C2,,1,0,[SP],[SP],SP,SP,,,unknown1,,
1,C1.5-1-SP,0,0,C1.5,,1,0,[SP],[SP],SP,SP,,,unknown1,,
2,[Q]C1.5-1,1,0,C1.5,,1,0,[],[],,,,,,,
3,[Q]C1.5-1-RIO,1,0,C1.5,,1,0,[RIO],[RIO],RIO,RIO,,,river_improvement_overlay,,
4,[Q]C1.5-1-O,1,0,C1.5,,1,0,[O],[O],O,O,,,oil_drilling,,


In [18]:
# Invalid overlays
# What is SP? Specific Plan?
# Also, can't find H

## Merge and export

In [19]:
# Reload the full zoning layer. Rename its ZONE_CLASS column so it stays
# distinguishable from the parsed zone_class column after the merge.
full = catalog.zoning.read().rename(columns={'ZONE_CLASS': 'ZONE_CLASS1'})

# Assign the layer's coordinate reference system.
full.crs = "EPSG:2229"

In [20]:
# Attach the parsed components to every zoning polygon. Many polygons can
# share one ZONE_CMPLT, but df3 must hold each string at most once.
final = pd.merge(
    left=full,
    right=df3,
    how='left',
    on='ZONE_CMPLT',
    validate='m:1',
)

In [21]:
# Flag rows where the parsed zone class matches the source ZONE_CLASS1.
# A vectorized column comparison replaces the row-wise apply, which ran a
# Python lambda per row with a redundant `True if ... else False`.
# Missing values never compare equal, so unmatched rows are flagged False.
final['same'] = final['ZONE_CLASS1'] == final['zone_class']
display(final['same'].value_counts())

True     59894
False      694
Name: same, dtype: int64

In [22]:
# Show the column names of the merged frame.
final.columns

Index(['OBJECTID', 'ZONE_CMPLT', 'ZONE_CLASS1', 'ZONE_SMRY', 'Shape__Area',
       'Shape__Length', 'geometry', 'Q', 'T', 'zone_class', 'specific_plan',
       'height_district', 'D', 'overlay_x', 'overlay_y', 'no_brackets', 'o1',
       'o2', 'o3', 'o1_descrip', 'o2_descrip', 'o3_descrip', 'same'],
      dtype='object')

In [23]:
# Columns left out of the exported file.
drop_me = [
    'OBJECTID',
    'Shape__Area',
    'Shape__Length',
    'overlay_x',
    'overlay_y',
    'no_brackets',
    'same',
]

final = final.drop(labels=drop_me, axis='columns')

final.head()

Unnamed: 0,ZONE_CMPLT,ZONE_CLASS1,ZONE_SMRY,geometry,Q,T,zone_class,specific_plan,height_district,D,o1,o2,o3,o1_descrip,o2_descrip,o3_descrip
0,C2-1-SP,C2,COMMERCIAL,"POLYGON ((6460317.049 1832062.462, 6460329.590...",0,0,C2,,1,0,SP,,,unknown1,,
1,C1.5-1-SP,C1.5,COMMERCIAL,"POLYGON ((6459991.770 1832633.240, 6460066.270...",0,0,C1.5,,1,0,SP,,,unknown1,,
2,C1.5-1-SP,C1.5,COMMERCIAL,"POLYGON ((6460331.280 1832633.620, 6460331.440...",0,0,C1.5,,1,0,SP,,,unknown1,,
3,C1.5-1-SP,C1.5,COMMERCIAL,"POLYGON ((6460068.300 1833302.250, 6460083.350...",0,0,C1.5,,1,0,SP,,,unknown1,,
4,[Q]C1.5-1,C1.5,COMMERCIAL,"POLYGON ((6479594.100 1840055.860, 6479624.020...",1,0,C1.5,,1,0,,,,,,


In [24]:
# Write the parsed zoning layer to a local GeoJSON file, then upload that
# file to the S3 bucket.
local_path = '../gis/raw/parsed_zoning.geojson'
s3_key = 'gis/raw/parsed_zoning.geojson'

final.to_file(local_path, driver='GeoJSON')
s3.upload_file(Filename=local_path, Bucket=bucket_name, Key=s3_key)