# Clean ZIMAS / zoning file
* Dissolve zoning file so they are multipolygons
* Use parser in `laplan.zoning` to parse ZONE_CMPLT
* Manually list the failed to parse observations and fix
* Use this to build crosswalk of height, density, etc restrictions

In [1]:
import boto3
import geopandas as gpd
import intake
import numpy as np
import os
import pandas as pd
import laplan
import utils

In [2]:
catalog = intake.open_catalog("../catalogs/*.yml")

s3 = boto3.client('s3')
bucket_name = 'city-planning-entitlements'

In [3]:
# Default value of display.max_rows is 10 i.e. at max 10 rows will be printed.
# Set it None to display all rows in the dataframe
pd.set_option('display.max_rows', 25)

In [4]:
# Dissolve zoning to get multipolygons
# File is large, but we only care about unique ZONE_CMPLT, which need to be parsed
zones = catalog.zoning.read()
zones = zones[['ZONE_CMPLT', 'ZONE_SMRY', 'geometry']].assign(
    zone2 = zones.ZONE_CMPLT
)

df = zones.dissolve(by='zone2').reset_index(drop=True)
df.head()

Unnamed: 0,geometry,ZONE_CMPLT,ZONE_SMRY
0,"MULTIPOLYGON (((6444407.300 1905809.010, 64444...",(F)CM-1-CUGU,INDUSTRIAL
1,"MULTIPOLYGON (((6478385.560 1837166.650, 64782...",(F)CM-1-HPOZ,INDUSTRIAL
2,"POLYGON ((6478551.540 1837132.960, 6478461.390...",(F)CM-1-O-HPOZ,INDUSTRIAL
3,"MULTIPOLYGON (((6416397.250 1892185.530, 64166...",(F)R2-1-RIO,RESIDENTIAL
4,"MULTIPOLYGON (((6421076.240 1939270.920, 64210...",(F)RE11-1,RESIDENTIAL


In [5]:
print(f'# obs in zoning: {len(zones)}')
print(f'# unique types of zoning: {len(df)}')

# obs in zoning: 60588
# unique types of zoning: 1934


## Parse zoning string

In [6]:
parsed_col_names = ['Q', 'T', 'zone_class', 'specific_plan', 'height_district', 'D', 'overlay']

def parse_zoning(row):
    try:
        z = laplan.zoning.ZoningInfo(row.ZONE_CMPLT)
        return pd.Series([z.Q, z.T, z.zone_class, z.specific_plan, z.height_district, z.D, z.overlay], 
                         index = parsed_col_names)
    except ValueError:
        return pd.Series(['failed', 'failed', 'failed', 'failed', 'failed', 'failed', ''], 
                         index = parsed_col_names)

    
parsed = df.apply(parse_zoning, axis = 1)

df = pd.concat([df, parsed], axis = 1)

df.head()

Unnamed: 0,geometry,ZONE_CMPLT,ZONE_SMRY,Q,T,zone_class,specific_plan,height_district,D,overlay
0,"MULTIPOLYGON (((6444407.300 1905809.010, 64444...",(F)CM-1-CUGU,INDUSTRIAL,failed,failed,failed,failed,failed,failed,
1,"MULTIPOLYGON (((6478385.560 1837166.650, 64782...",(F)CM-1-HPOZ,INDUSTRIAL,failed,failed,failed,failed,failed,failed,
2,"POLYGON ((6478551.540 1837132.960, 6478461.390...",(F)CM-1-O-HPOZ,INDUSTRIAL,failed,failed,failed,failed,failed,failed,
3,"MULTIPOLYGON (((6416397.250 1892185.530, 64166...",(F)R2-1-RIO,RESIDENTIAL,failed,failed,failed,failed,failed,failed,
4,"MULTIPOLYGON (((6421076.240 1939270.920, 64210...",(F)RE11-1,RESIDENTIAL,failed,failed,failed,failed,failed,failed,


## Fix parse fails

In [7]:
fails_crosswalk = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_zone_parse_fails.parquet')

print(f'# obs in fails_crosswalk: {len(fails_crosswalk)}')

# obs in fails_crosswalk: 43


In [8]:
# Grab all obs in our df that shows up in the fails_crosswalk, even if it was parsed correctly
# There were some other ones that were added because they weren't valid zone classes
fails = df[df.ZONE_CMPLT.isin(fails_crosswalk.ZONE_CMPLT)]
print(f'# obs in fails: {len(fails)}')

# obs in fails: 43


In [9]:
# Convert the overlay column from string to list
fails_crosswalk.overlay = fails_crosswalk.overlay.str[1:-1].str.split(',').tolist()

# Fill in Nones with empty list
fails_crosswalk['overlay'] = fails_crosswalk['overlay'].apply(lambda row: row if isinstance(row, list) else [])

In [10]:
df1 = df[~ df.ZONE_CMPLT.isin(fails_crosswalk.ZONE_CMPLT)]

# Append the successfully parsed obs with the failed ones
df2 = df1.append(fails_crosswalk)

In [11]:
# Make sure cols are the same type again
for col in ['zone_class', 'specific_plan', 'height_district']:
    df2[col] = df2[col].astype(str)

for col in ['Q', 'T', 'D']:
    df2[col] = df2[col].astype(int)

In [12]:
print(f'# obs in df: {len(df)}')
print(f'# obs in df2: {len(df2)}')

# obs in df: 1934
# obs in df2: 1934


## Need to do something about overlays and specific plans...
* leave as list? -> then split (ZONE_CMPLT, geometry) from the rest, so we can save geojson and tabular separately
* GeoJSON can't take lists. Convert to strings...later make it a list again?

In [13]:
# Fill in Nones, otherwise cannot do the apply to make the list a string
df2.overlay = df2.overlay.fillna('')

just_overlay = df2[df2.overlay != ''][['ZONE_CMPLT', 'overlay']]
just_overlay['no_brackets'] = just_overlay['overlay'].apply(', '.join)

In [14]:
split = just_overlay.no_brackets.str.split(',', expand = True).fillna('')
split.rename(columns = {0: 'o1', 1: 'o2', 2: 'o3'}, inplace = True)

just_overlay = pd.concat([just_overlay, split], axis = 1)

In [15]:
supplemental_use = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_supplemental_use_overlay.parquet')
specific_plan = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_specific_plan.parquet')

In [16]:
supplemental_use_dict = supplemental_use.set_index('supplemental_use').to_dict()['supplemental_use_description']
specific_plan_dict = specific_plan.set_index('specific_plan').to_dict()['specific_plan_description']

In [17]:
# Trouble mapping it across all columns
for col in ['o1', 'o2', 'o3']:
    just_overlay[col] = just_overlay[col].str.strip()
    new_col = f'{col}_descrip'
    just_overlay[new_col] = just_overlay[col].map(supplemental_use_dict)
    just_overlay[new_col] = just_overlay[new_col].fillna('')

In [18]:
# Put df back together
df3 = pd.merge(df2, just_overlay, on = 'ZONE_CMPLT', how = 'left', validate = '1:1')
df3.head()

Unnamed: 0,geometry,ZONE_CMPLT,ZONE_SMRY,Q,T,zone_class,specific_plan,height_district,D,overlay_x,overlay_y,no_brackets,o1,o2,o3,o1_descrip,o2_descrip,o3_descrip
0,"POLYGON ((6404307.250 1933058.300, 6404610.470...",(Q)A1-1-H-K,AGRICULTURE,1,0,A1,,1,0,"[H, K]","[H, K]","H, K",H,K,,unknown2,equinekeeping,
1,"POLYGON ((6386802.410 1883303.620, 6386809.340...",(Q)A1-1VLD,AGRICULTURE,1,0,A1,,1VL,1,[],[],,,,,,,
2,"MULTIPOLYGON (((6481495.330 1913564.200, 64814...",(Q)A2-1,AGRICULTURE,1,0,A2,,1,0,[],[],,,,,,,
3,"MULTIPOLYGON (((6381155.830 1883743.800, 63811...",(Q)C1-1,COMMERCIAL,1,0,C1,,1,0,[],[],,,,,,,
4,"POLYGON ((6418734.610 1817051.450, 6418751.000...",(Q)C1-1-O-CA,COMMERCIAL,1,0,C1,,1,0,"[O, CA]","[O, CA]","O, CA",O,CA,,oil_drilling,commercial_and_artcraft,


In [19]:
# Invalid overlays
# What is SP? Specific Plan?
# Also, can't find H

## Merge and export

In [20]:
col_order = ['ZONE_CMPLT', 'ZONE_SMRY', 
             'Q', 'T', 'zone_class', 'height_district', 'D',
             'specific_plan', 'no_brackets', 'geometry']

# Geometry is messed up, so let's get it back from original dissolve
final = (pd.merge(df[['ZONE_CMPLT', 'geometry']], df3.drop(columns = "geometry"), 
                  on = "ZONE_CMPLT", how = "left", validate = "1:1")
         [col_order]
         .rename(columns = {'no_brackets': 'overlay'})
         .sort_values(['ZONE_CMPLT', 'ZONE_SMRY'])
         .reset_index(drop=True)         
        )

final.head()

Unnamed: 0,ZONE_CMPLT,ZONE_SMRY,Q,T,zone_class,height_district,D,specific_plan,overlay,geometry
0,(F)CM-1-CUGU,,0,1,CM,1,0,,CUGU,"MULTIPOLYGON (((6444407.300 1905809.010, 64444..."
1,(F)CM-1-HPOZ,,0,1,CM,1,0,,HPOZ,"MULTIPOLYGON (((6478385.560 1837166.650, 64782..."
2,(F)CM-1-O-HPOZ,,0,1,CM,1,0,,"O, HPOZ","POLYGON ((6478551.540 1837132.960, 6478461.390..."
3,(F)R2-1-RIO,,0,0,R2,1,0,,RIO,"MULTIPOLYGON (((6416397.250 1892185.530, 64166..."
4,(F)RE11-1,,0,0,RE11,1,0,,,"MULTIPOLYGON (((6421076.240 1939270.920, 64210..."


In [21]:
file_name = 'gis/raw/parsed_zoning'
utils.make_zipped_shapefile(final, f'../{file_name}')

s3.upload_file(f'../{file_name}.zip', bucket_name, f'{file_name}.zip')

# Save as geoparquet
final.to_parquet(f'../{file_name}.parquet')
s3.upload_file(f'../{file_name}.parquet', bucket_name, f'{file_name}.parquet')

# Remove since QGIS can't read geoparquet
os.remove(f'../{file_name}.parquet')

Path name: ../gis/raw/parsed_zoning
Dirname (1st element of path): ../gis/raw/parsed_zoning
Shapefile name: parsed_zoning.shp
Shapefile component parts folder: ../gis/raw/parsed_zoning/parsed_zoning.shp



This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  import sys
