# Clean ZIMAS / zoning file
* Use parser in utils to parse ZONE_CMPLT
* Iterative process - work with Planning to figure out the ones that can't be parsed
* Use this to build crosswalk of height, density, etc restrictions

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import utils

In [2]:
catalog = intake.open_catalog("../catalogs/*.yml")

In [None]:
# Default value of display.max_rows is 10 i.e. at max 10 rows will be printed.
# Set it None to display all rows in the dataframe
pd.set_option('display.max_rows', None)

In [None]:
df = catalog.zoning.read()
df = df[['ZONE_CMPLT']].drop_duplicates()

In [3]:
df = pd.read_parquet('../data/zoning.parquet')

## Parse zoning string

In [4]:
parsed_col_names = ['Q', 'T', 'zone_class', 'specific_plan', 'height_district', 'D', 'overlay']

def parse_zoning(row):
    try:
        z = utils.ZoningInfo(row.ZONE_CMPLT)
        return pd.Series([z.Q, z.T, z.zone_class, z.specific_plan, z.height_district, z.D, z.overlay], 
                         index = parsed_col_names)
    except ValueError:
        return pd.Series(['failed', 'failed', 'failed', 'failed', 'failed', 'failed', 'failed'], 
                         index = parsed_col_names)

    
parsed = df.apply(parse_zoning, axis = 1)

df = pd.concat([df, parsed], axis = 1)

df.head()

Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,specific_plan,height_district,D,overlay
0,C2-1-SP,False,False,C2,,1,False,[SP]
1,C1.5-1-SP,False,False,C1.5,,1,False,[SP]
4,[Q]C1.5-1,True,False,C1.5,,1,False,[]
10,[Q]C1.5-1-RIO,True,False,C1.5,,1,False,[RIO]
12,[Q]C1.5-1-O,True,False,C1.5,,1,False,[O]


In [5]:
fails_crosswalk = pd.read_parquet('s3://city-planning-entitlements/data/crosswalk_zone_parse_fails.parquet')

print(f'# obs in fails_crosswalk: {len(fails_crosswalk)}')

# obs in fails_crosswalk: 47


In [6]:
# Grab all obs in our df that shows up in the fails_crosswalk, even if it was parsed correctly
# There were some other ones that were added because they weren't valid zone classes
fails = df[df.ZONE_CMPLT.isin(fails_crosswalk.ZONE_CMPLT)]
print(f'# obs in fails: {len(fails)}')

# obs in fails: 43


In [7]:
# Convert the overlay column from string to list
fails_crosswalk.overlay = fails_crosswalk.overlay.str[1:-1].str.split(',').tolist()

# Fill in Nones with empty list
fails_crosswalk['overlay'] = fails_crosswalk['overlay'].apply(lambda row: row if isinstance(row, list) else [])

In [8]:
df1 = df[~ df.ZONE_CMPLT.isin(fails_crosswalk.ZONE_CMPLT)]

# Append the successfully parsed obs with the failed ones
df2 = df1.append(fails_crosswalk)

In [9]:
# Make sure cols are the same type again
for col in ['zone_class', 'specific_plan', 'height_district']:
    df2[col] = df2[col].astype(str)

for col in ['Q', 'T', 'D']:
    df2[col] = df2[col].astype(int)

In [10]:
# Check a couple of cases to see how well the parser worked
check_my_zoning = ['SL', 'USC-1A', 'CW', 
                       'R4(PV)', 'OS(UV)', 'GW(CA)', 
                      '[Q]C1.5-1-RIO', '(T)(Q)RAS3-1L', '(T)M1-2D']
display(df[df.ZONE_CMPLT.isin(check_my_zoning)])

# Check the cases after we fixed failed ones
display(df2[df2.ZONE_CMPLT.isin(check_my_zoning)])

Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,specific_plan,height_district,D,overlay
10,[Q]C1.5-1-RIO,True,False,C1.5,,1,False,[RIO]
5859,USC-1A,failed,failed,failed,failed,failed,failed,failed
12398,GW(CA),False,False,GW,,,False,[CA]
15912,CW,False,False,,CW,,False,[]
47002,R4(PV),False,False,R4,PV,,False,[]
51123,SL,False,False,SL,,,False,[]
59863,(T)(Q)RAS3-1L,True,True,RAS3,,1L,False,[]
59893,(T)M1-2D,False,True,M1,,2,True,[]
60015,OS(UV),False,False,OS,UV,,False,[]


Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,specific_plan,height_district,D,overlay
10,[Q]C1.5-1-RIO,1,0,C1.5,,1,0,[RIO]
12398,GW(CA),0,0,GW,,,0,[CA]
47002,R4(PV),0,0,R4,PV,,0,[]
51123,SL,0,0,SL,,,0,[]
59863,(T)(Q)RAS3-1L,1,1,RAS3,,1L,0,[]
59893,(T)M1-2D,0,1,M1,,2,1,[]
60015,OS(UV),0,0,OS,UV,,0,[]
12,USC-1A,0,0,,USC-1A,,0,[]
19,CW,0,0,,CW,,0,[]


## Zone Class

In [11]:
# Check if valid zone classes are used
df2['valid_zone'] = df2.zone_class.isin(utils.VALID_ZONE_CLASS)

df2.valid_zone.value_counts()

True     1899
False      39
Name: valid_zone, dtype: int64

In [12]:
print(f'# obs with invalid zones: {len(df2[df2.valid_zone == False])}')
print(f'# of unique invalid zones: {df2[df2.valid_zone == False].zone_class.nunique()}')

# obs with invalid zones: 39
# of unique invalid zones: 4


In [13]:
df2[(df2.valid_zone == False) & (df2.zone_class != '')]

Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,specific_plan,height_district,D,overlay,valid_zone
2,[Q]RZ5-1,1,0,RZ5,,1.0,0,[],False
3,A2P-1,0,0,A2P,,1.0,0,[],False
4,(T)(Q)RZ5-1,1,1,RZ5,,1.0,0,[],False
33,M(PV),0,0,M,PV,,0,[],False
43,[Q]RZ5-1,1,0,RZ5,,1.0,0,[],False
44,A2P-1,0,0,A2P,,1.0,0,[],False
45,(T)(Q)RZ5-1,1,1,RZ5,,1.0,0,[],False
46,M(PV),0,0,M,PV,,0,[],False


## Height District

In [14]:
df2['valid_height'] = df2.apply(lambda row: False if row.height_district in(['', 'invalid']) else True, axis = 1)

df2.valid_height.value_counts()

True     1873
False      65
Name: valid_height, dtype: int64

In [16]:
# Look at what these invalid heights are.
# 1VL, 2VL seem like they could be legitimate. What is EZ1VL?
df2[df2.valid_height == False].height_district.value_counts()

    65
Name: height_district, dtype: int64

In [17]:
print(f'# obs with invalid height districts: {len(df2[df2.valid_height == False])}')

# obs with invalid height districts: 65


## D Limit

In [18]:
df2.D.value_counts()

0    1635
1     303
Name: D, dtype: int64

In [19]:
df2[df2.D == True].height_district.value_counts()
# Checked, if there is height district followed by a D, those are all tagged as True

2      142
1       61
1VL     39
4       31
1L      22
1XL      6
3        2
Name: height_district, dtype: int64

## Overlay

In [20]:
# Fill in Nones, otherwise cannot do the apply to make the list a string
df.overlay = df.overlay.fillna('')

just_overlay = df[['overlay']]

In [21]:
just_overlay['no_brackets'] = just_overlay['overlay'].apply(', '.join)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [22]:
just_overlay.no_brackets.value_counts()

            661
RIO         181
O           149
CUGU        141
CDO         101
           ... 
HPOZ, SP      1
H, RPD        1
H, O, K       1
H, O          1
S, RIO        1
Name: no_brackets, Length: 63, dtype: int64

In [24]:
supplemental_use = pd.read_parquet('s3://city-planning-entitlements/data/crosswalk_supplemental_use_overlay.parquet')
specific_plan = pd.read_parquet('s3://city-planning-entitlements/data/crosswalk_specific_plan.parquet')

In [27]:
supplemental_use

Unnamed: 0,supplemental_use,supplemental_use_description
0,O,oil_drilling
1,S,animal_slaughtering
2,G,surface_mining
3,K,equinekeeping
4,CA,commercial_and_artcraft
5,MU,mixed_use
6,FH,fence_heights
7,SN,sign
8,HS,hillside_standards
9,RG,rear_detached_garage


In [25]:
supplemental_use_dict = supplemental_use.to_dict()

In [None]:
# Make a dictionary for supplemental use (map to overlay)
supplemental_use_dict = {
    # Supplemental Use found in Table 2 or Zoning Code Article 3
    'O': 'oil_drilling', 
    'S': 'animal_slaughtering', 
    'G': 'surface_mining', 
    'K': 'equinekeeping', 
    'CA': 'commercial_and_artcraft', 
    'MU': 'mixed_use', 
    'FH': 'fence_heights', 
    'SN': 'sign', 
    'HS': 'hillside_standards',
    'RG': 'rear_detached_garage', 
    'RPD': 'residential_planned_development', 
    'POD': 'pedestrian_oriented_district',
    'CDO': 'community_design_overlay',
    'NSO': 'neighborhood_stabilization_ordinance',
    'RFA': 'residential_floor_area',
    'MPR': 'modified_parking_requirement',
    'RIO': 'river_improvement_overlay',
    'HCR': 'hillside_construction_regulation',
    'CPIO': 'community_plan_implementation_overlay',
    'CUGU': 'clean_up_green_up_overlay',
    'HPOZ': 'historic_preservation_overlay_zone', 
}

In [None]:
specific_plan_dict = {
    # Are other zoning designations put in with zone class or overlay? HPOX appears as overlay, but a lot of other ones appear in the beginning with zone class.
    # Even more designations found in Zoning Code Article 2
    'CEC': 'convention_and_event_center_specific_plan',
    # Other Zoning Designations found in Zoning Code Article 2, Sec 12.04 Zones - Districts - Symbols.
    # Are these always in parantheses in the beginning, near the prefix??
    'CW': 'central_city_west_specific_plan', 
    'GM': 'glencoe_maxella_specific_plan', 
    'OX': 'oxford_triangle_specific_plan', 
    'PV': 'playa_vista_specific_plan', 
    'WC': 'warner_center_specific_plan', 
    'ADP': 'alameda_district_specific_plan', 
    'CCS': 'century_city_south_studio_zone', 
    'CSA': 'centers_study_area', 
    'PKM': 'park_mile_specific_plan',
    'LAX': 'los_angeles_airport_specific_plan', 
    #'HPOZ': 'historic_preservation_overlay_zone', 
    'LASED': 'la_sports_and_entertainment_specific_plan',
    'USC-1A': 'usc_university_park_campus_specific_plan_subarea_1a_zone',
    'USC-1B': 'usc_university_park_campus_specific_plan_subarea_1b_zone',    
    'USC-2': 'usc_university_park_campus_specific_plan_subarea_2_zone',
    'USC-3': 'usc_university_park_campus_specific_plan_subarea_3_zone',
    'PVSP': 'ponte_vista_at_san_pedro_specific_plan'
}

In [None]:
split = just_overlay.no_brackets.str.split(',', expand = True).fillna('')
split.rename(columns = {0: 'o1', 1: 'o2', 2: 'o3'}, inplace = True)

In [None]:
# Compile list of unique overlays, and see how well that matches with the dict
unique_overlays = pd.DataFrame(split.o1.append(split.o2).append(split.o3))

In [None]:
unique_overlays.rename(columns = {0: 'overlay'}, inplace = True)
unique_overlays.overlay = unique_overlays.overlay.str.strip()
unique_overlays = unique_overlays.drop_duplicates()
unique_overlays = unique_overlays.reset_index().drop(columns = ['index'])

In [None]:
unique_overlays['overlay_descrip'] = unique_overlays.overlay.map(supplemental_use_dict)

In [None]:
unique_overlays[unique_overlays.overlay_descrip.isna()]

In [None]:
# Invalid overlays
# What is SP? Specific Plan?
# Also, can't find H

## Misclasified
* Manually fix with a dictionary?
* Ask Planning team what invalid values are

In [None]:
spilt = pd.concat([just_overlay, split], axis = 1)

In [None]:
for col in ['o1', 'o2', 'o3']:
    new_col = f'{col}_descrip'
    split[col] = split[col].str.strip()
    split[new_col] = split[col].map(supplemental_use_dict)
    split[new_col] = split[new_col].fillna('')
    
tagged_overlay = pd.concat([just_overlay, split], axis = 1)

In [None]:
tagged_overlay.head(30)

In [None]:
""" 
def find_overlays(row, overlays, op=all):
    if row.overlay is None:
        return False
    else:
        return op([o in row.overlay for o in overlays])

df[df.apply(lambda x: find_overlays(x, ['RIO', 'CDO'], op=any), axis=1)]
"""

## Put back together
### Make sure columns don't contain mixed types

In [None]:
df2 = pd.concat([df, tagged_overlay], axis = 1)

In [None]:
df2.dtypes

In [None]:
for col in ['Q', 'T', 'zone_class', 'specific_plan', 'height_district', 'D']:
    print(f'Column: {col}')
    print(df[col].value_counts())

### Merge

In [None]:
full = catalog.zoning.read()
full.crs = {'init':'epsg:2229'}

In [None]:
final = pd.merge(full, df2, on = 'ZONE_CMPLT', how = 'left', validate = 'm:1')

In [None]:
final = final.drop(columns = ['OBJECTID', 'no_brackets', 'Shape__Area', 'Shape__Length'])

final.head()