# Clean ZIMAS / zoning file
* Use parser in utils to parse ZONE_CMPLT
* Iterative process - work with Planning to figure out the ones that can't be parsed
* Use this to build crosswalk of height, density, etc restrictions

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import utils

In [2]:
catalog = intake.open_catalog("../catalogs/*.yml")

In [None]:
# Default value of display.max_rows is 10 i.e. at max 10 rows will be printed.
# Set it None to display all rows in the dataframe
pd.set_option('display.max_rows', None)

In [None]:
df = catalog.zoning.read()
df = df[['ZONE_CMPLT']].drop_duplicates()

In [5]:
df = pd.read_parquet('../data/zoning.parquet')

## Parse zoning string

In [6]:
parsed_col_names = ['Q', 'T', 'zone_class', 'specific_plan', 'height_district', 'D', 'overlay', 'invalid_zone', 'invalid_height']

def parse_zoning(row):
    try:
        z = utils.ZoningInfo(row.ZONE_CMPLT)
        return pd.Series([z.Q, z.T, z.zone_class, z.specific_plan, z.height_district, z.D, z.overlay, z.invalid_zone, z.invalid_height], 
                         index = parsed_col_names)
    except ValueError:
        return pd.Series(['failed', 'failed', 'failed', 'failed', 'failed', 'failed', 'failed', '', ''], 
                         index = parsed_col_names)

    
parsed = df.apply(parse_zoning, axis = 1)

df = pd.concat([df, parsed], axis = 1)

df.head()

Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,specific_plan,height_district,D,overlay,invalid_zone,invalid_height
0,C2-1-SP,False,False,C2,,1,False,[SP],,
1,C1.5-1-SP,False,False,C1.5,,1,False,[SP],,
4,[Q]C1.5-1,True,False,C1.5,,1,False,,,
10,[Q]C1.5-1-RIO,True,False,C1.5,,1,False,[RIO],,
12,[Q]C1.5-1-O,True,False,C1.5,,1,False,[O],,


In [7]:
# Check a couple of cases to see how well the parser worked
df[df.ZONE_CMPLT.isin(['SL', 'USC-1A', 'CW', 
                       'R4(PV)', 'OS(UV)', 'GW(CA)', 
                      '[Q]C1.5-1-RIO', '(T)(Q)RAS3-1L', '(T)M1-2D'])]

Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,specific_plan,height_district,D,overlay,invalid_zone,invalid_height
10,[Q]C1.5-1-RIO,True,False,C1.5,,1,False,[RIO],,
5859,USC-1A,False,False,invalid,USC-1A,,False,,,
12398,GW(CA),False,False,invalid,CA,,False,,,
15912,CW,False,False,,CW,,False,,,
47002,R4(PV),False,False,R4,PV,,False,,,
51123,SL,False,False,SL,,,False,,,
59863,(T)(Q)RAS3-1L,True,True,RAS3,,1L,False,,,
59893,(T)M1-2D,False,True,M1,,2,True,,,
60015,OS(UV),False,False,OS,UV,,False,,,


In [14]:
df[(df.zone_class == 'invalid') | (df.zone_class == 'failed') | (df.zone_class == "")][['ZONE_CMPLT']]

Unnamed: 0,ZONE_CMPLT
70,(WC)TOPANGA-SN-RIO
772,USC-3
1094,[Q]RZ5-1
1303,A2P-1
1369,(T)(Q)RZ5-1
1487,(T)R4P-2
1622,[Q]R2P-1-CDO
1678,[Q]R3P-1
1681,[Q]R3P-1L
1684,[Q]R3P-4-CDO


In [9]:
print(f"Obs that failed to be parsed: {len(df[df.Q == 'failed'])}")
print(f"Obs that failed to be parsed, invalid, or blanks: {len(df[(df.zone_class == 'invalid') | (df.zone_class == 'failed') | (df.zone_class == '')])}")

Obs that failed to be parsed: 0
Obs that failed to be parsed, invalid, or blanks: 107


  result = method(y)


In [None]:
""" 
With hyphen, try this:
    ([0-9A-Z]+)*(\([A-Z]+\))*([-1-4DLVXS]+)*((?:-[A-Z]+)*)$
"""

## Zone Class

In [15]:
# Check if valid zone classes are used
df['valid_zone'] = df.zone_class.isin(utils.VALID_ZONE_CLASS)

df.valid_zone.value_counts()

True     1827
False     107
Name: valid_zone, dtype: int64

In [16]:
print(f'# obs with invalid zones: {len(df[df.valid_zone == False])}')
print(f'# of unique invalid zones: {df[df.valid_zone == False].zone_class.nunique()}')

# obs with invalid zones: 107
# of unique invalid zones: 2


In [17]:
# Look at what these invalid zones are
# Appears that you can have multiple zones? R3 + P? Is this correct?
# Or, is there a hierarchy, where least restrictive includes all uses of most restrictive, and that spans across residential, agricultural, etc.
df[df.zone_class == 'invalid'].invalid_zone.value_counts()

        21
R3P     10
R4P      9
R1P      8
R1R3     4
RAP      3
R1V3     3
R2P      2
RZ5      2
R1H1     2
R5P      2
R1V2     2
A2P      1
RSP      1
ADP      1
R1V1     1
CCS      1
CEC      1
Name: invalid_zone, dtype: int64

## Height District

In [18]:
df['valid_height'] = df.apply(lambda row: False if row.height_district in(['', 'invalid']) else True, axis = 1)

df.valid_height.value_counts()

True     1827
False     107
Name: valid_height, dtype: int64

In [19]:
# Look at what these invalid heights are.
# 1VL, 2VL seem like they could be legitimate. What is EZ1VL?
df[df.valid_height == False].invalid_height.value_counts()

         84
O         8
RG        4
EZ1VL     3
2L        2
2VL       1
CDO       1
CSA1      1
CPIO      1
RIO       1
HPOZ      1
Name: invalid_height, dtype: int64

In [20]:
print(f'# obs with invalid height districts: {len(df[df.valid_height == False])}')
print(f'# of unique invalid height districts: {df[df.valid_height == False].invalid_height.nunique()}')

# obs with invalid height districts: 107
# of unique invalid height districts: 11


## D Limit

In [21]:
df.D.value_counts()

False    1635
True      299
Name: D, dtype: int64

In [22]:
df[df.D == True].height_district.value_counts()
# Checked, if there is height district followed by a D, those are all tagged as True

2      138
1       61
1VL     39
4       31
1L      22
1XL      6
3        2
Name: height_district, dtype: int64

In [23]:
# Will need to make sure it's not mixed-type at time of export. Cannot be boolean and string
#df.D = df.replace('', False)
df.D = df.D.astype(bool)

## Overlay

In [24]:
type(df.overlay.iloc[1])

list

In [29]:
# Fill in Nones, otherwise cannot do the apply to make the list a string
df.overlay = df.overlay.fillna('')

just_overlay = df[df.overlay != 'failed'][['overlay']]

In [31]:
just_overlay['no_brackets'] = just_overlay['overlay'].apply(', '.join)

In [32]:
just_overlay.no_brackets.value_counts()

                 705
RIO              180
O                148
CUGU             141
CDO              100
CPIO              84
HPOZ              73
O, CUGU           54
CDO, RIO          52
K                 45
SN                36
RIO, CUGU         29
HCR               27
H                 26
O, HPOZ           25
CDO, CUGU         21
O, CPIO           18
G, CUGU           17
K, RIO            13
H, HCR            10
RFA               10
SP                 9
H, K               9
CA                 8
O, CDO             8
O, SN              7
HPOZ, CPIO         6
RIO, POD           6
O, K               6
O, CA              5
K, CUGU            5
H, RIO             4
K, CPIO            4
CDO, SN            4
H, RPD, HCR        4
O, CDO, CUGU       4
SN, CPIO           3
G                  3
O, HPOZ, CPIO      3
O, SP              2
POD                2
O, POD             2
K, HPOZ            2
O, HPOZ, CUGU      1
O, H, K            1
S, RIO             1
O, RFA             1
CDO, HPOZ    

In [33]:
# Make a dictionary for supplemental use (map to overlay)
supplemental_use_dict = {
    # Supplemental Use found in Table 2 or Zoning Code Article 3
    'O': 'oil_drilling', 
    'S': 'animal_slaughtering', 
    'G': 'surface_mining', 
    'K': 'equinekeeping', 
    'CA': 'commercial_and_artcraft', 
    'MU': 'mixed_use', 
    'FH': 'fence_heights', 
    'SN': 'sign', 
    'HS': 'hillside_standards',
    'RG': 'rear_detached_garage', 
    'RPD': 'residential_planned_development', 
    'POD': 'pedestrian_oriented_district',
    'CDO': 'community_design_overlay',
    'NSO': 'neighborhood_stabilization_ordinance',
    'RFA': 'residential_floor_area',
    'MPR': 'modified_parking_requirement',
    'RIO': 'river_improvement_overlay',
    'HCR': 'hillside_construction_regulation',
    'CPIO': 'community_plan_implementation_overlay',
    'CUGU': 'clean_up_green_up_overlay',
    'HPOZ': 'historic_preservation_overlay_zone', 
}

In [34]:
specific_plan_dict = {
    # Are other zoning designations put in with zone class or overlay? HPOX appears as overlay, but a lot of other ones appear in the beginning with zone class.
    # Even more designations found in Zoning Code Article 2
    'CEC': 'convention_and_event_center_specific_plan',
    # Other Zoning Designations found in Zoning Code Article 2, Sec 12.04 Zones - Districts - Symbols.
    # Are these always in parantheses in the beginning, near the prefix??
    'CW': 'central_city_west_specific_plan', 
    'GM': 'glencoe_maxella_specific_plan', 
    'OX': 'oxford_triangle_specific_plan', 
    'PV': 'playa_vista_specific_plan', 
    'WC': 'warner_center_specific_plan', 
    'ADP': 'alameda_district_specific_plan', 
    'CCS': 'century_city_south_studio_zone', 
    'CSA': 'centers_study_area', 
    'PKM': 'park_mile_specific_plan',
    'LAX': 'los_angeles_airport_specific_plan', 
    #'HPOZ': 'historic_preservation_overlay_zone', 
    'LASED': 'la_sports_and_entertainment_specific_plan',
    'USC-1A': 'usc_university_park_campus_specific_plan_subarea_1a_zone',
    'USC-1B': 'usc_university_park_campus_specific_plan_subarea_1b_zone',    
    'USC-2': 'usc_university_park_campus_specific_plan_subarea_2_zone',
    'USC-3': 'usc_university_park_campus_specific_plan_subarea_3_zone',
    'PVSP': 'ponte_vista_at_san_pedro_specific_plan'
}

In [35]:
split = just_overlay.no_brackets.str.split(',', expand = True).fillna('')
split.rename(columns = {0: 'o1', 1: 'o2', 2: 'o3'}, inplace = True)

In [36]:
# Compile list of unique overlays, and see how well that matches with the dict
unique_overlays = pd.DataFrame(split.o1.append(split.o2).append(split.o3))

In [37]:
unique_overlays.rename(columns = {0: 'overlay'}, inplace = True)
unique_overlays.overlay = unique_overlays.overlay.str.strip()
unique_overlays = unique_overlays.drop_duplicates()
unique_overlays = unique_overlays.reset_index().drop(columns = ['index'])

In [38]:
unique_overlays['overlay_descrip'] = unique_overlays.overlay.map(supplemental_use_dict)

In [39]:
unique_overlays[unique_overlays.overlay_descrip.isna()]

Unnamed: 0,overlay,overlay_descrip
0,SP,
1,,
10,H,


In [None]:
# Invalid overlays
# What is SP? Specific Plan?
# Also, can't find H

## Misclasified
* Manually fix with a dictionary?
* Ask Planning team what invalid values are

In [None]:
spilt = pd.concat([just_overlay, split], axis = 1)

In [None]:
for col in ['o1', 'o2', 'o3']:
    new_col = f'{col}_descrip'
    split[col] = split[col].str.strip()
    split[new_col] = split[col].map(supplemental_use_dict)
    split[new_col] = split[new_col].fillna('')
    
tagged_overlay = pd.concat([just_overlay, split], axis = 1)

In [None]:
tagged_overlay.head(30)

In [None]:
""" 
def find_overlays(row, overlays, op=all):
    if row.overlay is None:
        return False
    else:
        return op([o in row.overlay for o in overlays])

df[df.apply(lambda x: find_overlays(x, ['RIO', 'CDO'], op=any), axis=1)]
"""

## Put back together
### Make sure columns don't contain mixed types

In [None]:
df2 = pd.concat([df, tagged_overlay], axis = 1)

In [None]:
df2.dtypes

In [None]:
for col in ['Q', 'T', 'zone_class', 'specific_plan', 'height_district', 'D']:
    print(f'Column: {col}')
    print(df[col].value_counts())

### Merge

In [None]:
full = catalog.zoning.read()
full.crs = {'init':'epsg:2229'}

In [None]:
final = pd.merge(full, df2, on = 'ZONE_CMPLT', how = 'left', validate = 'm:1')

In [None]:
final = final.drop(columns = ['OBJECTID', 'no_brackets', 'Shape__Area', 'Shape__Length'])

final.head()