# Clean ZIMAS / zoning file

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import utils

In [2]:
catalog = intake.open_catalog("../catalogs/*.yml")

In [3]:
# Default value of display.max_rows is 10 i.e. at max 10 rows will be printed.
# Set it None to display all rows in the dataframe
pd.set_option('display.max_rows', None)

In [4]:
df = catalog.zoning.read()
df = df[['ZONE_CMPLT']].drop_duplicates()

## Parse zoning string

In [5]:
def parse_zoning(row):
    try:
        z = utils.ZoningInfo(row.ZONE_CMPLT)
        return pd.Series([z.Q, z.T, z.zone_class, z.specific_plan, z.height_district, z.D, z.overlay, 
                          z.invalid_zone, z.invalid_height], 
                        index = ['Q', 'T', 'zone_class', 'specific_plan', 'height_district', 'D', 'overlay', 
                                 'invalid_zone', 'invalid_height'])
    except ValueError:
        try:
            z = utils.Reparse_NoHyphen(row.ZONE_CMPLT)
            return pd.Series([z.Q, z.T, z.zone_class, z.specific_plan, z.height_district, z.D, z.overlay, 
                              z.invalid_zone, z.invalid_height], 
                            index = ['Q', 'T', 'zone_class', 'specific_plan', 'height_district', 'D', 'overlay', 
                                     'invalid_zone', 'invalid_height'])            
        except ValueError:
            return pd.Series(['failed', 'failed', 'failed', 'failed', 'failed', 'failed', 'failed', '', ''], 
                            index = ['Q', 'T', 'zone_class', 'specific_plan', 'height_district', 'D', 'overlay', 
                                     'invalid_zone', 'invalid_height'])

parsed = df.apply(parse_zoning, axis = 1)

df = pd.concat([df, parsed], axis = 1)

df.head()

Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,specific_plan,height_district,D,overlay,invalid_zone,invalid_height
0,C2-1-SP,False,False,C2,,1,False,[SP],,
1,C1.5-1-SP,False,False,C1.5,,1,False,[SP],,
4,[Q]C1.5-1,True,False,C1.5,,1,False,,,
10,[Q]C1.5-1-RIO,True,False,C1.5,,1,False,[RIO],,
12,[Q]C1.5-1-O,True,False,C1.5,,1,False,[O],,


In [9]:
print(len(df[df.Q == 'failed']))
print(df[df.Q == 'failed'].ZONE_CMPLT.value_counts())

(WC)UPTOWN-SN-RIO          1
VARIOUS                    1
A1                         1
FRWY                       1
R1V3                       1
R4                         1
(WC)TOPANGA-SN             1
(WC)PARK-SN                1
(F)RE11-1                  1
PVSP                       1
R1-1XL-O#-CUGU             1
(WC)DOWNTOWN-SN            1
C2-1VL-CPIO                1
SL                         1
(WC)RIVER-SN-RIO           1
R1V1                       1
RE15-1-H#                  1
R1V2                       1
(WC)COMMERCE-SN            1
M2                         1
[Q]C4(OX)-2D-CDO           1
OS                         1
CW                         1
R1R3                       1
[Q]CM(GM)-2D-CA            1
R1H1                       1
LASED                      1
(WC)COLLEGE-SN             1
RE15-1-RPD-2.9-H           1
(WC)NORTHVILLAGE-SN-RIO    1
(F)CM-1-CUGU               1
(F)CM-1-HPOZ               1
LACFCD                     1
PPSP                       1
PF            

In [None]:
""" 
For no_hyphens, which takes form of like A1 or RE15(PV), use this re.compile
    ([0-9A-Z]+)(\([A-Z]+\))*
    
With hyphen, try this:
    ([0-9A-Z]+)*(\([A-Z]+\))*([-1-4DLVXS]+)*((?:-[A-Z]+)*)$
"""

## Zone Class

In [10]:
# Check if valid zone classes are used
df['valid_zone'] = df.zone_class.isin(utils.VALID_ZONE_CLASS)

df.valid_zone.value_counts()

True     1819
False     115
Name: valid_zone, dtype: int64

In [11]:
print(f'# obs with invalid zones: {len(df[df.valid_zone == False])}')
print(f'# of unique invalid zones: {df[df.valid_zone == False].zone_class.nunique()}')

# obs with invalid zones: 115
# of unique invalid zones: 2


In [12]:
# Look at what these invalid zones are
# Appears that you can have multiple zones? R3 + P? Is this correct?
# Or, is there a hierarchy, where least restrictive includes all uses of most restrictive, and that spans across residential, agricultural, etc.
df[df.zone_class == 'invalid'].invalid_zone.value_counts()

        17
R3P     10
R4P      9
R1P      8
R1R3     4
USC      4
RAP      3
R1V3     3
R1V2     2
R2P      2
R5P      2
R1H1     2
RZ5      2
A2P      1
CEC      1
R1V1     1
CCS      1
RSP      1
ADP      1
Name: invalid_zone, dtype: int64

In [13]:
def zone_class(row):
    """
    This function tags the 35 different zone classes based
    on the first letter
    """
    
    open_space_zones = ['OS']
    agricultural_zones = ['A1', 'A2']
    residential_zones = ['RA', 'RE', 'RS', 'R1', 'RU', 'RZ', 'RW1', 
                         'R2', 'RD', 'RMP', 'RW2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5']
    commercial_zones = ['CR', 'C1', 'C1.5', 'C4' ,'C2', 'C5', 'CM']
    manufacturing_zones = ['MR1', 'M1', 'MR2', 'M2', 'M3']
    
    text = row.zone_class
    
    if any(zone in text for zone in open_space_zones):
        return 'open_space'
    if any(zone in text for zone in agricultural_zones):
        return 'agricultural'
    if any(zone in text for zone in residential_zones):
        return 'residential'
    if any(zone in text for zone in commercial_zones):
        return 'commercial'
    if any(zone in text for zone in manufacturing_zones):
        return 'manufacturing'
    if 'PF' in text:
        return 'public_facility'
    if (('P' in text) or ('PB' in text)) and ('PF' not in text):
        return 'parking'
    if 'SL' in text:
        return 'submerged_lands'
    else:
        return ''

df['zone_class_descrip'] = df.apply(zone_class, axis = 1)   

df.zone_class_descrip.value_counts()

residential        831
commercial         574
manufacturing      144
                   115
parking            107
public_facility     68
agricultural        53
open_space          41
submerged_lands      1
Name: zone_class_descrip, dtype: int64

## Height District

In [14]:
df['valid_height'] = df.apply(lambda row: False if row.height_district in(['', 'invalid']) else True, axis = 1)

df.valid_height.value_counts()

True     1870
False      64
Name: valid_height, dtype: int64

In [15]:
# Look at what these invalid heights are.
# 1VL, 2VL seem like they could be legitimate. What is EZ1VL?
df[df.valid_height == False].invalid_height.value_counts()

         39
O         8
RG        4
EZ1VL     3
2L        2
2VL       1
HPOZ      1
CPIO      1
1B        1
1A        1
CSA1      1
CDO       1
RIO       1
Name: invalid_height, dtype: int64

In [16]:
print(f'# obs with invalid height districts: {len(df[df.valid_height == False])}')
print(f'# of unique invalid height districts: {df[df.valid_height == False].invalid_height.nunique()}')

# obs with invalid height districts: 64
# of unique invalid height districts: 13


## D Limit

In [17]:
df.D.value_counts()

False     1594
True       299
failed      41
Name: D, dtype: int64

In [18]:
df[df.D == True].height_district.value_counts()
# Checked, if there is height district followed by a D, those are all tagged as True

2      138
1       61
1VL     39
4       31
1L      22
1XL      6
3        2
Name: height_district, dtype: int64

In [None]:
# Will need to make sure it's not mixed-type at time of export. Cannot be boolean and string ''

# But, right now still need a way to tag the ones that never got parsed. Come back to this

#df.D = df.replace('', False)
#df.D = df.D.astype(bool)

## Overlay

In [19]:
df.dtypes

ZONE_CMPLT            object
Q                     object
T                     object
zone_class            object
specific_plan         object
height_district       object
D                     object
overlay               object
invalid_zone          object
invalid_height        object
valid_zone              bool
zone_class_descrip    object
valid_height            bool
dtype: object

In [20]:
type(df.overlay.iloc[1])

list

In [27]:
# Fill in Nones, otherwise cannot do the apply to make the list a string
just_overlay = df[df.overlay != 'failed'][['overlay']]

In [29]:
just_overlay['no_brackets'] = just_overlay['overlay'].apply(', '.join)

In [30]:
just_overlay.no_brackets.value_counts()

                 664
RIO              180
O                148
CUGU             141
CDO              100
CPIO              84
HPOZ              73
O, CUGU           54
CDO, RIO          52
K                 45
SN                36
RIO, CUGU         29
HCR               27
H                 26
O, HPOZ           25
CDO, CUGU         21
O, CPIO           18
G, CUGU           17
K, RIO            13
H, HCR            10
RFA               10
SP                 9
H, K               9
O, CDO             8
CA                 8
O, SN              7
O, K               6
HPOZ, CPIO         6
RIO, POD           6
O, CA              5
K, CUGU            5
H, RIO             4
H, RPD, HCR        4
O, CDO, CUGU       4
CDO, SN            4
K, CPIO            4
G                  3
O, HPOZ, CPIO      3
SN, CPIO           3
O, SP              2
K, HPOZ            2
POD                2
O, POD             2
MPR                1
O, SN, CPIO        1
G, RIO             1
O, RFA             1
H, O         

In [40]:
# Make a dictionary for supplemental use (map to overlay)
supplemental_use_dict = {
    # Supplemental Use found in Table 2 or Zoning Code Article 3
    'O': 'oil_drilling', 
    'S': 'animal_slaughtering', 
    'G': 'surface_mining', 
    'K': 'equinekeeping', 
    'CA': 'commercial_and_artcraft', 
    'MU': 'mixed_use', 
    'FH': 'fence_heights', 
    'SN': 'sign', 
    'HS': 'hillside_standards',
    'RG': 'rear_detached_garage', 
    'RPD': 'residential_planned_development', 
    'POD': 'pedestrian_oriented_district',
    'CDO': 'community_design_overlay',
    'NSO': 'neighborhood_stabilization_ordinance',
    'RFA': 'residential_floor_area',
    'MPR': 'modified_parking_requirement',
    'RIO': 'river_improvement_overlay',
    'HCR': 'hillside_construction_regulation',
    'CPIO': 'community_plan_implementation_overlay',
    'CUGU': 'clean_up_green_up_overlay',
    'HPOZ': 'historic_preservation_overlay_zone', 
}

In [33]:
specific_plan_dict = {
    # Are other zoning designations put in with zone class or overlay? HPOX appears as overlay, but a lot of other ones appear in the beginning with zone class.
    # Even more designations found in Zoning Code Article 2
    'CEC': 'convention_and_event_center_specific_plan',
    # Other Zoning Designations found in Zoning Code Article 2, Sec 12.04 Zones - Districts - Symbols.
    # Are these always in parantheses in the beginning, near the prefix??
    'CW': 'central_city_west_specific_plan', 
    'GM': 'glencoe_maxella_specific_plan', 
    'OX': 'oxford_triangle_specific_plan', 
    'PV': 'playa_vista_specific_plan', 
    'WC': 'warner_center_specific_plan', 
    'ADP': 'alameda_district_specific_plan', 
    'CCS': 'century_city_south_studio_zone', 
    'CSA': 'centers_study_area', 
    'PKM': 'park_mile_specific_plan',
    'LAX': 'los_angeles_airport_specific_plan', 
    #'HPOZ': 'historic_preservation_overlay_zone', 
    'LASED': 'la_sports_and_entertainment_specific_plan',
    'USC-1A': 'usc_university_park_campus_specific_plan_subarea_1a_zone',
    'USC-1B': 'usc_university_park_campus_specific_plan_subarea_1b_zone',    
    'USC-2': 'usc_university_park_campus_specific_plan_subarea_2_zone',
    'USC-3': 'usc_university_park_campus_specific_plan_subarea_3_zone',
    'PVSP': 'ponte_vista_at_san_pedro_specific_plan'
}

In [34]:
split = just_overlay.no_brackets.str.split(',', expand = True).fillna('')
split.rename(columns = {0: 'o1', 1: 'o2', 2: 'o3'}, inplace = True)

In [41]:
# Compile list of unique overlays, and see how well that matches with the dict
unique_overlays = pd.DataFrame(split.o1.append(split.o2).append(split.o3))

In [42]:
unique_overlays.rename(columns = {0: 'overlay'}, inplace = True)
unique_overlays.overlay = unique_overlays.overlay.str.strip()
unique_overlays = unique_overlays.drop_duplicates()
unique_overlays = unique_overlays.reset_index().drop(columns = ['index'])

In [43]:
unique_overlays['overlay_descrip'] = unique_overlays.overlay.map(supplemental_use_dict)

In [47]:
unique_overlays[unique_overlays.overlay_descrip.isna()]

Unnamed: 0,overlay,overlay_descrip
0,SP,
1,,
10,H,


In [None]:
# What is SP? Specific Plan?
# Also, can't find H

## Misclasified
* Manually fix?

In [None]:
# This is not working as it should -- o2 and o3 aren't getting filled
for col in ['o1', 'o2', 'o3']:
    new_col = f'{col}_descrip'
    df[col] = df[col].str.strip()
    df[new_col] = df[col].map(supplemental_use_dict)
    df[new_col] = df[new_col].fillna('')

In [None]:
df.head()

In [None]:
split = pd.DataFrame(just_overlay['overlay'].values.tolist(), columns = ['o1','o2', 'o3'], index = just_overlay.index).fillna('')

tag_overlays = pd.concat([just_overlay, split], axis = 1)

tag_overlays.head()

In [None]:
def find_overlays(row, overlays, op=all):
    if row.overlay is None:
        return False
    else:
        return op([o in row.overlay for o in overlays])

df[df.apply(lambda x: find_overlays(x, ['RIO', 'CDO'], op=any), axis=1)]

In [None]:
# Need to clean up where o1 didn't map to anything, and o2 and o3 are filled. Make sure the stuff that isn't overlays is removed.
# Maybe do long first, then make wide again?

In [None]:
df[df.ZONE_CMPLT.str.contains('ADP')]

In [None]:
# Address the Nones first before converting to int
""" 
Q: Qualified Classification are restrictions to ensure compatibility with surrounding property
T: Tentative Zone Classifications are City Council requirements for public improvements

for col in ['Q', 'T']:
    df[col] = df[col].astype(int)
"""

## Merge back together

In [None]:
full = catalog.zoning.read()
full.crs = {'init':'epsg:2229'}

In [None]:
df2 = pd.merge(full, df, on = 'ZONE_CMPLT', how = 'left', validate = 'm:1')