# Clean ZIMAS / zoning file

In [26]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import utils

In [27]:
catalog = intake.open_catalog("../catalogs/*.yml")

In [28]:
# Default value of display.max_rows is 10 i.e. at max 10 rows will be printed.
# Set it None to display all rows in the dataframe
pd.set_option('display.max_rows', None)

## Parse zoning string

In [29]:
df = catalog.zoning.read()
df = df[['ZONE_CMPLT']].drop_duplicates()

In [30]:
split = df.ZONE_CMPLT.str.split('-', expand = True)
split.rename(columns = {0: 'z1', 1: 'z2', 2: 'z3', 3: 'z4', 4: 'z5'}, inplace = True)

# Replace any Nones with blanks, so that later, function can iterate over those as strings
for col in ['z1', 'z2', 'z3', 'z4', 'z5']:
    split[col] = split[col].fillna('')

split.head()

Unnamed: 0,z1,z2,z3,z4,z5
0,C2,1,SP,,
1,C1.5,1,SP,,
4,[Q]C1.5,1,,,
10,[Q]C1.5,1,RIO,,
12,[Q]C1.5,1,O,,


In [31]:
df = pd.concat([df, split], axis = 1)

In [32]:
df.head()

Unnamed: 0,ZONE_CMPLT,z1,z2,z3,z4,z5
0,C2-1-SP,C2,1,SP,,
1,C1.5-1-SP,C1.5,1,SP,,
4,[Q]C1.5-1,[Q]C1.5,1,,,
10,[Q]C1.5-1-RIO,[Q]C1.5,1,RIO,,
12,[Q]C1.5-1-O,[Q]C1.5,1,O,,


In [33]:
def zone_class(row):
    """
    This function tags the 35 different zone classes based
    on the first letter
    """
    residential_zones = ['RA', 'RE', 'RS', 'R1', 'RU', 'RZ', 'RW1', 
                         'R2', 'RD', 'RMP', 'RW2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5']
    commercial_zones = ['CR', 'C1', 'C1.5', 'C4' ,'C2', 'C5', 'CM']
    manufacturing_zones = ['MR1', 'M1', 'MR2', 'M2', 'M3']
    
    text = row.z1
    
    if 'OS' in text:
        return 'open_space'
    elif ('A1' in text) or ('A2' in text):
        return 'agricultural'
    elif any(zone in text for zone in residential_zones):
        return 'residential'
    elif any(zone in text for zone in commercial_zones):
        return 'commercial'
    elif any(zone in text for zone in manufacturing_zones):
        return 'manufacturing'
    elif 'PF' in text:
        return 'public_facility'
    elif (('P' in text) or ('PB' in text)) and ('PF' not in text):
        return 'parking'
    elif 'SL' in text:
        return 'submerged_lands'
    else:
        return ''

df['zone_class'] = df.apply(zone_class, axis = 1)     

In [34]:
df.zone_class.value_counts()

residential        892
commercial         580
manufacturing      146
parking            115
public_facility     69
agricultural        55
open_space          42
                    33
submerged_lands      2
Name: zone_class, dtype: int64

In [35]:
def prefix(row):
    """
    This function tags the prefix, Q or T
    Q: Qualified Classification are restrictions to ensure compatibility with surrounding property
    T: Tentative Zone Classifications are City Council requirements for public improvements
    """
    text = row.z1

    if 'Q' in text:
        return 'Q'
    elif 'T' in text:
        return 'T'
    else:
        return ''
    
df['prefix'] = df.apply(prefix, axis = 1)  

In [75]:
# Fix prefixes..it's possible to have both Q and T
for letter in ['Q', 'T']:
    new_col = f'prefix_{letter}'
    df[new_col] = df.apply(lambda row: 1 if letter in row.z1 else 0, axis = 1)
    
# This is still imperfect...sometimes T is not enclosed in parantheses or brackets, but there are also cases where specific plan is listed as the first substring

In [78]:
df[df.prefix_T == 1].head()

Unnamed: 0,ZONE_CMPLT,z1,z2,z3,z4,z5,zone_class,prefix,height_district,height_district_d_limit,prefix_Q,prefix_T
70,(WC)TOPANGA-SN-RIO,(WC)TOPANGA,SN,RIO,,,parking,T,,,0,1
609,(T)(Q)C2-2D,(T)(Q)C2,2D,,,,commercial,Q,2.0,2.0,1,1
686,(T)(Q)RAS4-1,(T)(Q)RAS4,1,,,,residential,Q,1.0,1.0,1,1
745,(T)(Q)C2-1-CDO,(T)(Q)C2,1,CDO,,,commercial,Q,1.0,1.0,1,1
800,[T][Q]C2-1D,[T][Q]C2,1D,,,,commercial,Q,1.0,1.0,1,1


In [36]:
df.prefix.value_counts()

Q    1003
      863
T      68
Name: prefix, dtype: int64

In [94]:
# Make a dictionary for height district
height_district_and_d_limit = {
    '1': '1', 
    '1D': '1D', 
    '1L': '1L', 
    '1LD': '1LD', 
    '1VL': '1VL', 
    '1VLD': '1VLD', 
    '1XL': '1XL', 
    '1XLD': '1XLD', 
    '1SS': '1SS',
    '1SSD': '1SSD', 
    '2': '2', 
    '2D': '2D',
    '3': '3',
    '3D': '3D',
    '4': '4',
    '4D': '4D',
}


df['height_district_d_limit'] = df.z2.map(height_district_and_d_limit)
df.height_district_d_limit = df.height_district_d_limit.fillna('')

# Now truncate the D off and get just the height district portion
df['height_district'] = df.apply(lambda row: row.height_district_d_limit[:-1] if 'D' in row.height_district_d_limit
                                 else row.height_district_d_limit, axis = 1)

#for col in ['z1', 'z2', 'z3', 'z4', 'z5']:
    #height_district_dict.get(col, '')
# df["Date"] = df["Member"].apply(lambda x: d.get(x))

In [95]:
df.height_district.value_counts()

1      860
1VL    393
2      249
1XL    195
1L     102
        88
4       38
3        9
Name: height_district, dtype: int64

In [88]:
df['d_limits'] = df.apply(lambda row: 'yes' if 'D' in row.height_district_d_limit else 'no', axis = 1)

In [37]:
""" 
def height_district(row):
    """
    This function tags the height district based on
    Table 2 - Height Districts (Height, Stories, FAR & RFAR)
    """
    text = row.z2
            
    if (text == '1') or (text == '1D'):
        return '1'
    elif (text == '1L') or (text == '1LD'):
        return '1L'
    elif (text == '1VL') or (text == '1VLD'):
        return '1VL'
    elif (text == '1XL') or (text == '1XLD'):
        return '1XL'
    elif (text == '1SS') or (text == '1SSD'):
        return '1SS'
    elif (text == '2') or (text == '2D'):
        return '2'
    elif (text == '3') or (text == '3D'):
        return '3'
    elif (text == '4') or (text == '4D'):
        return '4'
    else:
        return ''
    
df['height_district'] = df.apply(height_district, axis = 1)
"""

In [14]:
"""
def d_limits(row):
    """
    This function tags the development limits, which further restrict 
    height, FAR, % lot coverage, building setback. 
    Sometimes, height districts have additional development limits.
    Tag this as 0 or 1, and only if there are already height districts.
    If there are no height districts, then it's NaN.
    """
    
    yes_development_limits = ['1D', '1LD', '1VLD', '1XLD', '1SSD', 
                          '2D', '3D', '4D']
    no_development_limits = ['1', '1L', '1VL', '1XL', '1SS', 
                          '2', '3', '4']
    
    text = row.z2
    height = row.height_district
    
    if any(str in text for str in yes_development_limits) and (height != ''):
        return 1
    if any(str in text for str in no_development_limits) and (height != ''):
        return 0
    if height == '':
        return np.nan
    
df['d_limits'] = df.apply(d_limits, axis = 1) 
"""

In [15]:
df.d_limits.value_counts()

0.0    1543
1.0     303
Name: d_limits, dtype: int64

In [16]:
# Find the last substring to parse, which can be between z2-z5
df['num_substrings'] = df['ZONE_CMPLT'].str.count('-') + 1

In [17]:
df.num_substrings.value_counts()

3    907
2    644
4    323
1     44
5     16
Name: num_substrings, dtype: int64

In [18]:
df[df.num_substrings==3].head()

Unnamed: 0,ZONE_CMPLT,z1,z2,z3,z4,z5,zone_class,prefix,height_district,d_limits,num_substrings
0,C2-1-SP,C2,1,SP,,,commercial,,1,0.0,3
1,C1.5-1-SP,C1.5,1,SP,,,commercial,,1,0.0,3
10,[Q]C1.5-1-RIO,[Q]C1.5,1,RIO,,,commercial,Q,1,0.0,3
12,[Q]C1.5-1-O,[Q]C1.5,1,O,,,commercial,Q,1,0.0,3
13,[Q]C1.5-1VL-CUGU,[Q]C1.5,1VL,CUGU,,,commercial,Q,1VL,0.0,3


In [None]:
# Make a dictionary for supplemental use
supplemental_use_dict = {
    'O': 'oil_drilling', 
    'S': 'animal_slaughtering', 
    'G': 'surface_mining', 
    'K': 'equinekeeping', 
    'CA': 'commercial_and_artcraft', 
    'MU': 'mixed_use', 
    'FH': 'fence_heights', 
    'SN': 'sign', 
    'HS': 'hillside_standards',
    'RG': 'rear_detached_garage', 
    'RPD': 'residential_planned_development', 
    'POD': 'pedestrian_oriented_district',
    'CDO': 'community_design_overlay',
    'NSO': 'neighborhood_stabilization_ordinance',
    'RFA': 'residential_floor_area',
    'MPR': 'modified_parking_requirement',
    'RIO': 'river_improvement_overlay',
    'HCR': 'hillside_construction_regulation',
    'CPIO': 'community_plan_implementation_overlay',
    'CUGU': 'clean_up_green_up_overlay',
}

In [None]:
# Make a dictionary for other zoning designation
other_zoning_dict = {
    'CW': 'central_city_west_specific_plan', 
    'GM': 'glencoe_maxella_specific_plan', 
    'OX': 'oxford_triangle_specific_plan', 
    'PV': 'playa_vista_specific_plan', 
    'WC': 'warner_center_specific_plan', 
    'ADP': 'alameda_district_specific_plan', 
    'CCS': 'century_city_south_studio_zone', 
    'CSA': 'centers_study_area', 
    'PKM': 'park_mile_specific_plan',
    'LAX': 'los_angeles_airport_specific_plan', 
    'HPOZ': 'historic_preservation_overlay_zone', 
    'LASED': 'la_sports_and_entertainment_specific_plan',
    'USC-1A': 'usc_university_park_campus_specific_plan_subarea_1a_zone',
    'USC-1B': 'usc_university_park_campus_specific_plan_subarea_1b_zone',    
    'USC-2': 'usc_university_park_campus_specific_plan_subarea_2_zone',
    'USC-3': 'usc_university_park_campus_specific_plan_subarea_3_zone',
    'PVSP': 'ponte_vista_at_san_pedro_specific_plan'
}

In [20]:
"""
def supplemental_use_district(row):
    """
    This function tags the supplemental use districts / overlays.
    These appear at the end of the zoning string, after the height/development limits (if there are any).
    List in order of 1 letter, 2, 3, 4 letters. Can overwrite if need be as it loops through each row.
    """
    
    text = row.ZONE_CMPLT
    text2 = row.z2
    text3 = row.z3
    text4 = row.z4
    text5 = row.z5
    parts = row.num_substrings
    
    if (('O' in text2) and (parts == 2)) or (('O' in text3) and (parts == 3)) or (('O' in text4) and (parts == 4)) or (('O' in text5) and (parts ==5)):
        return 'oil_drilling'
    if (('S' in text2) and (parts == 2)) or (('S' in text3) and (parts == 3)) or (('S' in text4) and (parts == 4)) or (('S' in text5) and (parts ==5)):
        return 'animal_slaughtering'
    if (('G' in text2) and (parts == 2)) or (('G' in text3) and (parts == 3)) or (('G' in text4) and (parts == 4)) or (('G' in text5) and (parts ==5)):
        return 'surface_mining'
    if (('K' in text2) and (parts == 2)) or (('K' in text3) and (parts == 3)) or (('K' in text4) and (parts == 4)) or (('K' in text5) and (parts ==5)):
        return 'equinekeeping'
    
    if 'CA' in text:
        return 'commercial_and_artcraft'
    if 'MU' in text:
        return 'mixed_use'
    if 'FH' in text:
        return 'fence_heights'
    if 'SN' in text:
        return 'sign'    
    if 'HS' in text:
        return 'hillside_standards' 
    if 'RG' in text:
        return 'rear_detached_garage' 
    
    if 'RPD' in text:
        return 'residential_planned_development'
    if 'POD' in text:
        return 'pedestrian_oriented'
    if 'CDO' in text:
        return 'community_design_overlay'
    if 'NSO' in text:
        return 'neighborhood_stabilization_ordinance'
    if 'RFA' in text:
        return 'residential_floor_area'
    if 'MPR' in text:
        return 'modified_parking_requirement'
    if 'RIO' in text:
        return 'river_improvement_overlay'
    if 'HCR' in text:
        return 'hillside_construction_regulation'
    
    if 'CPIO' in text:
        return 'community_plan_implementation_overlay'
    if 'CUGU' in text:
        return 'clean_up_green_up_overlay'
    
    else:
        return ''

df['supplemental_use'] = df.apply(supplemental_use_district, axis = 1) 
"""

In [21]:
df.supplemental_use.value_counts()

oil_drilling                        767
                                    683
surface_mining                      279
animal_slaughtering                  65
equinekeeping                        62
hillside_construction_regulation     37
commercial_and_artcraft              19
residential_floor_area               12
residential_planned_development       7
mixed_use                             1
river_improvement_overlay             1
modified_parking_requirement          1
Name: supplemental_use, dtype: int64

In [24]:
#df[df.supplemental_use == ''].ZONE_CMPLT.value_counts()

In [25]:
"""
def other_zoning_designation(row):
    """
    This function tags the other zoning designations.
    Is it possible that there are both supplemental uses & additional zoning designations?
    Based off of Table 2.
    
    Also, there are additional specific plans found in Zoning Code Article 2, Sec 12.04 Zones - Districts - Symbols.
    """
    
    text = row.ZONE_CMPLT
    
    if 'CW' in text:
        return 'central_city_west_specific_plan'   
    if 'GM' in text :
        return 'glencoe_maxella_specific_plan' 
    if 'OX' in text:
        return 'oxford_triangle_specific_plan'    
    if ('PV' in text) and ('PVSP' not in text):
        return 'playa_vista_specific_plan'  
    if 'WC' in text :
        return 'warner_center_specific_plan'
    
    if 'ADP' in text:
        return 'alameda_district_specific_plan'
    if 'CCS' in text:
        return 'century_city_south_studio_zone'
    if 'CSA' in text:
        return 'centers_study_area'
    if 'PKM' in text :
        return 'park_mile_specific_plan'
    if 'LAX' in text:
        return 'los_angeles_airport_specific_plan'
    
    if 'HPOZ' in text:
        return 'historic_preservation_overlay_zone'
    
    if 'LASED' in text:
        return 'la_sports_and_entertainment_specific_plan'
    if 'USC-2' in text:
        return 'usc_university_park_campus_specific_plan_subarea_2_zone'
    if 'USC-3' in text:
        return 'usc_university_park_campus_specific_plan_subarea_3_zone'
    if 'PVSP' in text:
        return 'ponte_vista_at_san_pedro_specific_plan'
    
    if 'USC-1A' in text:
        return 'usc_university_park_campus_specific_plan_subarea_1a_zone'
    if 'USC-1B' in text:
        return 'usc_university_park_campus_specific_plan_subarea_1b_zone'
    
    else:
        return ''

df['other_zoning'] = df.apply(other_zoning_designation, axis = 1) 
"""

In [27]:
#df[df.other_zoning == ''].ZONE_CMPLT.value_counts()

In [28]:
df[df.ZONE_CMPLT.str.contains('ADP')]

Unnamed: 0,ZONE_CMPLT,z1,z2,z3,z4,z5,zone_class,prefix,height_district,d_limits,num_substrings,supplemental_use,other_zoning
8449,ADP-RIO,ADP,RIO,,,,parking,,,,2,oil_drilling,alameda_district_specific_plan


In [None]:
# There can be multiple supplemental uses...need to tag them all
# Also, the 1 letter ones are incorrectly flagged

## Merge back together

In [None]:
full = catalog.zoning.read()
full.crs = {'init':'epsg:2229'}

In [None]:
df2 = pd.merge(full, df, on = 'ZONE_CMPLT', how = 'left', validate = 'm:1')