# Clean ZIMAS / zoning file

In [2]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import utils

In [3]:
catalog = intake.open_catalog("../catalogs/*.yml")

In [4]:
# pandas' default for display.max_rows is 60; longer frames are truncated when displayed.
# Set it to None to display all rows in the dataframe
pd.set_option('display.max_rows', None)

In [5]:
# Keep only the raw zoning string and collapse it to its unique values,
# so each distinct ZONE_CMPLT is parsed once.
df = catalog.zoning.read()[['ZONE_CMPLT']].drop_duplicates()

## Parse zoning string

In [6]:
def parse_zoning(row):
    """
    Parse one ZONE_CMPLT string into its zoning components.

    Parameters
    ----------
    row : a dataframe row exposing a ``ZONE_CMPLT`` attribute.

    Returns
    -------
    pd.Series indexed by Q, T, zone_class, height_district, D, overlay.
    When utils.ZoningInfo raises ValueError (presumably because the string
    cannot be parsed -- confirm in utils), every field is returned as ''
    so the row is kept and can be inspected later.
    """
    columns = ['Q', 'T', 'zone_class', 'height_district', 'D', 'overlay']

    try:
        z = utils.ZoningInfo(row.ZONE_CMPLT)
    except ValueError:
        # All ValueErrors are treated alike. Exceptions have no `.message`
        # attribute on Python 3; inspect `str(err)` if a specific error
        # ever needs to be singled out.
        return pd.Series([''] * len(columns), index = columns)
    else:
        return pd.Series(
            [z.Q, z.T, z.zone_class, z.height_district, z.D, z.overlay],
            index = columns
        )

# Run the parser on every row; each returned Series becomes one row of `parsed`
# with the columns Q, T, zone_class, height_district, D, overlay.
parsed = df.apply(parse_zoning, axis = 1)

# Attach the parsed components next to the original ZONE_CMPLT column.
# NOTE: re-running this cell appends the parsed columns a second time.
df = pd.concat([df, parsed], axis = 1)

df.head()

Unnamed: 0,ZONE_CMPLT,Q,T,zone_class,height_district,D,overlay
0,C2-1-SP,False,False,C2,1,False,[SP]
1,C1.5-1-SP,False,False,C1.5,1,False,[SP]
4,[Q]C1.5-1,True,False,C1.5,1,False,
10,[Q]C1.5-1-RIO,True,False,C1.5,1,False,[RIO]
12,[Q]C1.5-1-O,True,False,C1.5,1,False,[O]


## Zone Class

In [7]:
# Check if valid zone classes are used
# Whitelist of accepted zone class codes. The check below is an exact match
# (isin), so a blank zone_class from a failed parse is flagged as invalid.
valid_zone_class = [
    'A1', 'A2', 'RA',
    'RE', 'RE40', 'RE20', 'RE15', 'RE11', 'RE9',
    'RS', 'R1', 'R1V', 'R1F', 'R1R', 'R1H', 'RU', 'RZ2.5', 'RZ3', 'RZ4', 'RW1',
    'R2', 'RD1.5', 'RD2', 'RD3', 'RD4', 'RD5', 'RD6', 'RMP', 'RW2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5',
    'CR', 'C1', 'C1.5', 'C2', 'C4', 'C5', 'CM',
    'MR1', 'M1', 'MR2', 'M2', 'M3',
    'P', 'PB', 'OS', 'PF', 'SL'
]


# True where the parsed zone_class is one of the accepted codes.
df['valid_zone'] = df.zone_class.isin(valid_zone_class)

In [8]:
# Summarise rows whose zone_class failed the whitelist check:
# how many rows there are, and how many distinct bad values they contain.
print(f'# obs with invalid zones: {(~df.valid_zone).sum()}')
print(f'# of unique invalid zones: {df.loc[~df.valid_zone, "zone_class"].nunique()}')

# obs with invalid zones: 147
# of unique invalid zones: 1


In [9]:
# Appears that you can have multiple zones? R3 + P? Is this correct?
# Or, is there a hierarchy, where least restrictive includes all uses of most restrictive, and that spans across residential, agricultural, etc.
df[df.valid_zone == False].zone_class.value_counts()

    147
Name: zone_class, dtype: int64

In [None]:
def zone_class(row):
    """
    Map a parsed zone class code to a broad land-use category.

    The code in ``row.zone_class`` is matched by prefix against the zone
    codes of each category, so suffixed variants (e.g. 'RE40', 'R1V',
    'RD1.5') fall into the category of their base code.

    Matching is anchored at the start of the string. A plain substring test
    would tag 'MR1' / 'MR2' as residential because they contain 'R1' / 'R2'.

    Parameters
    ----------
    row : a dataframe row exposing a ``zone_class`` attribute.

    Returns
    -------
    One of 'open_space', 'agricultural', 'residential', 'commercial',
    'manufacturing', 'public_facility', 'parking', 'submerged_lands',
    or '' when the value is blank, not a string, or matches nothing.
    """

    # (category, code prefixes), checked in order.
    # 'PF' must come before the bare 'P' prefix used for parking.
    categories = (
        ('open_space', ('OS',)),
        ('agricultural', ('A1', 'A2')),
        ('residential', ('RA', 'RE', 'RS', 'R1', 'RU', 'RZ', 'RW1',
                         'R2', 'RD', 'RMP', 'RW2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5')),
        ('commercial', ('CR', 'C1', 'C1.5', 'C4', 'C2', 'C5', 'CM')),
        ('manufacturing', ('MR1', 'M1', 'MR2', 'M2', 'M3')),
        ('public_facility', ('PF',)),
        ('parking', ('P', 'PB')),
        ('submerged_lands', ('SL',)),
    )

    text = row.zone_class

    # Failed parses are stored as ''; also tolerate None / NaN.
    if not isinstance(text, str):
        return ''

    for category, prefixes in categories:
        # str.startswith accepts a tuple of alternatives
        if text.startswith(prefixes):
            return category

    return ''

df['zone_class_descrip'] = df.apply(zone_class, axis = 1)   

df.zone_class_descrip.value_counts()

## Height District

In [None]:
# Check if valid height districts are used
# Accepted height district codes, each listed with its 'D'-suffixed variant.
# valid_heights() tests these as substrings of the parsed value.
valid_height_district = [
    '1', '1D', 
    '1L', '1LD', 
    '1VL', '1VLD', 
    '1XL', '1XLD', 
    '1SS', '1SSD', 
    '2', '2D', 
    '3', '3D', 
    '4', '4D', 
]

# Substrings that disqualify a value even though they contain a valid code ('1').
invalid_height_district = ['1A', '1B']

In [None]:
def valid_heights(row):
    """
    Flag whether a row's height_district looks like a valid height district.

    Returns True when the value contains at least one code from
    valid_height_district and none of the substrings listed in
    invalid_height_district; otherwise returns False.

    NOTE(review): both tests are substring tests, so any value containing
    '1', '2', '3' or '4' passes unless it also contains '1A' / '1B'.
    Confirm whether an exact match was intended.
    """
    code = row.height_district

    # Guard clause: nothing recognisable in the value at all.
    if not any(ok in code for ok in valid_height_district):
        return False

    return not any(bad in code for bad in invalid_height_district)

df['valid_height'] = df.apply(valid_heights, axis = 1)

In [None]:
df[df.valid_height == True].height_district.value_counts()

# Still need to grab the correct height district values

In [None]:
df[df.valid_height == False].height_district.value_counts()

In [None]:
# Some values get tagged with a height district here but still need further cleaning
def height_district(row):
    """
    This function tags the height district based on
    Table 2 - Height Districts (Height, Stories, FAR & RFAR)

    The value in ``row.height_district`` is searched for a known height
    district code and the base code is returned without any trailing 'D'
    (e.g. '1VLD' -> '1VL', '2D' -> '2').

    The specific district-1 variants ('1XL', '1VL', '1SS', '1L') are tested
    before the bare '1'. Every one of them contains '1', so testing '1'
    first would swallow them all.

    Returns '' when the value is blank, not a string, or holds no known
    code. A '1' that only appears as part of '1A' / '1B' is not counted as
    height district 1.
    """
    text = row.height_district

    # Failed parses are stored as ''; also tolerate None / NaN.
    if not isinstance(text, str):
        return ''

    # Most specific district-1 codes first. None of these is a substring of
    # another, so their relative order does not matter.
    for code in ('1XL', '1VL', '1SS', '1L'):
        if code in text:
            return code

    if ('1' in text) and ('1A' not in text) and ('1B' not in text):
        return '1'

    for code in ('2', '3', '4'):
        if code in text:
            return code

    return ''
    
df['height_district2'] = df.apply(height_district, axis = 1)

In [None]:
df.height_district2.value_counts()

In [None]:
df[df.height_district2 ==''].height_district.value_counts()

In [None]:
# Summarise rows whose height_district failed valid_heights():
# the row count, the number of distinct bad values, and a small sample.
print(f'# obs with invalid height districts: {(~df.valid_height).sum()}')
print(f'# of unique invalid height districts: {df.loc[~df.valid_height, "height_district"].nunique()}')
df[~df.valid_height].head()

In [None]:
""" 
OLD CODE -- revisit if need pieces


# Tag the obs that have additional information in height district column. 
# Move additional info to overlay column
df['fix_height_district'] = df.apply(lambda row: 1 if (row.height_district != '') 
                                     and (row.height_district2 == '') else 0, axis = 1)

df[df.fix_height_district==1]

df['correct_overlay'] = df.apply(lambda row: row.overlay + ', ' + row.height_district if row.fix_height_district == 1 else row.overlay, axis = 1)

df[df.fix_height_district==1]

# There are still some incorrect things that are now moved to correct_overlay (ex: R1R3, CPIO...R1R3 appears in zone_class and correct_overlay)
# It's ok, these incorrect ones just won't map using the supplemental_use_dict


# Replace overlay with correct_height_district
df['overlay'] = df.apply(lambda row: row.correct_overlay if row.fix_height_district == 1 else row.overlay, axis = 1)

# Overwrite height_district with blanks
df['height_district'] = df.apply(lambda row: '' if row.fix_height_district == 1 else row.height_district, axis = 1)


df[df.fix_height_district==1]

df = df.drop(columns = ['fix_height_district', 'correct_overlay'])
"""

## D Limit

In [None]:
df.D.value_counts()

In [None]:
df[df.D == True].height_district.value_counts()
# Checked, if there is height district followed by a D, those are all tagged as True

In [None]:
# Will need to make sure it's not mixed-type at time of export. Cannot be boolean and string ''

## Overlay

In [None]:
df.dtypes

In [None]:
type(df.overlay.iloc[1])

In [None]:
df.head(20)

In [None]:
# Fill in Nones, otherwise cannot do the apply to make the list a string
df['overlay'] = df['overlay'].fillna('')

# Work on an explicit copy: a later cell adds a column to just_overlay, and
# assigning into a bare slice of df triggers pandas' SettingWithCopyWarning.
just_overlay = df[['overlay']].copy()

In [None]:
def find_overlays(row, overlays, op=all):
    """
    Test whether a row's overlay value contains the requested overlays.

    Each name in ``overlays`` is checked for membership in ``row.overlay``;
    the list of results is then reduced with ``op`` (``all`` by default,
    pass ``any`` to match at least one). A row whose overlay is None never
    matches.
    """
    present = row.overlay
    if present is None:
        return False

    hits = [name in present for name in overlays]
    return op(hits)

df[df.apply(lambda x: find_overlays(x, ['RIO', 'CDO'], op=any), axis=1)]

In [None]:
just_overlay['no_brackets'] = just_overlay['overlay'].apply(', '.join)

In [None]:
just_overlay.no_brackets.value_counts()

In [None]:
# Break the comma-joined overlay string into one column per overlay and
# name the first three positions o1, o2, o3.
split = (
    just_overlay.no_brackets
    .str.split(',', expand = True)
    .rename(columns = {0: 'o1', 1: 'o2', 2: 'o3'})
)

# Rows with fewer overlays get blanks instead of missing values.
split[['o1', 'o2', 'o3']] = split[['o1', 'o2', 'o3']].fillna('')

In [None]:
df = pd.concat([df, split], axis = 1)
df.head()

In [None]:
# Make a dictionary for supplemental use
supplemental_use_dict = {
    # Supplemental Use found in Table 2 or Zoning Code Article 3
    'O': 'oil_drilling', 
    'S': 'animal_slaughtering', 
    'G': 'surface_mining', 
    'K': 'equinekeeping', 
    'CA': 'commercial_and_artcraft', 
    'MU': 'mixed_use', 
    'FH': 'fence_heights', 
    'SN': 'sign', 
    'HS': 'hillside_standards',
    'RG': 'rear_detached_garage', 
    'RPD': 'residential_planned_development', 
    'POD': 'pedestrian_oriented_district',
    'CDO': 'community_design_overlay',
    'NSO': 'neighborhood_stabilization_ordinance',
    'RFA': 'residential_floor_area',
    'MPR': 'modified_parking_requirement',
    'RIO': 'river_improvement_overlay',
    'HCR': 'hillside_construction_regulation',
    'CPIO': 'community_plan_implementation_overlay',
    'CUGU': 'clean_up_green_up_overlay',
    # Other Zoning Designations found in Zoning Code Article 2, Sec 12.04 Zones - Districts - Symbols.
    'CW': 'central_city_west_specific_plan', 
    'GM': 'glencoe_maxella_specific_plan', 
    'OX': 'oxford_triangle_specific_plan', 
    'PV': 'playa_vista_specific_plan', 
    'WC': 'warner_center_specific_plan', 
    'ADP': 'alameda_district_specific_plan', 
    'CCS': 'century_city_south_studio_zone', 
    'CSA': 'centers_study_area', 
    'PKM': 'park_mile_specific_plan',
    'LAX': 'los_angeles_airport_specific_plan', 
    'HPOZ': 'historic_preservation_overlay_zone', 
    'LASED': 'la_sports_and_entertainment_specific_plan',
    'USC-1A': 'usc_university_park_campus_specific_plan_subarea_1a_zone',
    'USC-1B': 'usc_university_park_campus_specific_plan_subarea_1b_zone',    
    'USC-2': 'usc_university_park_campus_specific_plan_subarea_2_zone',
    'USC-3': 'usc_university_park_campus_specific_plan_subarea_3_zone',
    'PVSP': 'ponte_vista_at_san_pedro_specific_plan'
}

In [None]:
# This is not working as it should -- o2 and o3 aren't getting filled
for col in ['o1', 'o2', 'o3']:
    # Splitting on ',' leaves a leading space on o2 / o3; trim before the lookup.
    df[col] = df[col].str.strip()
    # Codes missing from supplemental_use_dict end up with a blank description.
    df[f'{col}_descrip'] = df[col].map(supplemental_use_dict).fillna('')

In [None]:
df[df.o3 != '']

# Need to clean up where o1 didn't map to anything, and o2 and o3 are filled. Make sure the stuff that isn't overlays is removed.
# Maybe do long first, then make wide again?

In [None]:
df[df.ZONE_CMPLT.str.contains('ADP')]

In [None]:
# Address the Nones first before converting to int
""" 
Q: Qualified Classification are restrictions to ensure compatibility with surrounding property
T: Tentative Zone Classifications are City Council requirements for public improvements

for col in ['Q', 'T']:
    df[col] = df[col].astype(int)
"""

## Merge back together

In [None]:
# Re-read the complete zoning dataset (df above only holds the unique ZONE_CMPLT strings).
full = catalog.zoning.read()
# Assign the coordinate reference system (EPSG:2229) to the reloaded data.
# NOTE(review): the {'init': ...} form is deprecated by newer pyproj / geopandas
# releases in favour of 'EPSG:2229' -- confirm against the installed versions.
full.crs = {'init':'epsg:2229'}

In [None]:
# Left merge keeps every row of `full` and attaches the parsed columns by ZONE_CMPLT.
# validate = 'm:1' makes pandas raise if ZONE_CMPLT is not unique in df.
df2 = pd.merge(full, df, on = 'ZONE_CMPLT', how = 'left', validate = 'm:1')