# Clean ZIMAS / zoning file

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import utils

In [2]:
# Open the intake catalog file(s) matching this relative glob pattern.
catalog = intake.open_catalog("../catalogs/*.yml")

In [3]:
# Load the `zoning` entry from the catalog.
# (presumably a GeoDataFrame, given the geometry column and the CRS assignment — confirm)
df = catalog.zoning.read()
# Label the data with EPSG:2229 by assigning the CRS attribute directly.
# NOTE(review): the {'init': ...} dict form looks like the legacy proj4-style
# spelling, which newer pyproj/geopandas releases are believed to deprecate —
# confirm against the pinned versions before changing it.
df.crs = {'init':'epsg:2229'}

In [4]:
# Preview the first rows of the raw zoning table.
df.head()

Unnamed: 0,OBJECTID,ZONE_CMPLT,ZONE_CLASS,ZONE_SMRY,Shape__Area,Shape__Length,geometry
0,2001,C2-1-SP,C2,COMMERCIAL,76597.094727,1315.054994,"POLYGON ((6460317.049 1832062.462, 6460329.590..."
1,2002,C1.5-1-SP,C1.5,COMMERCIAL,85027.776367,1480.306793,"POLYGON ((6459991.770 1832633.240, 6460066.270..."
2,2003,C1.5-1-SP,C1.5,COMMERCIAL,91346.067383,1517.945871,"POLYGON ((6460331.280 1832633.620, 6460331.440..."
3,2004,C1.5-1-SP,C1.5,COMMERCIAL,76787.238281,1362.581239,"POLYGON ((6460068.300 1833302.250, 6460083.350..."
4,2005,[Q]C1.5-1,C1.5,COMMERCIAL,15611.828125,503.048549,"POLYGON ((6479594.100 1840055.860, 6479624.020..."


In [5]:
# Only the complete zone string matters from here on: keep that single
# column and one row per distinct value.
df = df[['ZONE_CMPLT']].drop_duplicates(subset=['ZONE_CMPLT'])

In [6]:
# Break the complete zone string on '-' into one column per component
# and give the positional columns readable names.
split = (
    df['ZONE_CMPLT']
    .str.split('-', expand=True)
    .rename(columns={0: 'z1', 1: 'z2', 2: 'z3', 3: 'z4', 4: 'z5'})
)

# Zone strings with fewer components leave None in the trailing columns;
# swap those for empty strings so the tagging functions can treat every
# component as a string.
for col in ('z1', 'z2', 'z3', 'z4', 'z5'):
    split[col] = split[col].fillna('')

split.head()

Unnamed: 0,z1,z2,z3,z4,z5
0,C2,1,SP,,
1,C1.5,1,SP,,
4,[Q]C1.5,1,,,
10,[Q]C1.5,1,RIO,,
12,[Q]C1.5,1,O,,


In [7]:
def zone_class(row):
    """
    Tag the broad zone class for one of the 35 zone codes, based on
    the `z1` component of the row (the text before the first '-').

    Matching is done by substring rather than equality because `z1`
    can carry a prefix in front of the zone code (e.g. '[Q]C1.5').
    The checks run in a fixed order and the first match wins.

    Returns one of 'open_space', 'agricultural', 'manufacturing',
    'residential', 'commercial', 'public_facility', 'parking',
    'submerged_lands', or '' when nothing matches.
    """
    residential_zones = ['RA', 'RE', 'RS', 'R1', 'RU', 'RZ', 'RW1',
                         'R2', 'RD', 'RMP', 'RW2', 'R3', 'RAS3', 'R4', 'RAS4', 'R5']
    commercial_zones = ['CR', 'C1', 'C1.5', 'C4', 'C2', 'C5', 'CM']
    manufacturing_zones = ['MR1', 'M1', 'MR2', 'M2', 'M3']
    # Manufacturing codes that embed a residential code as a substring
    # ('R1' is inside 'MR1', 'R2' is inside 'MR2').
    restricted_industrial_zones = ['MR1', 'MR2']

    text = row.z1

    def contains_any(zones):
        return any(zone in text for zone in zones)

    if 'OS' in text:
        return 'open_space'
    if 'A1' in text or 'A2' in text:
        return 'agricultural'
    # Must run before the residential test, otherwise MR1/MR2 would be
    # picked up by the 'R1'/'R2' substring match and tagged residential.
    if contains_any(restricted_industrial_zones):
        return 'manufacturing'
    if contains_any(residential_zones):
        return 'residential'
    if contains_any(commercial_zones):
        return 'commercial'
    if contains_any(manufacturing_zones):
        return 'manufacturing'
    if 'PF' in text:
        return 'public_facility'
    # 'PF' has already returned above, so any remaining 'P' (which also
    # covers 'PB') is a parking zone.
    if 'P' in text:
        return 'parking'
    if 'SL' in text:
        return 'submerged_lands'
    return ''

# Tag each row with its zone class (computed row by row from z1).
split['zone_class'] = split.apply(zone_class, axis='columns')

In [8]:
def prefix(row):
    """
    Report which prefix letter, Q or T, appears in the row's `z1` text.

    Q: Qualified Classification are restrictions to ensure compatibility with surrounding property
    T: Tentative Zone Classifications are City Council requirements for public improvements

    'Q' is tested first, so text containing both letters yields 'Q'.
    Returns '' when neither letter is present.
    """
    zone_text = row.z1

    # Order matters: the first flag found in the text is the one returned.
    for flag in ('Q', 'T'):
        if flag in zone_text:
            return flag
    return ''
    
# Tag each row with its Q/T prefix (computed row by row from z1).
split['prefix'] = split.apply(prefix, axis='columns')

In [9]:
def height_district(row):
    """
    Map the row's `z2` component onto its base height district, per
    Table 2 - Height Districts (Height, Stories, FAR & RFAR).

    A known district followed by 'D' collapses to the district itself,
    so '1VL' and '1VLD' both give '1VL'. Any other value gives ''.
    """
    base_districts = ('1', '1L', '1VL', '1XL', '1SS', '2', '3', '4')

    # Every accepted spelling (with or without the trailing 'D') points
    # at its base district; only exact matches count.
    lookup = {}
    for district in base_districts:
        lookup[district] = district
        lookup[district + 'D'] = district

    return lookup.get(row.z2, '')
    
# Tag each row with its base height district (computed row by row from z2).
split['height_district'] = split.apply(height_district, axis='columns')

In [10]:
def d_limits(row):
    """
    Flag whether the row's height district carries development limits,
    which further restrict height, FAR, % lot coverage, building setback.

    Reads `row.z2` (the raw height-district text) and
    `row.height_district` (the already-tagged base district).

    Returns 1 when z2 is a height district with the 'D' suffix, 0 when
    it is a plain height district, and NaN when the row has no height
    district or z2 is not a recognised height-district code.
    """
    yes_development_limits = {'1D', '1LD', '1VLD', '1XLD', '1SSD',
                              '2D', '3D', '4D'}
    no_development_limits = {'1', '1L', '1VL', '1XL', '1SS',
                             '2', '3', '4'}

    text = row.z2
    height = row.height_district

    # Without a tagged height district, development limits are undefined.
    if height == '':
        return np.nan

    # Exact membership, matching the equality tests used to derive
    # height_district; a substring test would also accept unrelated text
    # that merely contains one of these codes.
    if text in yes_development_limits:
        return 1
    if text in no_development_limits:
        return 0

    # z2 and height_district disagree: report missing explicitly instead
    # of falling off the end and returning None.
    return np.nan
    
# Tag each row with its development-limits flag (computed row by row
# from z2 and the height_district column).
split['d_limits'] = split.apply(d_limits, axis='columns')