In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import os
import fiona

In [2]:
cnty = {'Alameda': 1.0,
'Contra Costa': 13.0,
'Marin': 41.0,
'Napa': 55.0,
'San Francisco': 75.0,
'San Mateo': 81.0,
'Santa Clara': 85.0,
'Solano': 95.0,
'Sonoma': 97.0}

ctyMap = pd.DataFrame(cnty.items(), columns=['CTYNAME', 'ctyCode'])

## 0 Data sources

In [None]:
# Input files
  # All input files are packed in "PLU_BOC_capacity_calculation_map.zip" at https://mtcdrive.app.box.com/file/651898444588
    
## Pacel 10
p10_raw = gpd.read_file('inputs/PLU_analysis.gdb', layer='p10_table')
p10_geo = gpd.read_file('inputs/p10_geo_shp.shp')

## parcel10 to pba40 basezoning code
pz10_raw = pd.read_csv('inputs/2020_03_06_zoning_parcels.csv')

## pba40 basezoning plu
plu10 = pd.read_csv('inputs/zoning_lookup.csv')

## BASIS BOC
p10_plu50_raw = gpd.read_file('inputs/PLU_analysis.gdb', layer='p10_boc_opt_b_v1d_tbl')

## planned zoning scenarios
zmods = pd.read_csv('inputs/03_06_2020_parcels_geography.csv')

## Building data to decide parcel status
blg10 = pd.read_csv('inputs/blg10.csv')

## 1 Merge data sets

### 1.1 P10 parcel zoining designations

In [6]:
p10 = p10_raw[['PARCEL_ID','geom_id_s','COUNTY_ID','jurisdiction','ACRES','LAND_VALUE','pda_id','zoningmodcat']]
print(p10.shape)
#display(p10.head())

## pacel to zoning code mapping
print(pz10.shape)
pz10 = pz10_raw[['PARCEL_ID','zoning_id','nodev_pba40']]
#display(pz10.head())

p10_z10 = p10.merge(pz10, on = 'PARCEL_ID', how = 'left')
#display(p10_z10.head())

## Check Number of parcels missing zoning designation
#z10_missing = p10_z10.loc[p10_z10['nodev_pba40'].isnull()]
#print(z10_missing.shape[0],z10_missing.shape[0]/pz10.shape[0])

(1956208, 8)
(1950733, 11)


### 1.2 parcel 10 with PBA40 zoning code PLU

In [7]:

# check duplicates in zoning id
plu10['id'] = plu10['id'].apply(lambda x: float(x))
plu10['jz_o'] = plu10['city'].str.cat(plu10['name'],sep=" ")
print(plu10.shape[0], len(plu10.id.unique()), len(plu10.jz_o.unique()))

# relabel p10 land plu info (used in PBA40)
plu10.columns = [i+'_10' for i in list(plu10)]
#display(plu10.head())

# merge PBA40 plu to p10
p10_plu10 = p10_z10.merge(plu10, left_on = 'zoning_id', right_on = 'id_10', how = 'left')
#display(p10_plu10.head())

# Check number of p10 records failed to find a matching PLU
#display(p10_plu10.loc[p10_plu10['jz_o_10'].isnull()])
print(p10_plu10.loc[p10_plu10['jz_o_10'].isnull()].shape[0] / p10_z10.shape[0])

5156 5156 4536
0.0034745793903306807


### 1.3 P10 with BASIS BOC

In [8]:
plu50 = p10_plu50_raw[['parcel_id','me','mt', 'mr', 'rb', 'rs', 'ih', 'iw', 'il', 'sc', 'ho', 'of', 'hm', 'ht', 'hs',
                       'max_height','max_dua','max_far','plu_id','plu_jurisdiction','plu_description','building_types_source','source']]

# relabel BASIS land plu info (to use in PBA50)
plu50.columns = ['PARCEL_ID'] + [i+'_18' for i in list(plu50)[1:]]
#display(plu50.head())

# merge PBA50 plu to p10
p10_plus = p10_plu10.merge(plu50, on = 'PARCEL_ID', how = 'left')

p10_plus.drop(columns = ['id_10','name_10','plandate_10','jz_o_10'],inplace = True)
#display(p10_plus.head())

### 1.4 Bring in Building data (b10) to determine parcel characteristics

In [9]:
print(blg10.shape[0], len(blg10.building_id.unique()), len(blg10.parcel_id.unique()))
#display(blg10.head())

# Assign parcel characteristics

# merge builing and parcel data w/ Outer-join
b10_p10 = blg10.merge(p10[['PARCEL_ID']],left_on = 'parcel_id',right_on = 'PARCEL_ID', how = 'outer')
print(b10_p10.shape)

# sum all values for multiple buildings within one parcel
pb10_v = b10_p10.groupby(['PARCEL_ID'])['improvement_value','residential_units','residential_sqft','non_residential_sqft',
                                      'building_sqft','redfin_sale_price','costar_rent'].sum().reset_index()

# chose the earliest built year for multiple buildings within one parcel
pb10_yr = b10_p10.groupby(['PARCEL_ID'])['year_built','building_id'].min().reset_index()

# parcel vacancy based on building type
b10_p10['dType'] = b10_p10['development_type_id']
blg10.loc[blg10['development_type_id'] == 0, 'dType'] = 'Vacant'
blg10.loc[blg10['development_type_id'] == 15, 'dType'] = 'Vacant'
pb10_vacent = b10_p10.loc[b10_p10['dType'] == 'Vacant'][['PARCEL_ID','dType']]

# merge
pb10_temp = pb10_v.merge(pb10_yr, on = 'PARCEL_ID', how = 'left').merge(pb10_vacent, on = 'PARCEL_ID', how = 'left')
print(pb10_temp.shape)
pb10_plus = p10_plus.merge(pb10_temp, on = 'PARCEL_ID', how = 'left')

# Investment-land ratio
pb10_plus['ILR'] = pb10_plus['improvement_value'] / pb10_plus['LAND_VALUE']
pb10_plus.loc[pb10_plus['LAND_VALUE'] == 0, 'ILR'] = 'n/a'

# Vacant parcels
pb10_plus['vacant'] = np.where((pb10_plus['building_id'].isnull()) | (pb10_plus['dType'] == 'Vacant') | 
                          ((pb10_plus['improvement_value'] == 0) & (pb10_plus['residential_units'] == 0) & 
                             (pb10_plus['residential_sqft'] == 0) & (pb10_plus['non_residential_sqft'] == 0) &
                             (pb10_plus['building_sqft'] == 0)), 'vacant', 'nonVacant')

# building age by year-built
pb10_plus['b_age'] = np.where(pb10_plus.year_built.isnull(), 'missing', 
                              np.where(pb10_plus.year_built < 1930, 'before 1930',
                                      np.where(pb10_plus.year_built < 1980, '1930-1980',
                                              np.where(pb10_plus.year_built < 2000, '1980-2000','after 2000'))))

#display(pb10_plus.head())

1843351 1843351 1843292
(1956269, 22)


  # This is added back by InteractiveShellApp.init_path()
  from ipykernel import kernelapp as app
  res_values = method(rvalues)


(1956208, 10)


### 1.5 Bring in zoning scenarios data

In [12]:
zmods.columns = list(zmods)[:-2] + ['nodev_pba50','jurisdiction_id']
#display(zmods.head())

# merge parcel data with zoning mods
pb10_plus.geom_id_s = pd.to_numeric(pb10_plus.geom_id_s)
pb10_plus_zmods = pb10_plus.merge(zmods, left_on = 'geom_id_s', right_on = 'geom_id', how = 'left')
#display(pb10_plus_zmods.head())
pb10_plus_zmods.columns = [x.upper() for x in pb10_plus_zmods.columns]
pb10_plus_zmods.rename(columns = {'PARCEL_ID_X': 'PARCEL_ID',
                                  'JURIS_ID': 'JURIS'}, inplace = True)

### 1.6 Export PLU BOC data to csv

In [66]:
p10_plu_boc = pb10_plus_zmods[['PARCEL_ID','COUNTY_ID','JURIS','PLU_ID_18','PLU_JURISDICTION_18','PLU_DESCRIPTION_18',
                    'MAX_FAR_10','MAX_DUA_10','MAX_DUA_18','MAX_FAR_18', 'MAX_HEIGHT_10','MAX_HEIGHT_18',
                    'HS_10','HT_10','HM_10','OF_10','HO_10','SC_10','IL_10','IW_10','IH_10','RS_10','RB_10','MR_10','MT_10','ME_10',
                    'ME_18','MT_18','MR_18','RB_18','RS_18','IH_18','IW_18','IL_18','SC_18','HO_18','OF_18','HM_18','HT_18','HS_18',
                    'BUILDING_TYPES_SOURCE_18','SOURCE_18']]
p10_plu_boc.to_csv('outputs/p10_plu_boc_allAttrs.csv',index = False)

### will visualize in Tableau

## 2 Capacity statistics

In [68]:
# select needed fields
plu_main = pb10_plus_zmods.loc[pb10_plus_zmods['COUNTY_ID'] > 0][['PARCEL_ID','COUNTY_ID','JURIS','B_AGE','GEOM_ID_S','ACRES',
                    'MAX_FAR_10','MAX_DUA_10','MAX_DUA_18','MAX_FAR_18', 'MAX_HEIGHT_10','MAX_HEIGHT_18',
                    'HS_10','HT_10','HM_10','OF_10','HO_10','SC_10','IL_10','IW_10','IH_10','RS_10','RB_10','MR_10','MT_10','ME_10',
                    'ME_18','MT_18','MR_18','RB_18','RS_18','IH_18','IW_18','IL_18','SC_18','HO_18','OF_18','HM_18','HT_18','HS_18',
                    'YEAR_BUILT','ILR','VACANT','PBA50ZONINGMODCAT','NODEV_PBA40','NODEV_PBA50',
                    'BUILDING_TYPES_SOURCE_18','SOURCE_18']]

# Convert all types to numeric to enable calculation
l = ['HS_10','HT_10','HM_10','OF_10','HO_10','SC_10','IL_10','IW_10','IH_10','RS_10','RB_10','MR_10','MT_10','ME_10',
     'ME_18','MT_18','MR_18','RB_18','RS_18','IH_18','IW_18','IL_18','SC_18','HO_18','OF_18','HM_18','HT_18','HS_18',
     'MAX_FAR_10','MAX_DUA_10','MAX_DUA_18','MAX_FAR_18','MAX_HEIGHT_10','MAX_HEIGHT_18']
for i in l:
    plu_main[i] = pd.to_numeric(plu_main[i], errors='coerce')

# fill 'NaN' with 0
plu_main_cy = plu_main.copy()
plu_main.update(plu_main[l].fillna(0))

### 2.1 Allowed Development Type Statistics

In [69]:
cty = ctyMap.copy()
cty.set_index('ctyCode',inplace = True)

dfs = []
devType = ['HM','MR','HS','RS','OF','IW','IL','IH','HT','HO','SC','RB','MT','ME']
for i in devType:
    plu = i+'_10'
    boc = i+'_18'
    df = plu_main[['COUNTY_ID']+ [plu,boc]].groupby(['COUNTY_ID']).sum().reset_index()
    df.set_index('COUNTY_ID',inplace = True)
    dfs.append(df)

plu_boc_parcelCount_comp = pd.concat([cty] + dfs, axis=1,join='inner')
#display(plu_boc_parcelCount_comp)

for i in devType:
    plu = i+'_10'
    boc = i+'_18'
    plu_boc_parcelCount_comp[i+'_diff'] = plu_boc_parcelCount_comp[boc] - plu_boc_parcelCount_comp[plu]
    plu_boc_parcelCount_comp[i+'_diff_pct'] = plu_boc_parcelCount_comp[i+'_diff']/plu_boc_parcelCount_comp[plu]

plu_boc_type_diff = plu_boc_parcelCount_comp[[x+'_diff' for x in devType] + [x+'_diff_pct' for x in devType]]
#display(plu_boc_type_diff)

### 2.2 Caculate Build out capacity for each parcel

In [70]:
# Calculate capacity

def cap(df,nonResLs,reLs,zoning_yr,pba):
    """
    df: parcel data with PBA40 PLU ("_10") and BASIS BOC ("_18") attributes 
    nonResLs: a list of non-residential development types
    reLs: a list of residential development types, including HS, HT, HM
    zoning_yr: string, '_10' or '_18'
    pba: string, '_PBA40' or '_PBA50'
    """
    
    # a parcel is 'allowNonRes' is at least one of the non-residential development types is allowed
    df['allowNonRes'+zoning_yr] = df[nonResLs].sum(axis=1) > 0 

    # a parcel is 'allowRes' is at least one of the residential development types is allowed
    df['allowRes'+zoning_yr] = df[reLs].sum(axis=1) > 0
    
    # DUA calculations apply to parcels 'allowRes' and not marked as "nodev"
    df['units'+zoning_yr] = df['ACRES'] * df['MAX_DUA'+zoning_yr]
    
    # if DUA is null or zero, calculate assuming a HU is 1200 square feet and a floor is 11 feet high
    calUnits_idx = (df['allowRes'+zoning_yr] == True) & (df['MAX_DUA'+zoning_yr] == 0) & (df['NODEV'+pba] == 0)
    df.loc[calUnits_idx,'units'+zoning_yr] = df.loc[calUnits_idx,'ACRES'] * 43560 / 1200
    df.loc[calUnits_idx,'calc_unit'+zoning_yr] = 'Yuqi_calc'
    
    df.loc[(df['allowRes'+zoning_yr]== False) | (df['NODEV'+pba] == 1) ,'units'+zoning_yr] = 0

    # FAR calculations apply to parcels 'allowNonRes' and not marked as "nodev"
    df['sqft'+zoning_yr] = df['ACRES'] * df['MAX_FAR'+zoning_yr] * 43560
    
    # if FAR is null or zero, calculate assuming a floor is eleven feet hight to do the calc and land coverage 50%
    calSQFT_idx = (df['allowNonRes'+zoning_yr] == True) & (df['MAX_FAR'+zoning_yr] == 0) & (df['NODEV'+pba] == 0)
    df.loc[calSQFT_idx,'sqft'+zoning_yr] = df.loc[calSQFT_idx,'ACRES'] * 0.5 * (df.loc[calSQFT_idx,'MAX_HEIGHT'+zoning_yr] / 11)
    df.loc[calSQFT_idx,'calc_sqft'+zoning_yr] = 'Yuqi_calc'
    
    df.loc[(df['allowNonRes'+zoning_yr]== False) | (df['NODEV'+pba] == 1),'sqft'+zoning_yr] = 0
    
    df['Ksqft'+zoning_yr] = df['sqft'+zoning_yr].apply(lambda x: x*0.001)
    
    # calculate non-residential capacity in employment
    df['emp'+zoning_yr] = df['sqft'+zoning_yr].apply(lambda x: x / 350)
    office_idx = (df['OF'+zoning_yr] == 1) & (df[['HO'+zoning_yr,'SC'+zoning_yr,'IL'+zoning_yr,'IW'+zoning_yr,
                                                  'IH'+zoning_yr,'RS'+zoning_yr,'RB'+zoning_yr,'MR'+zoning_yr,
                                                  'MT'+zoning_yr,'ME'+zoning_yr]].sum(axis = 1) == 0)
    df.loc[office_idx,'emp'+zoning_yr] = df.loc[office_idx,'sqft'+zoning_yr].apply(lambda x: x / 175)
    indus_idx = (df[['IL'+zoning_yr,'IW'+zoning_yr,'IH'+zoning_yr]].sum(axis = 1) > 0) & (
        df[['OF'+zoning_yr,'HO'+zoning_yr,'SC'+zoning_yr,'RS'+zoning_yr,'RB'+zoning_yr,'MR'+zoning_yr,
            'MT'+zoning_yr,'ME'+zoning_yr]].sum(axis = 1) == 0)
    df.loc[indus_idx,'emp'+zoning_yr] = df.loc[indus_idx,'sqft'+zoning_yr].apply(lambda x: x / 500)

    return df[['PARCEL_ID','allowRes'+zoning_yr,'allowNonRes'+zoning_yr,'units'+zoning_yr,'Ksqft'+zoning_yr,'emp'+zoning_yr,'calc_unit'+zoning_yr,'calc_sqft'+zoning_yr]] 

In [71]:
nonRes = ['OF','HO','SC','IL','IW','IH','RS','RB','MR','MT','ME']
Res = ['HS', 'HT', 'HM','MR']

# Calculate PBA40 PLU capacity 
nonRes_10 = [x+'_10' for x in nonRes]
Res_10 = [x+'_10' for x in Res]
plu_main_10 = plu_main.copy()
for i in ['allowRes_10','allowNonRes_10','units_10','sqft_10','Ksqft_10','emp_10',
          'calc_unit_10','calc_sqft_10']:
    plu_main_10[i] = np.nan
cap_10 = cap(plu_main_10,nonRes_10,Res_10,'_10','_PBA40')

# Calculate PBA50 BOC capacity 
nonRes_18 = [x+'_18' for x in nonRes]
Res_18 = [x+'_18' for x in Res]
plu_main_18 = plu_main.copy()
for i in ['allowRes_18','allowNonRes_18','units_18','sqft_18','Ksqft_18','emp_18',
          'calc_unit_18','calc_sqft_18']:
    plu_main_18[i] = np.nan
cap_18 = cap(plu_main_18,nonRes_18,Res_18,'_18','_PBA50')

p10_capacity_temp = plu_main.merge(cap_10, on = 'PARCEL_ID', how = 'left').merge(
    cap_18, on ='PARCEL_ID', how = 'left').merge(
    ctyMap, left_on = 'COUNTY_ID', right_on = 'ctyCode', how = 'left')
#display(p10_capacity_temp.head())

In [72]:
## replace BOC columns with initial data that contains NaN to reflect "real" BASIS BOC data
na_cols = ['ME_18','MT_18','MR_18','RB_18','RS_18','IH_18','IW_18','IL_18','SC_18','HO_18','OF_18','HM_18','HT_18','HS_18',
           'MAX_DUA_18','MAX_FAR_18', 'MAX_HEIGHT_18','YEAR_BUILT','BUILDING_TYPES_SOURCE_18','SOURCE_18']
boc_wNA = plu_main_cy[['PARCEL_ID'] + na_cols]
p10_capacity = p10_capacity_temp.drop(columns = na_cols)
p10_capacity = p10_capacity.merge(boc_wNA, on = 'PARCEL_ID', how = 'left')
p10_capacity.drop(columns = ['ctyCode'], inplace = True)

## export data // will visualize in Tableau
p10_capacity.to_csv('outputs/devCapacity_allAttrs.csv', index = False)

### 2.3 Zoning build-out-capacity at jurisdiction and county levels

In [16]:
"""
# BOC by jurisdiction function
def boc_j(df):
    boc_j = df.groupby(['JURIS'])['ACRES','units_10','units_18','sqft_10','sqft_18'].sum()

    boc_j['unit_diff'] = boc_j['units_18'] - boc_j['units_10']
    boc_j['sqft_diff'] = boc_j['sqft_18'] - boc_j['sqft_10']
    boc_j['unit_diff_pct'] = boc_j['unit_diff'] / boc_j['units_10']
    boc_j['sqft_diff_pct'] = boc_j['sqft_diff'] / boc_j['sqft_10']

    for i in ['units_10','units_18','unit_diff','sqft_10','sqft_18','sqft_diff']:
        boc_j[i] = boc_j[i].apply(lambda x: f'{int(x):,}')
    return boc_j

# BOC by county function
def boc_c(df):
    boc_cty = df.groupby(['COUNTY_ID'])['ACRES','units_10','units_18','sqft_10','sqft_18'].sum()
    boc_cty['unit_diff'] = boc_cty['units_18'] - boc_cty['units_10']
    boc_cty['sqft_diff'] = boc_cty['sqft_18'] - boc_cty['sqft_10']
    boc_cty['unit_diff_pct'] = boc_cty['unit_diff'] / boc_cty['units_10']
    boc_cty['sqft_diff_pct'] = boc_cty['sqft_diff'] / boc_cty['sqft_10']

    for i in ['units_10','units_18','unit_diff','sqft_10','sqft_18','sqft_diff']:
        boc_cty[i] = boc_cty[i].apply(lambda x: f'{int(x):,}')

    boc_cty = boc_cty.reset_index()
    boc_cty = boc_cty.loc[boc_cty['COUNTY_ID'] > 0]
    return boc_cty

# all parcels statistics
all_boc_j = boc_j(p10_capacity)
all_boc_j.to_csv('outputs/all_boc_jurisdiction.csv')

all_boc_c = boc_c(p10_capacity)
all_boc_c.to_csv('outputs/all_boc_county.csv')

# vacant parcel statistics
p_vac = p10_capacity.loc[p10_capacity.VACANT == 'vacant']

vac_boc_j = boc_j(p_vac)
vac_boc_j.to_csv('outputs/vac_boc_jurisdiction.csv')

vac_boc_c = boc_c(p_vac)
vac_boc_c.to_csv('outputs/vac_boc_county.csv')

# low ILR parcel statistics (threadhold 0.2)
p10_capacity.ILR = pd.to_numeric(p10_capacity.ILR, errors='coerce')
p_low_ILR = p10_capacity.loc[p10_capacity.ILR < 0.2]

low_ILR_boc_j = boc_j(p_low_ILR)
low_ILR_boc_j.to_csv('outputs/low_ILR_boc_jurisdiction.csv')

low_ILR_boc_c = boc_c(p_low_ILR)
low_ILR_boc_c.to_csv('outputs/low_ILR_boc_county.csv')

# Old building parcel statistics (1930-1980)
p10_capacity.year_built = pd.to_numeric(p10_capacity.YEAR_BUILT, errors='coerce')
p_old = p10_capacity.loc[(p10_capacity.YEAR_BUILT < 1980) & (p10_capacity.YEAR_BUILT >= 1930)]

old_boc_j = boc_j(p_old)
old_boc_j.to_csv('outputs/old_boc_jurisdiction.csv')

old_boc_c = boc_c(p_old)
old_boc_c.to_csv('outputs/old_boc_county.csv')
"""

## 3 PLU BOC Mapping

In [73]:
p10_pluboc_allAttrs = p10_capacity.merge(
    p10_geo[['PARCEL_ID','geometry']], on = 'PARCEL_ID', how = 'left').merge(
    p10_plu50_raw[['parcel_id','plu_jurisdiction','plu_code','plu_description']], left_on = 'PARCEL_ID', right_on = 'parcel_id', how = 'left')

p10_pluboc_allAttrs.rename(columns={'plu_jurisdiction': 'PLU_JURIS',
                             'plu_code': 'PLU_CODE', 
                             'plu_description': 'PLU_DESC', 
                             'geometry': 'GEOMETRY',
                             'BUILDING_TYPES_SOURCE_18':'B_TYPE_SRC',
                             'MAX_HEIGHT_10': 'MAX_H_10',
                             'MAX_HEIGHT_18': 'MAX_H_18',
                             'calc_unit_10': 'cal_u_10', 
                             'calc_unit_18': 'cal_u_18',
                             'calc_sqft_10': 'cal_sf_10',
                             'calc_sqft_18': 'cal_sf_18',
                             'allowNonRes_10': 'nonRes1_10', 
                             'allowRes_10': 'res1_10',
                             'allowNonRes_18': 'nonRes1_18', 
                             'allowRes_18': 'res1_18',
                             'NODEV_PBA40': 'NODEV_40', 
                             'NODEV_PBA50': 'NODEV_50',
                             'SOURCE_18': 'SRC'}, inplace = True)

p10_pluboc_allAttrs = p10_pluboc_allAttrs.where(pd.notnull(p10_pluboc_allAttrs), None)

pAttr = ['PARCEL_ID','ACRES','CTYNAME', 'COUNTY_ID', 'GEOMETRY','JURIS','PLU_JURIS', 'PLU_CODE', 'PLU_DESC']
pCond = ['B_AGE', 'YEAR_BUILT', 'ILR', 'VACANT']
devType = ['HS','HT','HM','OF','HO','SC','IL','IW','IH','RS','RB','MR','MT','ME']
noDev = ['NODEV']
allowType = ['res1','nonRes1']
intens = ['MAX_H','MAX_DUA','MAX_FAR']
capacity = ['units','sqft']
srs = ['B_TYPE_SRC', 'SRC','cal_u_10','cal_u_18','cal_sf_10','cal_sf_18']

In [74]:
def byCounty(df,cty_id):
    df_cty = df.loc[df.CTYNAME == cty_id]
    return df_cty

def exportData(df,fname):
    if (df.shape[0] > 0):
        df_geo = gpd.GeoDataFrame(df, geometry='GEOMETRY')
        df_geo.to_file('outputs/mapping/' + fname + '.shp')
        df_csv = df.drop(columns = ['GEOMETRY'])
        df_csv.to_csv('outputs/mapping/' + fname + '.csv', index = False)
    else:
        print('no records')

### 3.1 Export All Attributes

In [42]:
%%time
# export all attributes
"""
p10_pluboc_clean = p10_pluboc_allAttrs[pAttr + \
                              [x + '_10' for x in devType] + [x + '_18' for x in devType] + \
                              [x + '_10' for x in intens] + [x + '_18' for x in intens] + \
                              [x + '_10' for x in allowType] + [x + '_18' for x in allowType] + \
                              ['NODEV_40','NODEV_50'] + \
                              [x + '_10' for x in capacity] + [x + '_18' for x in capacity] + \
                              srs]

exportData(p10_pluboc_clean,'p10_pluboc_allAttrs')

for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(p10_pluboc_clean,i)
    print('exporting ' + i)
    fname = 'pluboc_' + i
    exportDate(df,fname)
"""

Wall time: 1.02 ms


"\np10_pluboc_clean = p10_pluboc_allAttrs[pAttr +                               [x + '_10' for x in devType] + [x + '_18' for x in devType] +                               [x + '_10' for x in intens] + [x + '_18' for x in intens] +                               [x + '_10' for x in allowType] + [x + '_18' for x in allowType] +                               ['NODEV_40','NODEV_50'] +                               [x + '_10' for x in capacity] + [x + '_18' for x in capacity] +                               srs]\n\nexportData(p10_pluboc_clean,'p10_pluboc_allAttrs')\n\nfor i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:\n    df = byCounty(p10_pluboc_clean,i)\n    print('exporting ' + i)\n    fname = 'pluboc_' + i\n    exportDate(df,fname)\n"

In [21]:
%%time

## export intensity by county
intensity = p10_pluboc_allAttrs[pAttr + [x + '_10' for x in intens] + [x + '_18' for x in intens] + srs[:2]]
exportData(intensity,'intensity')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(intensity,i)
    print('exporting ' + i)
    fname = 'devIntensity_' + i
    exportData(df,fname)
"""
# exportallowed development type by county
allowDevType = p10_pluboc_allAttrs[pAttr + [x + '_10' for x in devType] + [x + '_18' for x in devType] + srs[:2]]
exportData(allowDevType,'allowDevType')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(allowDevType,i)
    print('exporting ' + i)
    fname = 'allowDevType_' + i
    exportData(df,fname)
"""

Wall time: 37min 49s


"\nfor i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:\n    df = byCounty(allowDevType,i)\n    print('exporting ' + i)\n    fname = 'allowDevType_' + i\n    exportData(df,fname)\n"

### 3.2 Selected Attributes

In [75]:
%%time
## HM allowed in BASIS BOC
hm_boc = p10_pluboc_allAttrs.loc[p10_pluboc_allAttrs.HM_18 == 1][pAttr + ['HM_18'] + srs[:2]]
exportData(hm_boc,'HM_BOC_allow')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(hm_boc,i)
    print('exporting ' + i)
    fname = 'HM_BOC_allow_' + i
    exportData(df,fname)
"""

Wall time: 3min 54s


"\nfor i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:\n    df = byCounty(hm_boc,i)\n    print('exporting ' + i)\n    fname = 'HM_BOC_allow_' + i\n    exportData(df,fname)\n"

In [76]:
%%time
## HM allowed: 4 categories, in pba40 plu only, in BASIS only, in both, in neither
p10_pluboc_allAttrs['HM_comp'] = np.nan
p10_pluboc_allAttrs['HM_comp'] = np.where((p10_pluboc_allAttrs.HM_10 == 1) & (p10_pluboc_allAttrs.HM_18 == 1),'both allow',np.where(
        (p10_pluboc_allAttrs.HM_10 == 1) & (p10_pluboc_allAttrs.HM_18 == 0),'only pba40 allow',np.where(
        (p10_pluboc_allAttrs.HM_10 == 0) & (p10_pluboc_allAttrs.HM_18 == 1),'only BASIS allow',np.where(
        (p10_pluboc_allAttrs.HM_10 == 0) & (p10_pluboc_allAttrs.HM_18 == 0),'both not allow',np.where(
        (p10_pluboc_allAttrs.HM_10.notnull()) & (p10_pluboc_allAttrs.HM_18.isnull()),'missing BASIS BOC','other')))))

hm_comp = p10_pluboc_allAttrs[pAttr + ['HM_comp'] + srs[:2]]
exportData(hm_comp,'HM_comp')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(hm_comp,i)
    print('exporting ' + i)
    fname = 'HM_comp_' + i
    exportData(df,fname)
"""

Wall time: 12min 7s


"\nfor i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:\n    df = byCounty(hm_comp,i)\n    print('exporting ' + i)\n    fname = 'HM_comp_' + i\n    exportData(df,fname)\n"

In [50]:
print(list(p10_pluboc_allAttrs))

['PARCEL_ID', 'COUNTY_ID', 'JURIS', 'B_AGE', 'GEOM_ID_S', 'ACRES', 'MAX_FAR_10', 'MAX_DUA_10', 'MAX_DUA_18', 'MAX_FAR_18', 'MAX_H_10', 'MAX_H_18', 'HS_10', 'HT_10', 'HM_10', 'OF_10', 'HO_10', 'SC_10', 'IL_10', 'IW_10', 'IH_10', 'RS_10', 'RB_10', 'MR_10', 'MT_10', 'ME_10', 'ILR', 'VACANT', 'PBA50ZONINGMODCAT', 'NODEV_40', 'NODEV_50', 'res1_10', 'nonRes1_10', 'units_10', 'Ksqft_10', 'emp_10', 'cal_u_10', 'cal_sf_10', 'res1_18', 'nonRes1_18', 'units_18', 'Ksqft_18', 'emp_18', 'cal_u_18', 'cal_sf_18', 'CTYNAME', 'ME_18', 'MT_18', 'MR_18', 'RB_18', 'RS_18', 'IH_18', 'IW_18', 'IL_18', 'SC_18', 'HO_18', 'OF_18', 'HM_18', 'HT_18', 'HS_18', 'YEAR_BUILT', 'B_TYPE_SRC', 'SRC', 'GEOMETRY', 'parcel_id', 'PLU_JURIS', 'PLU_CODE', 'PLU_DESC', 'HM_comp']


In [77]:
%%time
## MR allowed but HM no allowed in BASIS BOC 
p10_pluboc_allAttrs['MR_noHM_boc'] = np.nan
p10_pluboc_allAttrs['MR_noHM_boc'] = np.where((p10_pluboc_allAttrs.HM_18 == 0) & 
                                              (p10_pluboc_allAttrs.MR_18 == 1),'MR_noHM_boc',np.where((
                                                  p10_pluboc_allAttrs.HM_18.isnull()) & 
                                              (p10_pluboc_allAttrs.MR_18 == 1),'MR_nanHM_boc','other'))

MR_noHM_boc = p10_pluboc_allAttrs.query("MR_noHM_boc == 'MR_noHM_boc' | MR_noHM_boc == 'MR_nanHM_boc'")
MR_noHM_boc = MR_noHM_boc[pAttr + ['MR_noHM_boc'] + srs[:2]]
exportData(MR_noHM_boc,'MR_noHM_boc')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(MR_noHM_boc,i)
    print('exporting ' + i)
    fname = 'MR_noHM_boc_' + i
    exportData(df,fname)
"""

Wall time: 42.2 s


"\nfor i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:\n    df = byCounty(MR_noHM_boc,i)\n    print('exporting ' + i)\n    fname = 'MR_noHM_boc_' + i\n    exportData(df,fname)\n"

In [78]:
%%time
## integrated residential capacity (HM HS HT or MR allowed AND what is DUA: raw or calculated from FAR or height)
resOnly = p10_pluboc_allAttrs.loc[(p10_pluboc_allAttrs.res1_10 == True) | (p10_pluboc_allAttrs.res1_18 == True)]

## chk if correctly categorized parcels that allow residential
display(resOnly.loc[resOnly.res1_18 == True][['HM_18','HS_18','HT_18','MR_18']].sum(axis = 1).value_counts())
display(resOnly.loc[resOnly.res1_10 == True][['HM_10','HS_10','HT_10','MR_10']].sum(axis = 1).value_counts())

## chk if missing units value
display(resOnly.loc[(resOnly.units_18 == 0) & (resOnly.res1_18 == True) & (resOnly.NODEV_50 == 0)].shape)
display(resOnly.loc[(resOnly.units_10 == 0) & (resOnly.res1_10 == True) & (resOnly.NODEV_40 == 0)].shape)

## export
res_cap_18 = resOnly[pAttr + ['units_18'] + pCond + srs[:2] + ['cal_u_18']]
exportData(res_cap_18,'res_capacity_18')

res_cap_10 = resOnly[pAttr + ['units_10'] + pCond + srs[:2] + ['cal_u_10']]
exportData(res_cap_10,'res_capacity_10')

res_DUA_18 = resOnly[pAttr + ['MAX_DUA_18'] + pCond + srs[:2]]
exportData(res_DUA_18,'res_DUA_18')

res_DUA_10 = resOnly[pAttr + ['MAX_DUA_10'] + pCond + srs[:2]]
exportData(res_DUA_10,'res_DUA_10')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(resOnly,i)
    print('exporting ' + i)
    fname = 'resCapacity_' + i
    exportData(df,fname)
"""

1.0    712129
2.0    423642
3.0    251496
4.0    198517
dtype: int64

1    858003
2    608412
3    267332
4     26752
dtype: int64

(0, 70)

(0, 70)

Wall time: 2h 41min 16s


"\nfor i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:\n    df = byCounty(resOnly,i)\n    print('exporting ' + i)\n    fname = 'resCapacity_' + i\n    exportData(df,fname)\n"

In [79]:
%%time
## Employment capacity in thousands of square feet: calculate for all categories that allow employment 
## Employment capacity in employees
    ## Assume a parcel that only allows office is 175 sqft per employee; 
    ## Assume a parcel that only allows IH IL or IW is 500 sqft per employee; 
    ## assume any other parcels with comm is 350 per employee

nonResOnly = p10_pluboc_allAttrs.query('nonRes1_10 == True | nonRes1_18 == True')

## chk if correctly categorized parcels that allow non-residential
display(nonResOnly.loc[nonResOnly.nonRes1_18 == True][[x + '_18' for x in nonRes]].sum(axis = 1).value_counts())
display(nonResOnly.loc[nonResOnly.nonRes1_10 == True][[x + '_10' for x in nonRes]].sum(axis = 1).value_counts())

## chk: if missing sqft value
display(nonResOnly.loc[(nonResOnly.Ksqft_18 == 0) & (nonResOnly.nonRes1_18 == True) & (nonResOnly.NODEV_50 == 0)].shape)
display(nonResOnly.loc[(nonResOnly.Ksqft_10 == 0) & (nonResOnly.nonRes1_10 == True) & (nonResOnly.NODEV_40 == 0)].shape)

display(nonResOnly.loc[(nonResOnly.Ksqft_18 == 0) & (nonResOnly.nonRes1_18 == True) & (nonResOnly.NODEV_50 == 0)]['MAX_H_18'].value_counts())
display(nonResOnly.loc[(nonResOnly.Ksqft_10 == 0) & (nonResOnly.nonRes1_10 == True) & (nonResOnly.NODEV_40 == 0)]['MAX_H_10'].value_counts())

## export
nonRes_cap_18 = nonResOnly[pAttr + ['Ksqft_18'] + pCond + srs[:2] + ['cal_sf_18']]
exportData(nonRes_cap_18,'nonRes_capacity_18')

nonRes_cap_10 = nonResOnly[pAttr + ['Ksqft_10'] + pCond + srs[:2] + ['cal_sf_10']]
exportData(nonRes_cap_10,'nonRes_capacity_10')

nonRes_emp_18 = nonResOnly[pAttr + ['emp_18'] + pCond + srs[:2] + ['cal_sf_18']]
exportData(nonRes_emp_18,'nonRes_emp_18')

nonRes_emp_10 = nonResOnly[pAttr + ['emp_10'] + pCond + srs[:2] + ['cal_sf_10']]
exportData(nonRes_emp_10,'nonRes_emp_10')

1.0     274193
2.0     132612
4.0     109872
5.0      96042
6.0      74891
3.0      52410
9.0      46650
7.0      37096
8.0      28013
10.0     26456
11.0      8221
13.0         2
dtype: int64

1     385896
2     346866
3      58542
4      23700
5      16669
7      14312
6      13820
8       2373
9       1660
10       224
11        13
dtype: int64

(157405, 70)

(44662, 70)

0.0    116
Name: MAX_H_18, dtype: int64

0.0    44662
Name: MAX_H_10, dtype: int64

Wall time: 1h 2min 27s
