In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import fiona
import os
import glob
from datetime import datetime
pd.options.display.max_rows = 100

In [2]:
# Per-user locations of the GitHub checkouts.  The Box folders follow the same
# pattern for every user, so they are derived from the user name once, below.
_current_user = os.getenv('USERNAME')

if _current_user == 'ywang':
    GitHub_petrale_dir      = 'C:\\Users\\ywang\\Documents\\GitHub\\petrale'
    GitHub_urbansim_dir     = 'C:\\Users\\ywang\\Documents\\GitHub\\bayarea_urbansim\\data'

elif _current_user == 'lzorn':
    GitHub_petrale_dir      = 'X:\\petrale'
    GitHub_urbansim_dir     = 'X:\\bayarea_urbansim\\data'

else:
    # fail here with an actionable message instead of a NameError on BOX_dir further down
    raise RuntimeError(
        "Unrecognized USERNAME {!r}: add a branch above that sets GitHub_petrale_dir and "
        "GitHub_urbansim_dir for this user".format(_current_user))

BOX_dir                 = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim 1.5\\PBA50'.format(_current_user)
BOX_smelt_dir           = 'C:\\Users\\{}\\Box\\baydata\\smelt\\2020 03 12'.format(_current_user)

# input file locations
pba40_zoning_box_dir    = os.path.join(BOX_dir, 'OLD PBA50 Large General Input Data')
pba50_zoningmod_dir     = os.path.join(BOX_dir, 'Policies\\Zoning Modifications')
other_inputs_dir        = os.path.join(BOX_dir, 'Policies\\Base zoning\\inputs')

# output file location
data_output_dir         = os.path.join(BOX_dir, 'Policies\\Base zoning\\outputs')

# See Dataset_Field_Definitions_Phase1.xlsx, Build Out Capacity worksheet
# https://mtcdrive.box.com/s/efbpxbz8553e90eljvlnnq20465whyiv
# Note that "MR" is listed in both the residential and the non-residential list.
ALLOWED_BUILDING_TYPE_CODES = ["HS","HT","HM","OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]
RES_BUILDING_TYPE_CODES     = ["HS","HT","HM",                                        "MR"          ]
NONRES_BUILDING_TYPE_CODES  = [               "OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]

# used in impute_max_dua() and impute_max_far()
SQUARE_FEET_PER_ACRE                = 43560.0
SQUARE_FEET_PER_DU                  = 1200.0
FEET_PER_STORY                      = 11.0
PARCEL_USE_EFFICIENCY               = 0.5
SQUARE_FEET_PER_EMPLOYEE            = 350.0
SQUARE_FEET_PER_EMPLOYEE_OFFICE     = 175.0
SQUARE_FEET_PER_EMPLOYEE_INDUSTRIAL = 500.0

# date stamp (YYYY_MM_DD) used as a prefix for the output file names
today = datetime.today().strftime('%Y_%m_%d')

In [3]:
## Basemap parcels
# Read the p10 parcel table, keeping only the id, acreage and land value columns.
basemap_p10_file = os.path.join(BOX_smelt_dir, 'p10.csv')
print(basemap_p10_file)

# column name -> dtype; the keys double as the list of columns to read
basemap_p10_dtypes = {
    'PARCEL_ID' : np.float64,
    'geom_id_s' : str,
    'ACRES'     : np.float64,
    'LAND_VALUE': np.float64,
}
basemap_p10 = pd.read_csv(basemap_p10_file,
                          usecols=list(basemap_p10_dtypes),
                          dtype=basemap_p10_dtypes)

print("Read {:,} rows from {}".format(len(basemap_p10), basemap_p10_file))
display(basemap_p10.head())

C:\Users\ywang\Box\baydata\smelt\2020 03 12\p10.csv
Read 1,956,208 rows from C:\Users\ywang\Box\baydata\smelt\2020 03 12\p10.csv


Unnamed: 0,PARCEL_ID,LAND_VALUE,ACRES,geom_id_s
0,229116.0,0.0,3.36052,10305106092872
1,244166.0,0.0,1.294423,11107351665227
2,202378.0,6036500.0,14.993605,11030175960628
3,2004420.0,179954.0,316.247146,6381677629073
4,340332.0,0.0,0.621275,314875459798


In [4]:
###### P10 parcels zoning designations

## parcel to zoning code mapping
pba40_pz_file = os.path.join(pba40_zoning_box_dir, '2015_12_21_zoning_parcels.csv')
pba40_pz = pd.read_csv(
    pba40_pz_file,
    usecols = ['geom_id','zoning_id','nodev'],
    # The column in the file is 'nodev' (it is only renamed to 'nodev_pba40' after the merge
    # below), so that is the key the dtype mapping has to use.  np.int64 replaces the np.int
    # alias, which was deprecated in NumPy 1.20 and removed in 1.24.
    dtype = {'geom_id':str, 'zoning_id':np.float64, 'nodev':np.int64})

print("Read {:,} rows from {}".format(len(pba40_pz), pba40_pz_file))
display(pba40_pz.head())

## add zoning_id, nodev_pba40 columns to p10
# geom_id is read as a string on both sides so the join keys compare exactly
p10_pba40_pz = pd.merge(left=basemap_p10, right=pba40_pz, left_on='geom_id_s', right_on = 'geom_id', how='left')
p10_pba40_pz.rename(columns={'nodev':'nodev_pba40'}, inplace=True)
#display(p10_pba40_pz.head())

## Check Number of parcels missing zoning designation
# (p10 parcels with no matching geom_id in the zoning parcels file end up with a null zoning_id)
p10_pba40_pz_missing = p10_pba40_pz.loc[p10_pba40_pz['zoning_id'].isnull()]
print("Out of {0:,} p10 parcels, {1:,} or {2:.1f}% are missing 'zoning_id' values".format(
    len(p10_pba40_pz), len(p10_pba40_pz_missing), 100.0*len(p10_pba40_pz_missing)/len(p10_pba40_pz)))

Read 1,950,733 rows from C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\PBA50\OLD PBA50 Large General Input Data\2015_12_21_zoning_parcels.csv


Unnamed: 0,geom_id,zoning_id,nodev
0,1846247885201,12202.0,0
1,11768793521677,12204.0,0
2,807545210880,12204.0,0
3,8785012057974,12204.0,0
4,14057552282712,12204.0,0


Out of 1,956,208 p10 parcels, 6,797 or 0.3% are missing 'zoning_id' values


In [5]:
###### P10 parcels with PBA40 zoning code PLU

# Read the PBA40 zoning lookup table (one row per zoning id)
pba40_plu_file = os.path.join(GitHub_urbansim_dir, 'zoning_lookup.csv')
pba40_plu = pd.read_csv(pba40_plu_file, dtype={'id':float})
print("Read {:,} rows from {}".format(len(pba40_plu), pba40_plu_file))

# 'SC' is read as a string for some reason -- coerce it to numeric (unparseable values become NaN)
pba40_plu['SC'] = pd.to_numeric(pba40_plu['SC'], errors='coerce')

# tag every column with a _pba40 suffix
rename_cols = {col: col + "_pba40" for col in pba40_plu.columns.values}
pba40_plu = pba40_plu.rename(columns=rename_cols)
display(pba40_plu.head())

# check duplicates in zoning id
# jz_o is the city name and the zoning name joined by a single space
pba40_plu['jz_o'] = pba40_plu['city_pba40'].str.cat(pba40_plu['name_pba40'],sep=" ")
n_unique_ids  = pba40_plu['id_pba40'].nunique(dropna=False)
n_unique_jz_o = pba40_plu['jz_o'].nunique(dropna=False)
print("Out of {:,} rows in pba40_plu, {:,} have unique values of 'id', {:,} have unique values of 'jz_o'".format(
    len(pba40_plu), n_unique_ids, n_unique_jz_o))

# using the zoning_id, get the pba40 zoning data (intensities, allowed building types)
p10_pba40_plu = p10_pba40_pz.merge(pba40_plu, how='left', left_on='zoning_id', right_on='id_pba40')

# Check number of p10 records failed to find a matching PLU
p10_pba40_plu_missing = p10_pba40_plu[p10_pba40_plu['jz_o'].isna()]
n_plu_rows    = len(p10_pba40_plu)
n_plu_missing = len(p10_pba40_plu_missing)
print("Out of {0:,} rows in p10_pba40_plu, {1:,} or {2:.1f}% are missing 'jz_o' values".format(
    n_plu_rows, n_plu_missing, 100.0*n_plu_missing/n_plu_rows))

display(p10_pba40_plu.head())

Read 5,156 rows from C:\Users\ywang\Documents\GitHub\bayarea_urbansim\data\zoning_lookup.csv


Unnamed: 0,id_pba40,juris_pba40,city_pba40,name_pba40,max_far_pba40,max_height_pba40,max_dua_pba40,max_du_per_parcel_pba40,HS_pba40,HT_pba40,...,SC_pba40,IL_pba40,IW_pba40,IH_pba40,RS_pba40,RB_pba40,MR_pba40,MT_pba40,ME_pba40,plandate_pba40
0,2101.0,1.0,Albany,RHD,0.5,35.0,9.0,,1,0,...,0.0,0,0,0,0,0,0,0,0,
1,2102.0,1.0,Albany,R-1,0.55,28.0,12.0,,1,0,...,1.0,0,0,0,0,0,0,0,0,
2,2103.0,1.0,Albany,R-2,0.55,35.0,35.0,,1,1,...,1.0,0,0,0,0,0,0,0,0,
3,2104.0,1.0,Albany,R-3,1.5,35.0,63.0,,1,1,...,1.0,0,0,0,0,0,0,0,0,
4,2105.0,1.0,Albany,R.4,,,87.0,,0,0,...,0.0,0,0,0,0,0,0,0,0,


Out of 5,156 rows in pba40_plu, 5,156 have unique values of 'id', 4,536 have unique values of 'jz_o'
Out of 1,956,208 rows in p10_pba40_plu, 6,797 or 0.3% are missing 'jz_o' values


Unnamed: 0,PARCEL_ID,LAND_VALUE,ACRES,geom_id_s,geom_id,zoning_id,nodev_pba40,id_pba40,juris_pba40,city_pba40,...,IL_pba40,IW_pba40,IH_pba40,RS_pba40,RB_pba40,MR_pba40,MT_pba40,ME_pba40,plandate_pba40,jz_o
0,229116.0,0.0,3.36052,10305106092872,10305106092872,60126.0,0.0,60126.0,,Livermore,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Livermore 107 - Urban Low Residential UL2
1,244166.0,0.0,1.294423,11107351665227,11107351665227,11903.0,0.0,11903.0,99.0,Livermore,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Livermore GP-ULM
2,202378.0,6036500.0,14.993605,11030175960628,11030175960628,11803.0,0.0,11803.0,98.0,Hayward,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Hayward LDR
3,2004420.0,179954.0,316.247146,6381677629073,6381677629073,12975.0,0.0,12975.0,109.0,Unincorporated Sonoma,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Unincorporated Sonoma LEA240
4,340332.0,0.0,0.621275,314875459798,314875459798,2511.0,0.0,2511.0,5.0,Fremont,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Fremont RESM4


In [6]:
###### P10 with BASIS BOC

## Read BASIS BOC
basis_boc_file = os.path.join(other_inputs_dir,'p10_boc_opt_b_v1d_tbl.csv')

# descriptive columns are read as strings; every other column is read as float
basis_boc_str_columns = ['plu_id','plu_jurisdiction','plu_description','building_types_source','source']
basis_boc_columns = (
    ['parcel_id','max_height','max_dua','max_far']
    + basis_boc_str_columns
    + [btype.lower() for btype in ALLOWED_BUILDING_TYPE_CODES]
)
basis_boc_dtypes = {
    name: (str if name in basis_boc_str_columns else float)
    for name in basis_boc_columns
}

basis_boc = pd.read_csv(basis_boc_file, usecols = basis_boc_columns, dtype = basis_boc_dtypes)
print("Read {:,} rows from {}".format(len(basis_boc), basis_boc_file))

# append _basis to column names to differentiate between basis PLU and pba40 PLU;
# two-letter columns (ht, hm, etc.) are upper-cased first, giving HT_basis, HM_basis, ...
rename_cols = {
    name: (name.upper() if len(name) == 2 else name) + "_basis"
    for name in basis_boc.columns.values
}
basis_boc = basis_boc.rename(columns=rename_cols)


# report on missing allowed building types
for btype in ALLOWED_BUILDING_TYPE_CODES:
    null_btype_count = int(basis_boc["{}_basis".format(btype)].isnull().sum())
    print('Number of parcels missing allowable type for {}: {:,} or {:.1f}%'.format(btype,
           null_btype_count, 100.0*null_btype_count/len(basis_boc)))

# merge basis plu to p10 + pba40 plu
p10_basis_pba40_boc = p10_pba40_plu.merge(
    basis_boc, how='left', left_on='PARCEL_ID', right_on='parcel_id_basis')

# these pba40 lookup columns are not carried forward
p10_basis_pba40_boc = p10_basis_pba40_boc.drop(columns = ['id_pba40','name_pba40','plandate_pba40','jz_o'])
display(p10_basis_pba40_boc.dtypes)

Read 1,933,226 rows from C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\PBA50\Policies\Base zoning\inputs\p10_boc_opt_b_v1d_tbl.csv
Number of parcels missing allowable type for HS: 204,054 or 10.6%
Number of parcels missing allowable type for HT: 204,069 or 10.6%
Number of parcels missing allowable type for HM: 210,129 or 10.9%
Number of parcels missing allowable type for OF: 210,029 or 10.9%
Number of parcels missing allowable type for HO: 210,129 or 10.9%
Number of parcels missing allowable type for SC: 210,103 or 10.9%
Number of parcels missing allowable type for IL: 204,857 or 10.6%
Number of parcels missing allowable type for IW: 210,111 or 10.9%
Number of parcels missing allowable type for IH: 210,072 or 10.9%
Number of parcels missing allowable type for RS: 210,129 or 10.9%
Number of parcels missing allowable type for RB: 210,625 or 10.9%
Number of parcels missing allowable type for MR: 210,129 or 10.9%
Number of parcels missing allowable type for M

PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
geom_id_s                       object
geom_id                         object
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                       float64
RB_pba40                       float64
MR_pba40                       float64
MT_pba40                 

In [7]:
###### Bring in zoning scenarios data

# Read the parcel-level zoning modification attributes
zmod_file = os.path.join(pba50_zoningmod_dir,'p10_pba50_attr_20200416.csv')
zmod_columns = ['PARCEL_ID','juris','pba50zoningmodcat','nodev']
zmod = pd.read_csv(zmod_file, usecols = zmod_columns)

print("Read {:,} rows from {}".format(len(zmod), zmod_file))
display(zmod.head())

# append _zmod to column names to clarify source of these columns
rename_cols = {name: name + "_zmod" for name in zmod.columns.values}
zmod = zmod.rename(columns=rename_cols)
display(zmod.dtypes)

# merge parcel data with zoning mods
p10_basis_pba40_boc_zmod = p10_basis_pba40_boc.merge(
    zmod, how = 'left', left_on='PARCEL_ID', right_on='PARCEL_ID_zmod')
# NOTE(review): the message says 'p10_b10_...' but the variable is p10_basis_pba40_boc_zmod
print("Created p10_b10_basis_pba40_boc_zmod:")
display(p10_basis_pba40_boc_zmod.dtypes)

Read 1,956,208 rows from C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\PBA50\Policies\Zoning Modifications\p10_pba50_attr_20200416.csv


Unnamed: 0,PARCEL_ID,pba50zoningmodcat,nodev,juris
0,229116.0,livermoreNANAHRADRNAinNA,0,livermore
1,244166.0,livermoreNANADRNAinNA,0,livermore
2,202378.0,haywardNANANANAinNA,0,hayward
3,2004420.0,unincorporated_sonomaNANADRNAoutNA,0,unincorporated_sonoma
4,340332.0,fremontNANAHRADRNAinNA,1,fremont


PARCEL_ID_zmod            float64
pba50zoningmodcat_zmod     object
nodev_zmod                  int64
juris_zmod                 object
dtype: object

Created p10_b10_basis_pba40_boc_zmod:


PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
geom_id_s                       object
geom_id                         object
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                       float64
RB_pba40                       float64
MR_pba40                       float64
MT_pba40                 

In [8]:
###### Bring in jurisdiction_county lookup data
juris_county_lookup_file = os.path.join(GitHub_petrale_dir,'zones\\jurisdictions\\juris_county_id.csv')
juris_county_columns = ['juris_name_full','juris_id','county_name', 'county_id']
juris_county_lookup = pd.read_csv(juris_county_lookup_file, usecols = juris_county_columns)

# attach juris_id / county_name / county_id by matching juris_zmod to the lookup's juris_name_full
p10_basis_pba40_boc_zmod_withJuris = pd.merge(
    left     = p10_basis_pba40_boc_zmod,
    right    = juris_county_lookup,
    how      = 'left',
    left_on  = 'juris_zmod',
    right_on = 'juris_name_full')

# juris_name_full duplicates juris_zmod for matched rows, so it is dropped
p10_basis_pba40_boc_zmod_withJuris = p10_basis_pba40_boc_zmod_withJuris.drop(columns = ['juris_name_full'])
display(p10_basis_pba40_boc_zmod_withJuris.head())

Unnamed: 0,PARCEL_ID,LAND_VALUE,ACRES,geom_id_s,geom_id,zoning_id,nodev_pba40,juris_pba40,city_pba40,max_far_pba40,...,source_basis,plu_description_basis,plu_jurisdiction_basis,PARCEL_ID_zmod,pba50zoningmodcat_zmod,nodev_zmod,juris_zmod,juris_id,county_name,county_id
0,229116.0,0.0,3.36052,10305106092872,10305106092872,60126.0,0.0,,Livermore,,...,,Planned Development,Livermore,229116.0,livermoreNANAHRADRNAinNA,0,livermore,livr,Alameda,1
1,244166.0,0.0,1.294423,11107351665227,11107351665227,11903.0,0.0,99.0,Livermore,,...,,Residential,Livermore,244166.0,livermoreNANADRNAinNA,0,livermore,livr,Alameda,1
2,202378.0,6036500.0,14.993605,11030175960628,11030175960628,11803.0,0.0,98.0,Hayward,,...,,Planned Development,Hayward,202378.0,haywardNANANANAinNA,0,hayward,hayw,Alameda,1
3,2004420.0,179954.0,316.247146,6381677629073,6381677629073,12975.0,0.0,109.0,Unincorporated Sonoma,,...,,Land Extensive Agriculture,Unincorporated Sonoma,2004420.0,unincorporated_sonomaNANADRNAoutNA,0,unincorporated_sonoma,uson,Sonoma,97
4,340332.0,0.0,0.621275,314875459798,314875459798,2511.0,0.0,5.0,Fremont,,...,,Planned District,Fremont,340332.0,fremontNANAHRADRNAinNA,1,fremont,frem,Alameda,1


In [9]:
def set_allow_dev_type(df_original,boc_source):
    """
    Summarize whether residential and/or non-residential development is allowed.

    For each row, sums the [btype]_[boc_source] columns over RES_BUILDING_TYPE_CODES and over
    NONRES_BUILDING_TYPE_CODES.  Null values are treated as 0.

    Args:
        df_original: DataFrame with a PARCEL_ID column and one [btype]_[boc_source] column for
                     every code in ALLOWED_BUILDING_TYPE_CODES.  It is not modified.
        boc_source:  suffix identifying the data source, e.g. "basis" or "pba40".

    Returns:
        DataFrame with the columns PARCEL_ID, allow_res_[boc_source], allow_nonres_[boc_source].
    """
    suffix = "_" + boc_source

    def _columns_for(codes):
        # building type codes -> the matching column names for this source
        return [code + suffix for code in codes]

    # work on a copy so the caller's frame stays untouched
    parcels = df_original.copy()

    # nulls are replaced with 0 before summing
    for column in _columns_for(ALLOWED_BUILDING_TYPE_CODES):
        parcels[column] = parcels[column].fillna(value=0.0)

    res_column    = "allow_res_"    + boc_source
    nonres_column = "allow_nonres_" + boc_source

    # row-wise count of allowed residential / non-residential building types
    parcels[res_column]    = parcels[_columns_for(RES_BUILDING_TYPE_CODES)].sum(axis=1)
    parcels[nonres_column] = parcels[_columns_for(NONRES_BUILDING_TYPE_CODES)].sum(axis=1)

    return parcels[['PARCEL_ID', res_column, nonres_column]]

In [10]:
def impute_max_dua(df_original,boc_source):
    """
    Impute missing max_dua_[boc_source] values from max_far_[boc_source] and/or max_height_[boc_source].

    A DUA is derived from max_far as      max_far * SQUARE_FEET_PER_ACRE / SQUARE_FEET_PER_DU
    and from max_height by first deriving max_height / FEET_PER_STORY * PARCEL_USE_EFFICIENCY
    as a FAR and then converting that FAR the same way.  When both derived values are available
    the smaller one is used; when they are equal the max_far-derived value is used.

    Note: For parcels that are nodev or residential development isn't allowed, max_dua isn't important

    Args:
        df_original: DataFrame with the columns PARCEL_ID, max_dua_[boc_source],
                     max_far_[boc_source] and max_height_[boc_source].  It is not modified.
        boc_source:  suffix identifying the data source, e.g. "basis" or "pba40".

    Returns:
        DataFrame with the columns PARCEL_ID, max_dua_[boc_source], source_dua_[boc_source],
        where source_dua_[boc_source] is one of:
            [boc_source]:                      already set so no imputation was necessary
            imputed from max_far (as min):     both available, max_far gives the smaller (or equal) DUA
            imputed from max_height (as min):  both available, max_height gives the smaller DUA
            imputed from max_far:              only max_far is available
            imputed from max_height:           only max_height is available
            missing:                           max_far and max_height are missing too
    """
    # don't modify passed df
    df = df_original.copy()

    dua_col    = 'max_dua_'    + boc_source
    source_col = 'source_dua_' + boc_source

    dua_missing = df[dua_col].isnull()
    print("impute_max_dua(): Before imputation, number of parcels with missing max_dua_{}: {:,}".format(
        boc_source, int(dua_missing.sum())))

    # we can only fill in missing if either max_far or max_height is not null
    max_dua_from_far    = df['max_far_'    +boc_source] * SQUARE_FEET_PER_ACRE / SQUARE_FEET_PER_DU
    max_far_from_height = df['max_height_' +boc_source] / FEET_PER_STORY * PARCEL_USE_EFFICIENCY
    max_dua_from_height = max_far_from_height * SQUARE_FEET_PER_ACRE / SQUARE_FEET_PER_DU

    far_available    = max_dua_from_far.notnull()
    height_available = max_dua_from_height.notnull()
    both_available   = far_available & height_available
    height_is_min    = both_available & (max_dua_from_height < max_dua_from_far)

    # decide on imputation source: the four masks are mutually exclusive
    use_height_as_min = dua_missing & height_is_min
    # everything else with both values, including ties -- a strict '>' here would leave
    # parcels with equal values un-imputed and labelled 'missing'
    use_far_as_min    = dua_missing & both_available & ~height_is_min
    use_height_only   = dua_missing & height_available & ~far_available
    use_far_only      = dua_missing & far_available & ~height_available

    # default to missing, then label the rows that are already set or can be imputed
    df[source_col] = 'missing'
    df.loc[~dua_missing,      source_col] = boc_source
    df.loc[use_height_as_min, source_col] = 'imputed from max_height (as min)'
    df.loc[use_far_as_min,    source_col] = 'imputed from max_far (as min)'
    df.loc[use_height_only,   source_col] = 'imputed from max_height'
    df.loc[use_far_only,      source_col] = 'imputed from max_far'

    # imputation is decided -- set it (the right-hand Series is aligned on the index)
    df.loc[use_height_as_min | use_height_only, dua_col] = max_dua_from_height
    df.loc[use_far_as_min    | use_far_only,    dua_col] = max_dua_from_far

    print("impute_max_dua(): After imputation: ")
    print(df[source_col].value_counts())
    print("")

    return df[['PARCEL_ID', dua_col, source_col]]
    

In [11]:
def impute_max_far(df_original,boc_source):
    """
    Impute missing max_far_[boc_source] values from max_height_[boc_source].

    The imputed value is max_height / FEET_PER_STORY * PARCEL_USE_EFFICIENCY.

    Note: For parcels that are nodev or nonresidential development isn't allowed, max_far isn't important

    Args:
        df_original: DataFrame with the columns PARCEL_ID, max_far_[boc_source] and
                     max_height_[boc_source].  It is not modified.
        boc_source:  suffix identifying the data source, e.g. "basis" or "pba40".

    Returns:
        DataFrame with the columns PARCEL_ID, max_far_[boc_source], source_far_[boc_source],
        where source_far_[boc_source] is one of:
            [boc_source]:             already set so no imputation was necessary
            imputed from max_height:  filled in from max_height
            missing:                  max_height is missing too
    """
    far_col    = 'max_far_'    + boc_source
    source_col = 'source_far_' + boc_source

    # work on a copy so the caller's frame stays untouched
    parcels = df_original.copy()

    far_missing = parcels[far_col].isnull()
    print("impute_max_far(): Before imputation, number of parcels with missing max_far_{}: {:,}".format(
        boc_source, int(far_missing.sum())))

    # candidate value; null wherever max_height is null
    far_from_height = parcels['max_height_' + boc_source] / FEET_PER_STORY * PARCEL_USE_EFFICIENCY
    can_impute = far_missing & far_from_height.notnull()

    # label every row: default, then already-set rows, then imputable rows
    parcels[source_col] = 'missing'
    parcels.loc[~far_missing, source_col] = boc_source
    parcels.loc[can_impute,   source_col] = 'imputed from max_height'

    # fill in the imputed values (the right-hand Series is aligned on the index)
    parcels.loc[can_impute, far_col] = far_from_height

    print("impute_max_far(): After imputation: ")
    print(parcels[source_col].value_counts())
    print("")

    return parcels[['PARCEL_ID', far_col, source_col]]


In [12]:
# Add basis and pba40 allowed_res_ and allowed_nonres_
allowed_basis = set_allow_dev_type(p10_basis_pba40_boc_zmod_withJuris, "basis")
allowed_pba40 = set_allow_dev_type(p10_basis_pba40_boc_zmod_withJuris, "pba40")

# join each summary back onto the parcel table by PARCEL_ID (basis first, then pba40)
for allowed in (allowed_basis, allowed_pba40):
    p10_basis_pba40_boc_zmod_withJuris = p10_basis_pba40_boc_zmod_withJuris.merge(
        allowed, how="left", on="PARCEL_ID")
p10_basis_pba40_boc_zmod_withJuris

Unnamed: 0,PARCEL_ID,LAND_VALUE,ACRES,geom_id_s,geom_id,zoning_id,nodev_pba40,juris_pba40,city_pba40,max_far_pba40,...,pba50zoningmodcat_zmod,nodev_zmod,juris_zmod,juris_id,county_name,county_id,allow_res_basis,allow_nonres_basis,allow_res_pba40,allow_nonres_pba40
0,229116.0,0.0,3.360520,10305106092872,10305106092872,60126.0,0.0,,Livermore,,...,livermoreNANAHRADRNAinNA,0,livermore,livr,Alameda,1,0.0,0.0,1.0,0.0
1,244166.0,0.0,1.294423,11107351665227,11107351665227,11903.0,0.0,99.0,Livermore,,...,livermoreNANADRNAinNA,0,livermore,livr,Alameda,1,0.0,0.0,1.0,0.0
2,202378.0,6036500.0,14.993605,11030175960628,11030175960628,11803.0,0.0,98.0,Hayward,,...,haywardNANANANAinNA,0,hayward,hayw,Alameda,1,0.0,7.0,2.0,0.0
3,2004420.0,179954.0,316.247146,6381677629073,6381677629073,12975.0,0.0,109.0,Unincorporated Sonoma,,...,unincorporated_sonomaNANADRNAoutNA,0,unincorporated_sonoma,uson,Sonoma,97,4.0,8.0,1.0,0.0
4,340332.0,0.0,0.621275,314875459798,314875459798,2511.0,0.0,5.0,Fremont,,...,fremontNANAHRADRNAinNA,1,fremont,frem,Alameda,1,0.0,0.0,3.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1956203,580747.0,29321.0,0.071424,17158666132196,17158666132196,10204.0,0.0,82.0,Antioch,,...,antiochNANADRNAinsfd,0,antioch,anti,Contra Costa,13,4.0,10.0,3.0,1.0
1956204,594355.0,30019.0,0.137534,16389503450045,16389503450045,10702.0,0.0,87.0,Concord,,...,concordNANADRNAinsfd,0,concord,conc,Contra Costa,13,2.0,0.0,2.0,0.0
1956205,804156.0,0.0,0.019658,1496694834659,1496694834659,8213.0,0.0,62.0,Novato,0.4,...,novatoNANAHRADRNAinNA,0,novato,nova,Marin,41,0.0,3.0,0.0,3.0
1956206,646401.0,517899.0,0.254764,10694584892329,10694584892329,12302.0,0.0,103.0,Unincorporated Contra Costa,,...,unincorporated_contra_costaNANAHRADRNAinunsfd,0,unincorporated_contra_costa,ucnc,Contra Costa,13,0.0,0.0,1.0,0.0


In [13]:
# print the dtype of every column of the merged parcel table as a quick check
print(p10_basis_pba40_boc_zmod_withJuris.dtypes)

PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
geom_id_s                       object
geom_id                         object
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                       float64
RB_pba40                       float64
MR_pba40                       float64
MT_pba40                 

In [14]:
# Impute missing max_dua / max_far for both sources; each call prints its own before/after report
dua_basis = impute_max_dua(p10_basis_pba40_boc_zmod_withJuris, "basis")
dua_pba40 = impute_max_dua(p10_basis_pba40_boc_zmod_withJuris, "pba40")

far_basis = impute_max_far(p10_basis_pba40_boc_zmod_withJuris, "basis")
far_pba40 = impute_max_far(p10_basis_pba40_boc_zmod_withJuris, "pba40")

# replace the columns with those with imputations
print(len(p10_basis_pba40_boc_zmod_withJuris))
p10_basis_pba40_boc_zmod_withJuris = p10_basis_pba40_boc_zmod_withJuris.drop(
    columns=['max_dua_basis','max_dua_pba40','max_far_basis','max_far_pba40'])

# each imputed frame brings its max_* column plus the matching source_* column
for imputed in (dua_basis, dua_pba40, far_basis, far_pba40):
    p10_basis_pba40_boc_zmod_withJuris = p10_basis_pba40_boc_zmod_withJuris.merge(
        imputed, how="left", on="PARCEL_ID")



impute_max_dua(): Before imputation, number of parcels with missing max_dua_basis: 169,961
impute_max_dua(): After imputation: 
basis                               1786247
missing                               83810
imputed from max_far                  59439
imputed from max_far (as min)         17976
imputed from max_height (as min)       8391
imputed from max_height                 345
Name: source_dua_basis, dtype: int64

impute_max_dua(): Before imputation, number of parcels with missing max_dua_pba40: 386,487
impute_max_dua(): After imputation: 
pba40                               1569721
imputed from max_far (as min)        246884
missing                               99028
imputed from max_far                  25421
imputed from max_height (as min)      13664
imputed from max_height                1490
Name: source_dua_pba40, dtype: int64

impute_max_far(): Before imputation, number of parcels with missing max_far_basis: 86,705
impute_max_far(): After imputation: 
basis        

In [15]:
###### Export PLU BOC data to csv

# parcel identifiers and geography
id_columns = ['PARCEL_ID','county_id', 'county_name', 'juris_zmod', 'ACRES', 'pba50zoningmodcat_zmod']

# intensity
intensity_columns = [
    'max_far_basis',   'max_far_pba40',
    'source_far_basis','source_far_pba40',
    'max_dua_basis',   'max_dua_pba40',
    'source_dua_basis','source_dua_pba40',
    'max_height_basis','max_height_pba40',
]

nodev_columns = ['nodev_zmod', 'nodev_pba40']

# allow building types sum
allow_sum_columns = [
    'allow_res_basis',    'allow_res_pba40',
    'allow_nonres_basis', 'allow_nonres_pba40',
]

# BASIS metadata
basis_metadata_columns = [
    'building_types_source_basis','source_basis',
    'plu_id_basis','plu_jurisdiction_basis','plu_description_basis',
]

# allowed building types: [btype]_basis followed by [btype]_pba40 for every code
allowed_type_columns = [
    btype + suffix
    for btype in ALLOWED_BUILDING_TYPE_CODES
    for suffix in ("_basis", "_pba40")
]

output_columns = (id_columns + intensity_columns + nodev_columns + allow_sum_columns
                  + basis_metadata_columns + allowed_type_columns)

plu_boc_output = p10_basis_pba40_boc_zmod_withJuris[output_columns]

# the file name is prefixed with today's date stamp
plu_boc_output_file = os.path.join(data_output_dir, today+'_p10_plu_boc_allAttrs.csv')
plu_boc_output.to_csv(plu_boc_output_file, index = False)

In [16]:
###### Evaluate development type for QA/QC

# Compare, per building type, whether PBA40 and BASIS allow it, and write one label per
# parcel and building type to [btype]_comp.
plu_boc = plu_boc_output.copy()

for devType in ALLOWED_BUILDING_TYPE_CODES:
    comp_col      = devType + '_comp'
    pba40_allowed = plu_boc[devType + '_pba40']
    basis_allowed = plu_boc[devType + '_basis']

    # Start as an object column of NaN: the labels below are strings, and assigning strings
    # into a float64 column is deprecated in pandas >= 2.1.
    plu_boc[comp_col] = pd.Series(np.nan, index=plu_boc.index, dtype=object)

    # The assignments are ordered from lowest to highest priority -- later ones overwrite
    # earlier ones.  Rows matching none of the conditions stay NaN.
    plu_boc.loc[(pba40_allowed == 1) & (basis_allowed == 0), comp_col] = 'only PBA40 allow'
    plu_boc.loc[(pba40_allowed == 0) & (basis_allowed == 1), comp_col] = 'only BASIS allow'
    plu_boc.loc[(pba40_allowed == 1) & (basis_allowed == 1), comp_col] = 'both allow'
    plu_boc.loc[(pba40_allowed == 0) & (basis_allowed == 0), comp_col] = 'both not allow'
    plu_boc.loc[basis_allowed.isnull() & pba40_allowed.notnull(), comp_col] = 'missing BASIS data'
    plu_boc.loc[pba40_allowed.isnull(),                           comp_col] = 'missing PBA40 data'
    # nodev_zmod == 1 overrides every other label
    plu_boc.loc[plu_boc['nodev_zmod'] == 1,                       comp_col] = 'not developable'

devType_comp = plu_boc[['PARCEL_ID','county_id','county_name','juris_zmod', 'ACRES',
                        'nodev_zmod','nodev_pba40'] +
                       [devType+'_comp' for devType in ALLOWED_BUILDING_TYPE_CODES]]

devType_comp.to_csv(os.path.join(data_output_dir, today+'_devType_comparison.csv'),index = False)