In [4]:
import pandas as pd
import numpy as np
import os
import glob
from datetime import datetime

In [5]:
"""
Input: p10 combined pba40 plu and basis boc data, with 
Output: p10 combined pba40 plu and basis boc data, with: 
    1) NaN in basis allowed development types filled in using pba40 data
    2) basis allowed development types replaced by pba40 data following the 'hybrid index'
"""

if os.getenv('USERNAME')    =='ywang':
    BOX_dir                 = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim 1.5\\PBA50'.format(os.getenv('USERNAME'))
    GitHub_petrale_dir      = 'C:\\Users\\{}\\Documents\\GitHub\\petrale\\'.format(os.getenv('USERNAME'))
    
# input file locations
hybrid_index_dir        = os.path.join(GitHub_petrale_dir, 'policies\\plu\\base_zoning\\hybrid_index')
pba40_zoning_box_dir    = os.path.join(BOX_dir, 'OLD PBA50 Large General Input Data')
pba50_zoningmod_dir     = os.path.join(BOX_dir, 'Policies\\Zoning Modifications')
raw_plu_boc_dir         = os.path.join(BOX_dir, 'Policies\\Base zoning\\outputs')
other_inputs_dir        = os.path.join(BOX_dir, 'Policies\\Base zoning\\inputs')

# output file location
data_output_dir         = os.path.join(BOX_dir, 'Policies\\Base zoning\\outputs\\hybrid_base_zoning')


ALLOWED_BUILDING_TYPE_CODES = ["HS","HT","HM","OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]
RES_BUILDING_TYPE_CODES     = ["HS","HT","HM",                                        "MR"          ]
NONRES_BUILDING_TYPE_CODES  = [               "OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]

today = datetime.today().strftime('%Y_%m_%d')

In [15]:
## P10 parcels with pba40 plu and basis boc data
plu_boc_file = os.path.join(raw_plu_boc_dir, today+'_p10_plu_boc_allAttrs.csv')
plu_boc = pd.read_csv(plu_boc_file)
print("Read {:,} rows from {}".format(len(plu_boc), plu_boc_file))
display(plu_boc.head())

Read 1,956,208 rows from C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\PBA50\Policies\Base zoning\outputs\2020_05_13_p10_plu_boc_allAttrs.csv


Unnamed: 0,PARCEL_ID,county_id,county_name,juris_zmod,ACRES,zoning_id_pba40,pba50zoningmodcat_zmod,max_far_basis,max_far_pba40,source_far_basis,...,RS_basis,RS_pba40,RB_basis,RB_pba40,MR_basis,MR_pba40,MT_basis,MT_pba40,ME_basis,ME_pba40
0,229116,1,Alameda,livermore,3.36052,60126.0,livermoreNANAHRADRNAinNA,0.0,,missing,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,244166,1,Alameda,livermore,1.294423,11903.0,livermoreNANADRNAinNA,0.35,,basis,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,202378,1,Alameda,hayward,14.993605,11803.0,haywardNANANANAinNA,0.0,1.363636,missing,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,2004420,97,Sonoma,unincorporated_sonoma,316.247146,12975.0,unincorporated_sonomaNANADRNAoutNA,1.590909,1.590909,imputed from max_height,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
4,340332,1,Alameda,fremont,0.621275,2511.0,fremontNANAHRADRNAinNA,0.01,2.363636,basis,...,,0.0,,0.0,0.0,0.0,,0.0,,0.0


In [103]:
## Assign allow residential and/or non-residential by summing the columns
## for the residential/nonresidential allowed building type codes
## Returns dataframe with PARCEL_ID, allow_res_[boc_source], allow_nonres_[boc_source]

def set_allow_dev_type(df_original,boc_source):
    # don't modify passed df
    df = df_original.copy()

    # note that they can't be null because then they won't sum -- so make a copy and fillna with 0
    for dev_type in ALLOWED_BUILDING_TYPE_CODES:
        df[dev_type+"_"+boc_source] = df[dev_type+"_"+boc_source].fillna(value=0.0)    
    
    # allow_res is sum of allowed building types that are residential
    res_allowed_columns = [btype+'_'+boc_source for btype in RES_BUILDING_TYPE_CODES]
    df['allow_res_' +boc_source] = df[res_allowed_columns].sum(axis=1)
    
    # allow_nonres is the sum of allowed building types that are non-residential
    nonres_allowed_columns = [btype+'_'+boc_source for btype in NONRES_BUILDING_TYPE_CODES]
    df['allow_nonres_'+boc_source] = df[nonres_allowed_columns].sum(axis=1)
    
    return df[['PARCEL_ID',
               "allow_res_"    +boc_source,
               "allow_nonres_" +boc_source]]

In [104]:
## Index 0: fill in missing allowed development types in BASIS

def countMissing(df, attr):
    null_attr_count = len(df.loc[df["{}_basis".format(attr)].isnull()])
    print('Number of parcels missing {} info: {:,} or {:.1f}%'.format(attr,
           null_attr_count, 100.0*null_attr_count/len(df)))

print('Count number of parcels missing allowable development types in the BASIS data:')
for devType in ALLOWED_BUILDING_TYPE_CODES:
    countMissing(plu_boc, devType)

plu_boc_filled_devTypeNa = plu_boc.copy()

for btype in ALLOWED_BUILDING_TYPE_CODES:
    plu_boc_filled_devTypeNa[btype+'_idx'] = 1
    missing_idx = (plu_boc_filled_devTypeNa[btype+'_basis'].isnull()) & (plu_boc_filled_devTypeNa['nodev_zmod'] == 0)
    plu_boc_filled_devTypeNa.loc[missing_idx, btype+'_basis'] = plu_boc_filled_devTypeNa.loc[missing_idx, btype+'_pba40']
    plu_boc_filled_devTypeNa.loc[missing_idx, btype+'_idx'] = '0_fill_na'

print('\n After filling nan in BASIS allowable development types using PBA40 data:')
for devType in ALLOWED_BUILDING_TYPE_CODES:
    countMissing(plu_boc_filled_devTypeNa, devType)

# recalculate 'allow_res' and 'allow_nonres' based on the allowable development type
allowed_basis = set_allow_dev_type(plu_boc_filled_devTypeNa,'basis')
allowed_pba40 = set_allow_dev_type(plu_boc_filled_devTypeNa,'pba40')
    
# drop the previous 'allow_res' and 'allow_nonres' and insert the new ones
plu_boc_filled_devTypeNa.drop(columns = ['allow_res_basis', 'allow_nonres_basis', 
                                         'allow_res_pba40', 'allow_nonres_pba40'], inplace = True)
plu_boc_filled_devTypeNa = plu_boc_filled_devTypeNa.merge(allowed_basis, 
                                                          on = 'PARCEL_ID', 
                                                          how = 'left').merge(allowed_pba40, 
                                                                              on = 'PARCEL_ID', 
                                                                              how = 'left')


#plu_boc_filled_devTypeNa.to_csv(os.path.join(data_output_dir, today+'_p10_plu_boc_fill_naType.csv'),index = False)

print('Print unique data source index for allowable development types (should only be "0_fill_na" or "1"):')
for i in ALLOWED_BUILDING_TYPE_CODES:
    print('{}: {}'.format(i,plu_boc_filled_devTypeNa[i+'_idx'].unique()))


Count number of parcels missing allowable development types in the BASIS data:
Number of parcels missing HS info: 94,415 or 4.8%
Number of parcels missing HT info: 94,415 or 4.8%
Number of parcels missing HM info: 94,415 or 4.8%
Number of parcels missing OF info: 233,073 or 11.9%
Number of parcels missing HO info: 233,173 or 11.9%
Number of parcels missing SC info: 233,146 or 11.9%
Number of parcels missing IL info: 227,894 or 11.6%
Number of parcels missing IW info: 233,154 or 11.9%
Number of parcels missing IH info: 233,114 or 11.9%
Number of parcels missing RS info: 233,173 or 11.9%
Number of parcels missing RB info: 233,670 or 11.9%
Number of parcels missing MR info: 94,415 or 4.8%
Number of parcels missing MT info: 233,173 or 11.9%
Number of parcels missing ME info: 233,235 or 11.9%

 After filling nan in BASIS allowable development types using PBA40 data:
Number of parcels missing HS info: 6,333 or 0.3%
Number of parcels missing HT info: 6,333 or 0.3%
Number of parcels missing HM

In [105]:
## Apply hybrid index to raw plu_boc data. 
## Returns plu_boc with updated allowable dev type and intensity parameters for BASIS fields, 
## along with '_idx' for each parameter to indicate the data source - '1' for BASIS '0' for PBA40

def apply_hybrid_idx(df_origional,hybrid_idx):
    # don't modify passed df
    df = df_origional.copy()
    for zoning in ALLOWED_BUILDING_TYPE_CODES + ['max_dua','max_far','max_height']:
        df[zoning+'_urbansim'] = df[zoning+'_basis']
    for intensity in ['max_dua','max_far','max_height']:
        df[intensity+'_idx'] = 1
    
    for juris in juris_list:
        
        print('')
        print('Apply hybrid index for: {}'.format(juris))
        for devType in ALLOWED_BUILDING_TYPE_CODES:
            if hybrid_idx[devType+'_idx'][juris] == 0:
                print('Use PBA40 data for {}'.format(devType))
                #print('Before applying the index, parcel counts by data source for {}:'.format(devType))
                #display(df.loc[df.juris_zmod == juris][devType+'_idx'].value_counts())
                
                replace_idx = (df.juris_zmod == juris) & (df[devType+'_idx'] != 'PBA40_fill_na')
                df.loc[replace_idx, devType+'_urbansim'] = df.loc[replace_idx, devType+'_pba40']
                df.loc[replace_idx, devType+'_idx'] = 0
                #print('After applying the index, parcel counts by data source for {}:'.format(devType))
                #display(df.loc[df.juris_zmod == juris][devType+'_idx'].value_counts())   
                
            elif hybrid_idx[devType+'_idx'][juris] == 1:
                print('Use BASIS data for {}'.format(devType))
        

        for intensity in ['max_dua','max_far','max_height']:
            if hybrid_idx[intensity+'_idx'][juris] == 0:
                print('Use PBA40 data for {}'.format(intensity))
                #print('Before applying the index, parcel counts by data source for {}:'.format(intensity))
                #display(df.loc[df.juris_zmod == juris][intensity+'_idx'].value_counts())
                
                replace_idx = df.juris_zmod == juris
                df.loc[replace_idx, intensity+'_urbansim'] = df.loc[replace_idx, intensity+'_pba40']
                df.loc[replace_idx, intensity+'_idx'] = 0
                #print('After applying the index, parcel counts by data source for {}:'.format(intensity))
                #display(df.loc[df.juris_zmod == juris][intensity+'_idx'].value_counts())
                
            elif hybrid_idx[intensity+'_idx'][juris] == 1:
                print('Use BASIS data for {}'.format(intensity))
                print('')

    return df

In [106]:
## fill in missing height in BASIS

print('Count number of parcels missing max_height in the BASIS data:')
countMissing(plu_boc_filled_devTypeNa, 'max_height')

plu_boc_filled_TpHt_Na = plu_boc_filled_devTypeNa.copy()
plu_boc_filled_TpHt_Na['max_height_idx'] = 1
missing_idx = (plu_boc_filled_TpHt_Na['max_height_basis'].isnull()) & (plu_boc_filled_TpHt_Na['nodev_zmod'] == 0)
plu_boc_filled_TpHt_Na.loc[missing_idx, 'max_height_basis'] = plu_boc_filled_TpHt_Na.loc[missing_idx, 'max_height_pba40']
plu_boc_filled_TpHt_Na.loc[missing_idx, 'max_height_idx'] = '0_fill_na'

print('\n After filling nan in BASIS max_height using PBA40 data:')
countMissing(plu_boc_filled_TpHt_Na, 'max_height')

Count number of parcels missing max_height in the BASIS data:
Number of parcels missing max_height info: 651,790 or 33.3%

 After filling nan in BASIS max_height using PBA40 data:
Number of parcels missing max_height info: 232,597 or 11.9%


In [107]:
## Apply the hybrid index of each hybrid version
for hybrid_idx_file in list(glob.glob(hybrid_index_dir+'/*.csv')):
    hybrid_name = os.path.basename(hybrid_idx_file).split('.')[0][4:]
    print('Hybrid version: {}'.format(hybrid_name))
    hybrid_idx = pd.read_csv(hybrid_idx_file)
    hybrid_idx.rename(columns = {'MAX_FAR_idx'   : 'max_far_idx', 
                                 'MAX_DUA_idx'   : 'max_dua_idx',
                                 'MAX_HEIGHT_idx': 'max_height_idx'}, inplace = True)
    display(hybrid_idx.head())
    hybrid_idx.set_index('juris_name',inplace = True)
    juris_list =list(hybrid_idx.index.values)
    
    for devType in ALLOWED_BUILDING_TYPE_CODES:
        print(devType)
        display(plu_boc_filled_TpHt_Na[devType+'_idx'].value_counts())
          
    plu_boc_hybrid = apply_hybrid_idx(plu_boc_filled_TpHt_Na,hybrid_idx)
    
    for devType in ALLOWED_BUILDING_TYPE_CODES:
        print(devType)
        display(plu_boc_hybrid[devType+'_idx'].value_counts())
        
    for intensity in ['max_dua','max_far','max_height']:
        print(intensity)
        display(plu_boc_hybrid[intensity+'_idx'].value_counts())


# recalculate 'allow_res' and 'allow_nonres' based on the allowable development type

    allowed_basis = set_allow_dev_type(plu_boc_hybrid,'basis')
    allowed_pba40 = set_allow_dev_type(plu_boc_hybrid,'pba40')
    allowed_urbansim = set_allow_dev_type(plu_boc_hybrid,'urbansim')
    
    # drop the previous 'allow_res' and 'allow_nonres' and insert the new ones
    plu_boc_hybrid.drop(columns = ['allow_res_basis', 'allow_nonres_basis', 
                                   'allow_res_pba40', 'allow_nonres_pba40'], inplace = True)
    plu_boc_hybrid = plu_boc_hybrid.merge(allowed_basis, 
                                          on = 'PARCEL_ID', 
                                          how = 'left').merge(allowed_pba40, 
                                                              on = 'PARCEL_ID', 
                                                              how = 'left').merge(allowed_urbansim,
                                                                                  on = 'PARCEL_ID', 
                                                                                  how = 'left' ) 

    #plu_boc_hybrid.to_csv(os.path.join(data_output_dir, today+'_p10_plu_boc_'+hybrid_name+'.csv'),index = False)
    
    if hybrid_name == '_urbansim':
        plu_boc_urbansim_cols = ['PARCEL_ID','county_id','county_name', 'juris_zmod', 'ACRES','pba50zoningmodcat_zmod','nodev_zmod'] + [
                         devType + '_urbansim' for devType in ALLOWED_BUILDING_TYPE_CODES] + [
                         intensity + '_urbansim' for intensity in ['max_dua','max_far','max_height']]

        plu_boc_urbansim = plu_boc_hybrid[plu_boc_urbansim_cols]

        plu_boc_urbansim.columns = ['PARCEL_ID','county_id','county_name', 'juris_zmod', 'ACRES',
                                    'pba50zoningmodcat_zmod','nodev_zmod'] + ALLOWED_BUILDING_TYPE_CODES + [
                                    'max_dua','max_far','max_height']

        for attr in ALLOWED_BUILDING_TYPE_CODES:
            plu_boc_urbansim[attr] = plu_boc_urbansim[attr].fillna(-1).astype(int)
        plu_boc_urbansim.replace({-1: np.nan}, inplace = True)

        
        zoning_lookup_pba50 = plu_boc_urbansim[['county_id','juris_zmod'] + ALLOWED_BUILDING_TYPE_CODES + ['max_dua','max_far','max_height']].drop_duplicates()
        print(zoning_lookup_pba50.shape)
        zoning_lookup_pba50.sort_values(by=['county_id', 'juris_zmod'], inplace = True)
        zoning_lookup_pba50['zoning_id_pba50'] = range(1,len(zoning_lookup_pba50) + 1)
        zoning_lookup_pba50.head()
         
        plu_boc_urbansim_ID = plu_boc_urbansim.merge(zoning_lookup_pba50,
                                             on = list(zoning_lookup_pba50)[:-1],
                                             how = 'left')
        zoning_parcels_pba50 = plu_boc_urbansim_ID[['PARCEL_ID','juris_zmod','zoning_id_pba50','nodev_zmod']]
        
        zoning_lookup_pba50.to_csv(os.path.join(data_output_dir, today+'_zoning_lookup_pba50.csv'),index = False)
        zoning_parcels_pba50.to_csv(os.path.join(data_output_dir, today+'_zoning_parcels_pba50.csv'),index = False)
        

Hybrid version: urbansim


Unnamed: 0,juris_id,juris_name,county,OF_idx,HO_idx,SC_idx,IL_idx,IW_idx,IH_idx,RS_idx,...,ME_idx,HS_idx,HT_idx,HM_idx,max_dua_idx,max_far_idx,max_height_idx,proportion_adj_dua,proportion_adj_far,proportion_adj_height
0,alam,alameda,ala,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,1,1
1,alba,albany,ala,1,0,0,0,0,0,1,...,0,0,0,1,1,1,1,1,1,1
2,berk,berkeley,ala,0,0,0,0,0,0,0,...,0,0,0,0,0,1,1,1,1,1
3,dubl,dublin,ala,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,1
4,emer,emeryville,ala,0,0,0,0,0,0,0,...,0,0,0,1,1,0,0,1,1,1


HS


1            1867576
0_fill_na      88632
Name: HS_idx, dtype: int64

HT


1            1867576
0_fill_na      88632
Name: HT_idx, dtype: int64

HM


1            1867576
0_fill_na      88632
Name: HM_idx, dtype: int64

OF


1            1744067
0_fill_na     212141
Name: OF_idx, dtype: int64

HO


1            1743967
0_fill_na     212241
Name: HO_idx, dtype: int64

SC


1            1743977
0_fill_na     212231
Name: SC_idx, dtype: int64

IL


1            1748893
0_fill_na     207315
Name: IL_idx, dtype: int64

IW


1            1743972
0_fill_na     212236
Name: IW_idx, dtype: int64

IH


1            1744014
0_fill_na     212194
Name: IH_idx, dtype: int64

RS


1            1743967
0_fill_na     212241
Name: RS_idx, dtype: int64

RB


1            1743474
0_fill_na     212734
Name: RB_idx, dtype: int64

MR


1            1867576
0_fill_na      88632
Name: MR_idx, dtype: int64

MT


1            1743967
0_fill_na     212241
Name: MT_idx, dtype: int64

ME


1            1743908
0_fill_na     212300
Name: ME_idx, dtype: int64


Apply hybrid index for: alameda
Use PBA40 data for HS
Use PBA40 data for HT
Use BASIS data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use PBA40 data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
Use PBA40 data for max_far
Use PBA40 data for max_height

Apply hybrid index for: albany
Use PBA40 data for HS
Use PBA40 data for HT
Use BASIS data for HM
Use BASIS data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use BASIS data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use BASIS data for max_dua

Use BASIS data for max_far

Use BASIS data for max_height


Apply hybrid index for: berkeley
Use PBA40 data for HS
Use PBA40 data for HT
Use PBA40 data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use 

Use PBA40 data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use PBA40 data for RS
Use PBA40 data for RB
Use PBA40 data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
Use PBA40 data for max_far
Use PBA40 data for max_height

Apply hybrid index for: el_cerrito
Use PBA40 data for HS
Use PBA40 data for HT
Use BASIS data for HM
Use BASIS data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use BASIS data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use BASIS data for max_dua

Use BASIS data for max_far

Use BASIS data for max_height


Apply hybrid index for: hercules
Use PBA40 data for HS
Use PBA40 data for HT
Use PBA40 data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 d

Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use PBA40 data for RS
Use PBA40 data for RB
Use PBA40 data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use BASIS data for max_dua

Use BASIS data for max_far

Use BASIS data for max_height


Apply hybrid index for: mill_valley
Use PBA40 data for HS
Use PBA40 data for HT
Use PBA40 data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use PBA40 data for RS
Use PBA40 data for RB
Use PBA40 data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
Use PBA40 data for max_far
Use PBA40 data for max_height

Apply hybrid index for: novato
Use PBA40 data for HS
Use PBA40 data for HT
Use PBA40 data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use PBA40 data for RS
Use PBA40 data for RB
Use BASIS da

Use PBA40 data for IW
Use PBA40 data for IH
Use BASIS data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use BASIS data for max_dua

Use PBA40 data for max_far
Use PBA40 data for max_height

Apply hybrid index for: los_gatos
Use PBA40 data for HS
Use PBA40 data for HT
Use BASIS data for HM
Use BASIS data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use BASIS data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
Use PBA40 data for max_far
Use PBA40 data for max_height

Apply hybrid index for: milpitas
Use PBA40 data for HS
Use PBA40 data for HT
Use PBA40 data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use PBA40 data for RS
Use PBA40 data for RB
Use PBA40 data for MR
Use PBA40 data for MT
Use PBA40 data

Use BASIS data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
Use BASIS data for max_far

Use BASIS data for max_height


Apply hybrid index for: foster_city
Use PBA40 data for HS
Use PBA40 data for HT
Use BASIS data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use PBA40 data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
Use PBA40 data for max_far
Use PBA40 data for max_height

Apply hybrid index for: half_moon_bay
Use PBA40 data for HS
Use PBA40 data for HT
Use PBA40 data for HM
Use BASIS data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use BASIS data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
U

Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
Use PBA40 data for max_far
Use PBA40 data for max_height

Apply hybrid index for: unincorporated_solano
Use PBA40 data for HS
Use PBA40 data for HT
Use BASIS data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use PBA40 data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
Use PBA40 data for max_far
Use PBA40 data for max_height

Apply hybrid index for: vacaville
Use PBA40 data for HS
Use PBA40 data for HT
Use PBA40 data for HM
Use PBA40 data for OF
Use PBA40 data for HO
Use PBA40 data for SC
Use PBA40 data for IL
Use PBA40 data for IW
Use PBA40 data for IH
Use PBA40 data for RS
Use PBA40 data for RB
Use BASIS data for MR
Use PBA40 data for MT
Use PBA40 data for ME
Use PBA40 data for max_dua
Use PBA40 data for max_far
Use PBA40 data

0    1956208
Name: HS_idx, dtype: int64

HT


0    1956208
Name: HT_idx, dtype: int64

HM


0            1702179
1             249004
0_fill_na       5025
Name: HM_idx, dtype: int64

OF


0            1717563
1             203006
0_fill_na      35639
Name: OF_idx, dtype: int64

HO


0    1956208
Name: HO_idx, dtype: int64

SC


0    1956208
Name: SC_idx, dtype: int64

IL


0    1956208
Name: IL_idx, dtype: int64

IW


0    1956208
Name: IW_idx, dtype: int64

IH


0    1956208
Name: IH_idx, dtype: int64

RS


0            1674777
1             270656
0_fill_na      10775
Name: RS_idx, dtype: int64

RB


0    1956208
Name: RB_idx, dtype: int64

MR


0            1087264
1             856630
0_fill_na      12314
Name: MR_idx, dtype: int64

MT


0    1956208
Name: MT_idx, dtype: int64

ME


0    1956208
Name: ME_idx, dtype: int64

max_dua


0    1771933
1     184275
Name: max_dua_idx, dtype: int64

max_far


0    1221717
1     734491
Name: max_far_idx, dtype: int64

max_height


0    1229511
1     726697
Name: max_height_idx, dtype: int64

In [66]:
# Stats on usage of PBA40 vs BASIS data
display(plu_boc_hybrid.groupby('juris_zmod')['max_height_idx','max_dua_idx','max_far_idx'].sum().reset_index())

  


Unnamed: 0,juris_zmod,max_height_idx,max_dua_idx,max_far_idx
0,alameda,0,0,0
1,albany,4578,4578,4578
2,american_canyon,5211,5211,5211
3,antioch,0,0,0
4,atherton,2588,2588,2588
...,...,...,...,...
104,vallejo,0,0,0
105,walnut_creek,15999,0,15999
106,windsor,0,0,0
107,woodside,2408,0,2408


In [120]:
basis_boc_file = os.path.join(other_inputs_dir,'p10_urbansim_boc_opt_b_v2.csv')
basis_boc_columns = ['parcel_id','plu_code','plu_jurisdiction','plu_description']

basis_zoning_id = pd.read_csv(basis_boc_file, usecols = basis_boc_columns)
basis_zoning_id = basis_zoning_id.loc[basis_zoning_id.parcel_id.notnull()]
print("After dropping parcel_id = nan, basis_boc has {:,} rows".format(len(basis_zoning_id)))

basis_zoning_id['parcel_id'] = basis_zoning_id['parcel_id'].apply(lambda x: int(round(x)))
basis_zoning_id['plu_code'] = basis_zoning_id['plu_code'].apply(lambda x: str(x)+'_basis')

basis_zoning_id

After dropping parcel_id = nan, basis_boc has 1,933,226 rows


Unnamed: 0,parcel_id,plu_jurisdiction,plu_code,plu_description
0,2054274,San Jose,RN_basis,Residential Neighborhood
1,2047652,Santa Rosa,RR-20_basis,Rural Residential
2,2045583,Santa Rosa,RR-20_basis,Rural Residential
3,2044679,Santa Rosa,R-1-6_basis,Single-Family Residential
4,2043774,Santa Rosa,R-1-6_basis,Single-Family Residential
...,...,...,...,...
1956203,487946,Unincorporated Contra Costa,A-80_basis,Exclusive Agricultural
1956204,410506,Unincorporated Contra Costa,A-40_basis,Exclusive Agricultural
1956205,245028,Unincorporated Alameda,A - BE_basis,Agricultural
1956206,239927,Unincorporated Alameda,A - CA_basis,Agricultural


In [None]:
key_attrs = [x + '_idx' for x in ['max_dua','max_far','max_height','HM','MR','RS','OF']]
plu_boc_hybrid['code_select'] = plu_boc_hybrid[key_attrs].sum(axis=1)

In [121]:
p10_zoning_id = pd.read_csv(r'C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\PBA50\Policies\Base zoning\outputs\2020_05_14_p10_plu_boc_allAttrs.csv')

FileNotFoundError: [Errno 2] File C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\PBA50\Policies\Base zoning\outputs\2020_05_14_p10_plu_boc_allAttrs.csv does not exist: 'C:\\Users\\ywang\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim 1.5\\PBA50\\Policies\\Base zoning\\outputs\\2020_05_14_p10_plu_boc_allAttrs.csv'

In [None]:
p10

(7334, 19)


Unnamed: 0,county_id,juris_zmod,HS,HT,HM,OF,HO,SC,IL,IW,IH,RS,RB,MR,MT,ME,max_dua,max_far,max_height,zoning_id_pba50
66,1,alameda,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,1
95,1,alameda,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,,,,2
96,1,alameda,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.7,,,3
280,1,alameda,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,72.6,2.0,,4
373,1,alameda,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.8,,,5


In [None]:
# Parcels missing zoning_id_pba40 but have a plu_id_basis
test = plu_boc_hybrid.copy()

parcels_new_zone_id_idx = (test['zoning_id_pba40'].isnull()) & (test['plu_id_basis'].notnull())
parcels_new_zone_id = test.loc[parcels_new_zone_id_idx]

print('Number of parcels with plu_id_basis but no zoning_id_pba40: {}'.format(parcels_new_zone_id.shape[0]))

test.zoning_id_pba40 = test.zoning_id_pba40.fillna(-1).astype(np.int64)

new_plu_id = list(parcels_new_zone_id.plu_id_basis.unique())
print('Number of unique plu_id_basis from these parcels: {}'.format(len(new_plu_id)))

# the largest number of zoning_id_pba40 is 99021 except for '99999', therefore, the new zone code starts from the integer following 
# the second largest zoning_id_pba40
old_zone_id = sorted(list(test.zoning_id_pba40.unique()))
print('Number of unique zoning_id_pba40: {}'.format(len(old_zone_id)))
end_of_old_zone_id = old_zone_id[-2]
gap = old_zone_id[-1] - old_zone_id[-2] - 1

if gap > len(new_plu_id):
    new_zone_id = list(range(end_of_old_zone_id+1, end_of_old_zone_id+1+len(new_plu_id)))
    zone_id_lookup =dict(zip(new_plu_id + old_zone_id , new_zone_id + old_zone_id))
    
    test['zoning_id_pba50_temp'] = test['zoning_id_pba40']
    test.loc[parcels_new_zone_id_idx,'zoning_id_pba50_temp'] = test.loc[parcels_new_zone_id_idx,'plu_id_basis']
    test['zoning_id_pba50'] = test['zoning_id_pba50_temp'].map(zone_id_lookup)
    print('Number of unique zoning_id_pba50: {}'.format(len(test['zoning_id_pba50'].unique())))
    
else:
    print('Cannot generate new zoning_id_pba50 for all new plu_id_basis.')


In [None]:
test.loc[test.zoning_id_pba40 != test.zoning_id_pba50]

In [None]:
test[['plu_id_basis', 'plu_jurisdiction_basis', 'plu_description_basis']]

In [None]:
zoning_parcels.shape

In [None]:
zoning_lookup.shape

In [None]:
len(plu_boc_hybrid.zoning_id_pba40.unique())

In [None]:
test[[intensity+'_basis' for intensity in ['max_far','max_far','max_height']] + 
                     [devType  +'_basis' for devType   in ALLOWED_BUILDING_TYPE_CODES]].drop_duplicates()