In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import simpledbf
import fiona
from datetime import datetime
pd.options.display.max_rows = 100

In [2]:
################ Data Sources #################

if os.getenv('USERNAME')=='ywang':
    folder      = 'C:\\Users\\ywang\\Documents\\\Files_for_Py\\BASIS\\BOC'
    input_dir   = os.path.join(folder, 'inputs')
    output_dir  = os.path.join(folder, 'outputs')


ALLOWED_BUILDING_TYPE_CODES = ["HS","HT","HM","OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]
RES_BUILDING_TYPE_CODES     = ["HS","HT","HM",                                        "MR"          ]
NONRES_BUILDING_TYPE_CODES  = [               "OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]

today = datetime.today().strftime('%Y_%m_%d')

In [3]:
## Pacel 10
### Origional sources: https://mtcdrive.app.box.com/folder/106699591369
basemap_p10_file = os.path.join(input_dir, 'p10.csv')
basemap_p10 = pd.read_csv(
    basemap_p10_file,
    usecols =['PARCEL_ID','geom_id_s','COUNTY_ID','jurisdiction','ACRES','LAND_VALUE'],
    dtype   ={'PARCEL_ID':np.float64, 'geom_id_s':str, 'COUNTY_ID':np.int32, 'juristiction':str, 
              'ACRES':np.float64, 'LAND_VALUE':np.float64})
print("Read {:,} rows from {}".format(len(basemap_p10), basemap_p10_file))
display(basemap_p10.head())

Read 1,956,208 rows from C:\Users\ywang\Documents\\Files_for_Py\BASIS\BOC\inputs\p10.csv


Unnamed: 0,PARCEL_ID,LAND_VALUE,ACRES,COUNTY_ID,geom_id_s,jurisdiction
0,229116.0,0.0,3.36052,1,10305106092872,41992
1,244166.0,0.0,1.294423,1,11107351665227,41992
2,202378.0,6036500.0,14.993605,1,11030175960628,33000
3,2004420.0,179954.0,316.247146,97,6381677629073,97
4,340332.0,0.0,0.621275,1,314875459798,26000


In [4]:
cnty = {'Alameda': 1.0,
'Contra Costa': 13.0,
'Marin': 41.0,
'Napa': 55.0,
'San Francisco': 75.0,
'San Mateo': 81.0,
'Santa Clara': 85.0,
'Solano': 95.0,
'Sonoma': 97.0}

ctyMap = pd.DataFrame(cnty.items(), columns=['CTY_NAME', 'COUNTY_ID'])

basemap_p10 = basemap_p10.merge(ctyMap,on='COUNTY_ID',how='left')
print(basemap_p10.shape)
#display(basemap_p10.head())

(1956208, 7)


In [5]:
###### P10 parcels zoining designations

## pacel to zoning code mapping
pba40_pz_file = os.path.join(input_dir, '2015_12_21_zoning_parcels.csv')
pba40_pz = pd.read_csv(
    pba40_pz_file,
    usecols = ['geom_id','zoning_id','nodev'],
    dtype = {'geom_id':str, 'zoning_id':np.float64, 'nodev_pba40':np.int})

print("Read {:,} rows from {}".format(len(pba40_pz), pba40_pz_file))
display(pba40_pz.head())

## add zoning_id, nodev_pba40 columns to p10
p10_pba40_pz = pd.merge(left=basemap_p10, right=pba40_pz, left_on='geom_id_s', right_on = 'geom_id', how='left')
p10_pba40_pz.rename(columns={'nodev':'nodev_pba40'}, inplace=True)
#display(p10_pba40_pz.head())

## Check Number of parcels missing zoning designation
p10_pba40_pz_missing = p10_pba40_pz.loc[p10_pba40_pz['zoning_id'].isnull()]
print("Out of {0:,} p10 parcels, {1:,} or {2:.1f}% are missing 'zoning_id' values".format(
    len(p10_pba40_pz), len(p10_pba40_pz_missing), 100.0*len(p10_pba40_pz_missing)/len(p10_pba40_pz)))

Read 1,950,733 rows from C:\Users\ywang\Documents\\Files_for_Py\BASIS\BOC\inputs\2015_12_21_zoning_parcels.csv


Unnamed: 0,geom_id,zoning_id,nodev
0,1846247885201,12202.0,0
1,11768793521677,12204.0,0
2,807545210880,12204.0,0
3,8785012057974,12204.0,0
4,14057552282712,12204.0,0


Out of 1,956,208 p10 parcels, 6,797 or 0.3% are missing 'zoning_id' values


In [6]:
###### P10 parcels with PBA40 zoning code PLU

pba40_plu_file = os.path.join(input_dir, 'zoning_lookup.csv')
pba40_plu = pd.read_csv(pba40_plu_file, dtype={'id':float})
print("Read {:,} rows from {}".format(len(pba40_plu), pba40_plu_file))
# coerce this column to float -- it's a string for some reason
pba40_plu['SC'] = pd.to_numeric(pba40_plu['SC'], errors='coerce')

# append _pba40 to column names
rename_cols = dict((col,col+"_pba40") for col in pba40_plu.columns.values)
pba40_plu.rename(columns=rename_cols, inplace=True)
display(pba40_plu.head())

# check duplicates in zoning id
pba40_plu['jz_o'] = pba40_plu['city_pba40'].str.cat(pba40_plu['name_pba40'],sep=" ")
print("Out of {:,} rows in pba40_plu, {:,} have unique values of 'id', {:,} have unique values of 'jz_o'".format(
    len(pba40_plu), len(pba40_plu.id_pba40.unique()), len(pba40_plu.jz_o.unique())))

# using the zoning_id, get the pba40 zoning data (intensities, allowed building types)
p10_pba40_plu = pd.merge(left=p10_pba40_pz, right=pba40_plu, left_on='zoning_id', right_on='id_pba40', how='left')

# Check number of p10 records failed to find a matching PLU
p10_pba40_plu_missing = p10_pba40_plu.loc[p10_pba40_plu['jz_o'].isnull()]
print("Out of {0:,} rows in p10_pba40_plu, {1:,} or {2:.1f}% are missing 'jz_o' values".format(
    len(p10_pba40_plu), len(p10_pba40_plu_missing), 100.0*len(p10_pba40_plu_missing)/len(p10_pba40_plu)))

display(p10_pba40_plu.head())

Read 5,156 rows from C:\Users\ywang\Documents\\Files_for_Py\BASIS\BOC\inputs\zoning_lookup.csv


Unnamed: 0,id_pba40,juris_pba40,city_pba40,name_pba40,max_far_pba40,max_height_pba40,max_dua_pba40,max_du_per_parcel_pba40,HS_pba40,HT_pba40,...,SC_pba40,IL_pba40,IW_pba40,IH_pba40,RS_pba40,RB_pba40,MR_pba40,MT_pba40,ME_pba40,plandate_pba40
0,2101.0,1.0,Albany,RHD,0.5,35.0,9.0,,1,0,...,0.0,0,0,0,0,0,0,0,0,
1,2102.0,1.0,Albany,R-1,0.55,28.0,12.0,,1,0,...,1.0,0,0,0,0,0,0,0,0,
2,2103.0,1.0,Albany,R-2,0.55,35.0,35.0,,1,1,...,1.0,0,0,0,0,0,0,0,0,
3,2104.0,1.0,Albany,R-3,1.5,35.0,63.0,,1,1,...,1.0,0,0,0,0,0,0,0,0,
4,2105.0,1.0,Albany,R.4,,,87.0,,0,0,...,0.0,0,0,0,0,0,0,0,0,


Out of 5,156 rows in pba40_plu, 5,156 have unique values of 'id', 4,536 have unique values of 'jz_o'
Out of 1,956,208 rows in p10_pba40_plu, 6,797 or 0.3% are missing 'jz_o' values


Unnamed: 0,PARCEL_ID,LAND_VALUE,ACRES,COUNTY_ID,geom_id_s,jurisdiction,CTY_NAME,geom_id,zoning_id,nodev_pba40,...,IL_pba40,IW_pba40,IH_pba40,RS_pba40,RB_pba40,MR_pba40,MT_pba40,ME_pba40,plandate_pba40,jz_o
0,229116.0,0.0,3.36052,1,10305106092872,41992,Alameda,10305106092872,60126.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Livermore 107 - Urban Low Residential UL2
1,244166.0,0.0,1.294423,1,11107351665227,41992,Alameda,11107351665227,11903.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Livermore GP-ULM
2,202378.0,6036500.0,14.993605,1,11030175960628,33000,Alameda,11030175960628,11803.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Hayward LDR
3,2004420.0,179954.0,316.247146,97,6381677629073,97,Sonoma,6381677629073,12975.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Unincorporated Sonoma LEA240
4,340332.0,0.0,0.621275,1,314875459798,26000,Alameda,314875459798,2511.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Fremont RESM4


In [7]:
###### P10 with BASIS BOC

## Read BASIS BOC
basis_boc_file = os.path.join(input_dir,'p10_boc_opt_b_v1d_tbl.csv')
basis_boc_columns = [
    'parcel_id','max_height','max_dua','max_far',
    'plu_id','plu_jurisdiction','plu_description',
    'building_types_source','source'] + [btype.lower() for btype in ALLOWED_BUILDING_TYPE_CODES]
# most are integer
basis_boc_dtypes = dict((x, float) for x in basis_boc_columns)
# except these
basis_boc_dtypes['plu_id'               ] = str
basis_boc_dtypes['plu_jurisdiction'     ] = str
basis_boc_dtypes['plu_description'      ] = str
basis_boc_dtypes['building_types_source'] = str
basis_boc_dtypes['source'               ] = str

basis_boc = pd.read_csv(basis_boc_file, usecols = basis_boc_columns, dtype = basis_boc_dtypes)
print("Read {:,} rows from {}".format(len(basis_boc), basis_boc_file))

# append _basis to column names to differentiate between basis PLU and pba40 PLU between 
rename_cols = {}
for col in basis_boc.columns.values:
    # rename the ht, hm, etc to HT_basis, HM_basis, ...
    if len(col)==2:
        rename_cols[col] = col.upper() + "_basis"
    else:
        rename_cols[col] = col + "_basis"
basis_boc.rename(columns=rename_cols, inplace=True)


# report on missing allowed building types
for btype in ALLOWED_BUILDING_TYPE_CODES:
    null_btype_count = len(basis_boc.loc[basis_boc["{}_basis".format(btype)].isnull()])
    print('Number of parcels missing allowable type for {}: {:,} or {:.1f}%'.format(btype,
           null_btype_count, 100.0*null_btype_count/len(basis_boc)))

# merge basis plu to p10 + pba40 plu
p10_basis_pba40_boc = pd.merge(left=p10_pba40_plu, right=basis_boc, left_on='PARCEL_ID', right_on='parcel_id_basis', how='left')

p10_basis_pba40_boc.drop(columns = ['id_pba40','name_pba40','plandate_pba40','jz_o'],inplace = True)
display(p10_basis_pba40_boc.dtypes)

Read 1,933,226 rows from C:\Users\ywang\Documents\\Files_for_Py\BASIS\BOC\inputs\p10_boc_opt_b_v1d_tbl.csv
Number of parcels missing allowable type for HS: 204,054 or 10.6%
Number of parcels missing allowable type for HT: 204,069 or 10.6%
Number of parcels missing allowable type for HM: 210,129 or 10.9%
Number of parcels missing allowable type for OF: 210,029 or 10.9%
Number of parcels missing allowable type for HO: 210,129 or 10.9%
Number of parcels missing allowable type for SC: 210,103 or 10.9%
Number of parcels missing allowable type for IL: 204,857 or 10.6%
Number of parcels missing allowable type for IW: 210,111 or 10.9%
Number of parcels missing allowable type for IH: 210,072 or 10.9%
Number of parcels missing allowable type for RS: 210,129 or 10.9%
Number of parcels missing allowable type for RB: 210,625 or 10.9%
Number of parcels missing allowable type for MR: 210,129 or 10.9%
Number of parcels missing allowable type for MT: 210,129 or 10.9%
Number of parcels missing allowable

PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
COUNTY_ID                        int32
geom_id_s                       object
jurisdiction                     int64
CTY_NAME                        object
geom_id                         object
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                 

In [8]:
###### Bring in zoning scenarios data

zmod_file = os.path.join(input_dir,'2020_04_17_parcels_geography.csv')
zmod = pd.read_csv(
    zmod_file,
    usecols = ['PARCEL_ID','geom_id','juris_id','pba50zoningmodcat','nodev'])

print("Read {:,} rows from {}".format(len(zmod), zmod_file))
display(zmod.head())

# append _zmod to column names to clarify source of these columns
rename_cols = dict((col, col+"_zmod") for col in zmod.columns.values)
zmod.rename(columns=rename_cols, inplace=True)
display(zmod.dtypes)

# merge parcel data with zoning mods
p10_basis_pba40_boc["geom_id"] = pd.to_numeric(p10_basis_pba40_boc.geom_id_s)
print(p10_basis_pba40_boc.dtypes)

p10_basis_pba40_boc_zmod       = pd.merge(left=p10_basis_pba40_boc, right=zmod, 
                                              left_on='geom_id', right_on='geom_id_zmod', how = 'left')
print("Created p10_b10_basis_pba40_boc_zmod:")
display(p10_basis_pba40_boc_zmod.dtypes)

Read 1,956,208 rows from C:\Users\ywang\Documents\\Files_for_Py\BASIS\BOC\inputs\2020_04_17_parcels_geography.csv


Unnamed: 0,PARCEL_ID,geom_id,juris_id,pba50zoningmodcat,nodev
0,229116,10305106092872,livr,livermoreNANAHRADRNAinNA,0
1,244166,11107351665227,livr,livermoreNANADRNAinNA,0
2,202378,11030175960628,hayw,haywardNANANANAinNA,0
3,2004420,6381677629073,uson,unincorporated_sonomaNANADRNAoutNA,0
4,340332,314875459798,frem,fremontNANAHRADRNAinNA,1


PARCEL_ID_zmod             int64
geom_id_zmod               int64
juris_id_zmod             object
pba50zoningmodcat_zmod    object
nodev_zmod                 int64
dtype: object

PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
COUNTY_ID                        int32
geom_id_s                       object
jurisdiction                     int64
CTY_NAME                        object
geom_id                          int64
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                 

PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
COUNTY_ID                        int32
geom_id_s                       object
jurisdiction                     int64
CTY_NAME                        object
geom_id                          int64
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                 

In [9]:
###### Export PLU BOC data to csv

output_columns = [
    'PARCEL_ID','COUNTY_ID', 'CTY_NAME', 'ACRES',
    'juris_id_zmod',
    
    # intensity
    'max_far_basis',   'max_far_pba40',
    'max_dua_basis',   'max_dua_pba40',
    'max_height_basis','max_height_pba40',
    'nodev_zmod',      'nodev_pba40',

    # BASIS metadata
    'building_types_source_basis','source_basis',
    'plu_id_basis','plu_jurisdiction_basis','plu_description_basis'
]
# allowed building types
for btype in ALLOWED_BUILDING_TYPE_CODES:
    output_columns.append(btype + "_basis")
    output_columns.append(btype + "_pba40")

plu_boc_output = p10_basis_pba40_boc_zmod[output_columns]

plu_boc_output.to_csv(os.path.join(output_dir, today+'_p10_plu_boc_allAttrs.csv'), index = False)