In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import simpledbf
import fiona
from datetime import datetime

pd.options.display.max_rows = 100

PyTables is not installed. No support for HDF output.
SQLalchemy is not installed. No support for SQL output.


### Data sources and constants

In [2]:
BOX_URBAN_MODELING_PBA50 = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim 1.5\\PBA50".format(
    os.getenv('USERNAME'))
# Downloaded from "2020 03 12\smelt.gdb" https://mtcdrive.box.com/s/sgy1uorcgt7uhh29fja7v93c21ppiudq
SMELT_GDB_DIR            = "M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12"
# Downloaded from "UrbanSim Data Review Option B v1d.gdb.zip" (https://mtcdrive.box.com/s/whruh4nazdlqpa7e5f1s4o90nglq95oc)
BASIS_GDB                = "M:\\Data\\GIS layers\\UrbanSim BASIS\\UrbanSim Data Review Option B v1d_2020apr03.gdb"

OUTPUT_DIR               = os.path.join(BOX_URBAN_MODELING_PBA50,"Input Data Analysis")

if os.getenv('USERNAME')=='ywang':
    GITHUB_BAUS = "location of bayarea_urbansim github repo"
elif os.getenv('USERNAME')=='lzorn':
    GITHUB_BAUS = "C:\\Users\\lzorn\\Documents\\bayarea_urbansim"

ALLOWED_BUILDING_TYPE_CODES = ["HS","HT","HM","OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]
RES_BUILDING_TYPE_CODES     = ["HS","HT","HM",                                        "MR"          ]
NONRES_BUILDING_TYPE_CODES  = [               "OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]

# used in calculate_capacity()
SQUARE_FEET_PER_ACRE                = 43560.0
SQUARE_FEET_PER_DU                  = 1200.0
FEET_PER_STORY                      = 11.0
SQUARE_FEET_PER_EMPLOYEE            = 350.0
SQUARE_FEET_PER_EMPLOYEE_OFFICE     = 175.0
SQUARE_FEET_PER_EMPLOYEE_INDUSTRIAL = 500.0

In [3]:
# Read basemap parcels (e.g. p10)
# this is an export from the table in the gdb done via
# https://github.com/BayAreaMetro/petrale/blob/master/basemap/export_filegdb_layers.py
basemap_p10_file = os.path.join(SMELT_GDB_DIR, "p10.csv")
basemap_p10 = pd.read_csv(
    basemap_p10_file, 
    usecols =['PARCEL_ID','geom_id_s','COUNTY_ID','jurisdiction','ACRES','LAND_VALUE','pda_id','zoningmodcat'],
    dtype   ={'PARCEL_ID':np.float64, 'geom_id_s':str, 'COUNTY_ID':np.int32, 'juristiction':str, 
              'ACRES':np.float64, 'LAND_VALUE':np.float64, 'pda_id':str, 'zoningmodcat':'str'})
print("Read {:,} rows from {}".format(len(basemap_p10), basemap_p10_file))
display(basemap_p10.head())

Read 1,956,208 rows from M:\Data\GIS layers\UrbanSim smelt\2020 03 12\p10.csv


Unnamed: 0,PARCEL_ID,LAND_VALUE,ACRES,COUNTY_ID,geom_id_s,jurisdiction,pda_id,zoningmodcat
0,229116.0,0.0,3.36052,1,10305106092872,41992,,41992NANANANA
1,244166.0,0.0,1.294423,1,11107351665227,41992,,41992NANANANA
2,202378.0,6036500.0,14.993605,1,11030175960628,33000,,33000NANANANA
3,2004420.0,179954.0,316.247146,97,6381677629073,97,,00097NANANANA
4,340332.0,0.0,0.621275,1,314875459798,26000,,26000NAb1NANA


## 1 Merge data sets

### 1.1 Parcels (p10) with PBA40 zoning designations

In [4]:
## parcel id to pba40 zoning_id
pba40_pz_file = os.path.join(BOX_URBAN_MODELING_PBA50,"Current PBA50 Large General Input Data","2020_03_06_zoning_parcels.csv")
pba40_pz = pd.read_csv(
    pba40_pz_file,
    usecols=['PARCEL_ID','zoning_id','nodev_pba40'],
    dtype  ={'PARCEL_ID':np.float64, 'zoning_id':np.float64, 'nodev_pba40':np.int})
print("Read {:,} rows from {}".format(len(pba40_pz), pba40_pz_file))
display(pba40_pz.head())

## add zoning_id, nodev_pba40 columns to p10
p10_pba40_pz = pd.merge(left=basemap_p10, right=pba40_pz, on='PARCEL_ID', how='left')
#display(p10_pba40_pz.head())

## Check Number of parcels missing zoning designation
p10_pba40_pz_missing = p10_pba40_pz.loc[p10_pba40_pz['zoning_id'].isnull()]
print("Out of {0:,} p10 parcels, {1:,} or {2:.1f}% are missing 'zoning_id' values".format(
    len(p10_pba40_pz), len(p10_pba40_pz_missing), 100.0*len(p10_pba40_pz_missing)/len(p10_pba40_pz)))

Read 1,950,733 rows from C:\Users\lzorn\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\PBA50\Current PBA50 Large General Input Data\2020_03_06_zoning_parcels.csv


Unnamed: 0,zoning_id,nodev_pba40,PARCEL_ID
0,12202.0,0,191124.0
1,12204.0,0,197219.0
2,12204.0,0,197218.0
3,12204.0,0,188301.0
4,12204.0,0,188939.0


Out of 1,956,208 p10 parcels, 446,628 or 22.8% are missing 'zoning_id' values


### 1.2 Parcels (p10) with PBA40 zoning (intensity and allowed building types)

In [5]:
## definition of pba40 zoning
pba40_plu_file = os.path.join(GITHUB_BAUS, "data", "zoning_lookup.csv")
pba40_plu = pd.read_csv(pba40_plu_file, dtype={'id':float})
print("Read {:,} rows from {}".format(len(pba40_plu), pba40_plu_file))
# print(pba40_plu.dtypes)
# coerce this column to float -- it's a string for some reason
pba40_plu["SC"] = pd.to_numeric(pba40_plu["SC"], errors='coerce')
# print(pba40_plu.dtypes)

# append _pba40 to column names
rename_cols = dict((col,col+"_pba40") for col in pba40_plu.columns.values)
pba40_plu.rename(columns=rename_cols, inplace=True)
display(pba40_plu.head())

# check duplicates in zoning id
pba40_plu['jz_o'] = pba40_plu['city_pba40'].str.cat(pba40_plu['name_pba40'],sep=" ")
print("Out of {:,} rows in pba40_plu, {:,} have unique values of 'id', {:,} have unique values of 'jz_o'".format(
    len(pba40_plu), len(pba40_plu.id_pba40.unique()), len(pba40_plu.jz_o.unique())))

# using the zoning_id, get the pba40 zoning data (intensities, allowed building types)
p10_pba40_plu = pd.merge(left=p10_pba40_pz, right=pba40_plu, left_on='zoning_id', right_on='id_pba40', how='left')

# Check number of p10 records failed to find a matching PLU
p10_pba40_plu_missing = p10_pba40_plu.loc[p10_pba40_plu['jz_o'].isnull()]
print("Out of {0:,} rows in p10_pba40_plu, {1:,} or {2:.1f}% are missing 'jz_o' values".format(
    len(p10_pba40_plu), len(p10_pba40_plu_missing), 100.0*len(p10_pba40_plu_missing)/len(p10_pba40_plu)))

display(p10_pba40_plu.head())

Read 5,156 rows from C:\Users\lzorn\Documents\bayarea_urbansim\data\zoning_lookup.csv


Unnamed: 0,id_pba40,juris_pba40,city_pba40,name_pba40,max_far_pba40,max_height_pba40,max_dua_pba40,max_du_per_parcel_pba40,HS_pba40,HT_pba40,...,SC_pba40,IL_pba40,IW_pba40,IH_pba40,RS_pba40,RB_pba40,MR_pba40,MT_pba40,ME_pba40,plandate_pba40
0,2101.0,1.0,Albany,RHD,0.5,35.0,9.0,,1,0,...,0.0,0,0,0,0,0,0,0,0,
1,2102.0,1.0,Albany,R-1,0.55,28.0,12.0,,1,0,...,1.0,0,0,0,0,0,0,0,0,
2,2103.0,1.0,Albany,R-2,0.55,35.0,35.0,,1,1,...,1.0,0,0,0,0,0,0,0,0,
3,2104.0,1.0,Albany,R-3,1.5,35.0,63.0,,1,1,...,1.0,0,0,0,0,0,0,0,0,
4,2105.0,1.0,Albany,R.4,,,87.0,,0,0,...,0.0,0,0,0,0,0,0,0,0,


Out of 5,156 rows in pba40_plu, 5,156 have unique values of 'id', 4,536 have unique values of 'jz_o'
Out of 1,956,208 rows in p10_pba40_plu, 446,628 or 22.8% are missing 'jz_o' values


Unnamed: 0,PARCEL_ID,LAND_VALUE,ACRES,COUNTY_ID,geom_id_s,jurisdiction,pda_id,zoningmodcat,zoning_id,nodev_pba40,...,IL_pba40,IW_pba40,IH_pba40,RS_pba40,RB_pba40,MR_pba40,MT_pba40,ME_pba40,plandate_pba40,jz_o
0,229116.0,0.0,3.36052,1,10305106092872,41992,,41992NANANANA,60126.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Livermore 107 - Urban Low Residential UL2
1,244166.0,0.0,1.294423,1,11107351665227,41992,,41992NANANANA,11903.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Livermore GP-ULM
2,202378.0,6036500.0,14.993605,1,11030175960628,33000,,33000NANANANA,11803.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Hayward LDR
3,2004420.0,179954.0,316.247146,97,6381677629073,97,,00097NANANANA,12975.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Unincorporated Sonoma LEA240
4,340332.0,0.0,0.621275,1,314875459798,26000,,26000NAb1NANA,2511.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,Fremont RESM4


### 1.3 Parcels (p10) with BASIS BOC

In [6]:
## Read BASIS BOC
# this is an export from the table in the gdb done via
# https://github.com/BayAreaMetro/petrale/blob/master/basemap/export_filegdb_layers.py
basis_boc_file = os.path.join(BASIS_GDB, "..", "p10_boc_opt_b_v1d_tbl.csv")
basis_boc_columns = [
    'parcel_id','max_height','max_dua','max_far',
    'plu_id','plu_jurisdiction','plu_description',
    'building_types_source','source'] + [btype.lower() for btype in ALLOWED_BUILDING_TYPE_CODES]
# most are float
basis_boc_dtypes = dict((x, float) for x in basis_boc_columns)
# except these
basis_boc_dtypes['plu_id'               ] = str
basis_boc_dtypes['plu_jurisdiction'     ] = str
basis_boc_dtypes['plu_description'      ] = str
basis_boc_dtypes['building_types_source'] = str
basis_boc_dtypes['source'               ] = str

basis_boc = pd.read_csv(basis_boc_file, usecols = basis_boc_columns, dtype   =basis_boc_dtypes)
print("Read {:,} rows from {}".format(len(basis_boc), basis_boc_file))

# append _basis to column names to differentiate between basis PLU and pba40 PLU between 
rename_cols = {}
for col in basis_boc.columns.values:
    # rename the ht, hm, etc to HT_basis, HM_basis, ...
    if len(col)==2:
        rename_cols[col] = col.upper() + "_basis"
    else:
        rename_cols[col] = col + "_basis"
basis_boc.rename(columns=rename_cols, inplace=True)

display(basis_boc.head())
display(basis_boc.dtypes)

# report on missing allowed building types
for btype in ALLOWED_BUILDING_TYPE_CODES:
    null_btype_count = len(basis_boc.loc[basis_boc["{}_basis".format(btype)].isnull()])
    print('Number of parcels missing allowable type for {}: {:,} or {:.1f}%'.format(btype,
           null_btype_count, 100.0*null_btype_count/len(basis_boc)))

# merge basis plu to p10 + pba40 plu
p10_basis_pba40_boc = pd.merge(left=p10_pba40_plu, right=basis_boc, left_on='PARCEL_ID', right_on='parcel_id_basis', how='left')

p10_basis_pba40_boc.drop(columns = ['id_pba40','name_pba40','plandate_pba40','jz_o'],inplace = True)
display(p10_basis_pba40_boc.dtypes)

Read 1,933,226 rows from M:\Data\GIS layers\UrbanSim BASIS\UrbanSim Data Review Option B v1d_2020apr03.gdb\..\p10_boc_opt_b_v1d_tbl.csv


Unnamed: 0,parcel_id_basis,plu_id_basis,max_far_basis,max_dua_basis,max_height_basis,MT_basis,ME_basis,SC_basis,HO_basis,IW_basis,...,RB_basis,RS_basis,MR_basis,HM_basis,HT_basis,HS_basis,building_types_source_basis,source_basis,plu_description_basis,plu_jurisdiction_basis
0,8292.0,7e5767cf-590d-4b91-abc3-c62e2e7ddbbc,0.55,8.712,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,Ordinance (Joshua Croff),Zoning,Residential - Mixed Housing,Oakland
1,72959.0,b554e520-233b-43fe-941d-6cf00c8280cc,0.55,8.712,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,Ordinance (Joshua Croff),Zoning,Residential - Detached Unit,Oakland
2,9325.0,dd4b3b3d-aaea-42b5-ba1c-82bfe05855b4,0.55,8.712,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,Ordinance (Joshua Croff),Zoning,Residential - Mixed Housing,Oakland
3,322264.0,3c930453-9156-4fe2-ab91-07fe3236d6b7,0.35,0.0,75.0,,,,,,...,,,,,0.0,0.0,Industrial zn_description inference,,Industrial General,Fremont
4,11094.0,b554e520-233b-43fe-941d-6cf00c8280cc,0.55,8.712,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,Ordinance (Joshua Croff),Zoning,Residential - Detached Unit,Oakland


parcel_id_basis                float64
plu_id_basis                    object
max_far_basis                  float64
max_dua_basis                  float64
max_height_basis               float64
MT_basis                       float64
ME_basis                       float64
SC_basis                       float64
HO_basis                       float64
IW_basis                       float64
IH_basis                       float64
IL_basis                       float64
OF_basis                       float64
RB_basis                       float64
RS_basis                       float64
MR_basis                       float64
HM_basis                       float64
HT_basis                       float64
HS_basis                       float64
building_types_source_basis     object
source_basis                    object
plu_description_basis           object
plu_jurisdiction_basis          object
dtype: object

Number of parcels missing allowable type for HS: 204,054 or 10.6%
Number of parcels missing allowable type for HT: 204,069 or 10.6%
Number of parcels missing allowable type for HM: 210,129 or 10.9%
Number of parcels missing allowable type for OF: 210,029 or 10.9%
Number of parcels missing allowable type for HO: 210,129 or 10.9%
Number of parcels missing allowable type for SC: 210,103 or 10.9%
Number of parcels missing allowable type for IL: 204,857 or 10.6%
Number of parcels missing allowable type for IW: 210,111 or 10.9%
Number of parcels missing allowable type for IH: 210,072 or 10.9%
Number of parcels missing allowable type for RS: 210,129 or 10.9%
Number of parcels missing allowable type for RB: 210,625 or 10.9%
Number of parcels missing allowable type for MR: 210,129 or 10.9%
Number of parcels missing allowable type for MT: 210,129 or 10.9%
Number of parcels missing allowable type for ME: 210,191 or 10.9%


PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
COUNTY_ID                        int32
geom_id_s                       object
jurisdiction                     int64
pda_id                          object
zoningmodcat                    object
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                 

### 1.4 Bring in building data (b10) to determine parcel characteristics

In [7]:
## Building data to decide parcel status
# this is an export from the table in the gdb done via
# https://github.com/BayAreaMetro/petrale/blob/master/basemap/export_filegdb_layers.py
basemap_b10_file = os.path.join(SMELT_GDB_DIR, "b10.csv")
basemap_b10 = pd.read_csv(
    basemap_b10_file,
    dtype = {'parcel_id':np.float64})
print("Read {:,} rows from {}".format(len(basemap_b10), basemap_b10_file))
display(basemap_b10.head())
display(basemap_b10.dtypes)

print("Out of {:,} buildings, there are {:,} unique values of 'building_id' and {:,} unique values of 'parcel_id'".format(
    len(basemap_b10), len(basemap_b10.building_id.unique()), len(basemap_b10.parcel_id.unique())))

# count a building as "vacant" based on building's development_type_id
# https://mtcdrive.box.com/s/ce96tayc3palb3houagsc2bw2c3af5q3
basemap_b10["building_vacant"] = 0.0
basemap_b10.loc[basemap_b10.development_type_id== 0, "building_vacant"] = 1.0
basemap_b10.loc[basemap_b10.development_type_id==20, "building_vacant"] = 1.0

# merge building and parcel data w/ outer join
basemap_b10_p10 = pd.merge(left=basemap_b10, right=basemap_p10[['PARCEL_ID']], 
                           left_on='parcel_id', right_on='PARCEL_ID', how='outer')
print("basemap_b10_p10 has {:,} rows; head():".format(len(basemap_b10_p10)))
display(basemap_b10_p10.head())
display(basemap_b10_p10.dtypes)

# combine values for multiple buildings within one parcel
basemap_b10_p10_groupby_parcel = basemap_b10_p10.groupby(['PARCEL_ID']).agg({
    'improvement_value'   :'sum',
    'residential_units'   :'sum',
    'residential_sqft'    :'sum',
    'non_residential_sqft':'sum',
    'building_sqft'       :'sum',
    'redfin_sale_price'   :'sum',
    # 'costar_rent'         :'sum', # this is a string
    'year_built'          :'min',
    'building_id'         :'min',
    'building_vacant'     :'prod'}) # all buildings must be vacant to call this vacant

Read 1,843,351 rows from M:\Data\GIS layers\UrbanSim smelt\2020 03 12\b10.csv


Unnamed: 0,OBJECTID,building_id,parcel_id,development_type_id,improvement_value,residential_units,residential_sqft,sqft_per_unit,non_residential_sqft,building_sqft,nonres_rent_per_sqft,res_price_per_sqft,stories,year_built,redfin_sale_price,redfin_sale_year,redfin_home_type,costar_property_type,costar_rent,id
0,1,1,742974.0,1,0.0,1,2029,2029.42425,0,2029.42425,0.0,302.769751,1,1945,,,,,,1
1,2,2,744961.0,1,0.0,1,2029,2029.42425,0,2029.42425,0.0,254.429279,1,1965,,,,,,2
2,3,3,1442641.0,1,53262.87,1,1568,1568.0,0,1568.0,0.0,183.474166,1,1964,,,,,,3
3,4,4,190969.0,2,245000.0,0,0,1266.0,1595,1266.0,0.0,0.0,2,1992,340000.0,2003.0,Condo/Coop,,,4
4,5,5,308709.0,2,283500.0,0,0,1513.0,1513,1513.0,0.0,0.0,1,1978,442000.0,2004.0,Condo/Coop,,,5


OBJECTID                  int64
building_id               int64
parcel_id               float64
development_type_id       int64
improvement_value       float64
residential_units         int64
residential_sqft          int64
sqft_per_unit           float64
non_residential_sqft      int64
building_sqft           float64
nonres_rent_per_sqft    float64
res_price_per_sqft      float64
stories                   int64
year_built                int64
redfin_sale_price       float64
redfin_sale_year        float64
redfin_home_type         object
costar_property_type     object
costar_rent              object
id                        int64
dtype: object

Out of 1,843,351 buildings, there are 1,843,351 unique values of 'building_id' and 1,843,292 unique values of 'parcel_id'
basemap_b10_p10 has 2,369,891 rows; head():


Unnamed: 0,OBJECTID,building_id,parcel_id,development_type_id,improvement_value,residential_units,residential_sqft,sqft_per_unit,non_residential_sqft,building_sqft,...,stories,year_built,redfin_sale_price,redfin_sale_year,redfin_home_type,costar_property_type,costar_rent,id,building_vacant,PARCEL_ID
0,1.0,1.0,742974.0,1.0,0.0,1.0,2029.0,2029.42425,0.0,2029.42425,...,1.0,1945.0,,,,,,1.0,0.0,742974.0
1,2.0,2.0,744961.0,1.0,0.0,1.0,2029.0,2029.42425,0.0,2029.42425,...,1.0,1965.0,,,,,,2.0,0.0,744961.0
2,3.0,3.0,1442641.0,1.0,53262.87,1.0,1568.0,1568.0,0.0,1568.0,...,1.0,1964.0,,,,,,3.0,0.0,
3,4.0,4.0,190969.0,2.0,245000.0,0.0,0.0,1266.0,1595.0,1266.0,...,2.0,1992.0,340000.0,2003.0,Condo/Coop,,,4.0,0.0,190969.0
4,5.0,5.0,308709.0,2.0,283500.0,0.0,0.0,1513.0,1513.0,1513.0,...,1.0,1978.0,442000.0,2004.0,Condo/Coop,,,5.0,0.0,


OBJECTID                float64
building_id             float64
parcel_id               float64
development_type_id     float64
improvement_value       float64
residential_units       float64
residential_sqft        float64
sqft_per_unit           float64
non_residential_sqft    float64
building_sqft           float64
nonres_rent_per_sqft    float64
res_price_per_sqft      float64
stories                 float64
year_built              float64
redfin_sale_price       float64
redfin_sale_year        float64
redfin_home_type         object
costar_property_type     object
costar_rent              object
id                      float64
building_vacant         float64
PARCEL_ID               float64
dtype: object

In [8]:
# combine building/parcel data with all PLU (pba40 and boc)
p10_b10_basis_pba40_boc = pd.merge(left=p10_basis_pba40_boc, right=basemap_b10_p10_groupby_parcel,
                                   how="left", on="PARCEL_ID")

print("p10_b10_basis_pba40_boc has {:,} rows; head():".format(len(p10_b10_basis_pba40_boc)))
display(p10_b10_basis_pba40_boc.head())
display(p10_b10_basis_pba40_boc.dtypes)

# Investment-land ratio
p10_b10_basis_pba40_boc['ILR'] = p10_b10_basis_pba40_boc['improvement_value'] / p10_b10_basis_pba40_boc['LAND_VALUE']
p10_b10_basis_pba40_boc.loc[p10_b10_basis_pba40_boc['LAND_VALUE'] == 0, 'ILR'] = 'n/a'

# Vacant parcels
p10_b10_basis_pba40_boc["is_vacant"] = False
p10_b10_basis_pba40_boc.loc[ p10_b10_basis_pba40_boc['building_id'].isnull(),   "is_vacant" ] = True
p10_b10_basis_pba40_boc.loc[ p10_b10_basis_pba40_boc['building_vacant'] == 1.0, "is_vacant" ] = True
p10_b10_basis_pba40_boc.loc[(p10_b10_basis_pba40_boc['improvement_value'   ] == 0) & 
                            (p10_b10_basis_pba40_boc['residential_units'   ] == 0) &
                            (p10_b10_basis_pba40_boc['residential_sqft'    ] == 0) &
                            (p10_b10_basis_pba40_boc['non_residential_sqft'] == 0) &
                            (p10_b10_basis_pba40_boc['building_sqft'       ] == 0), "is_vacant"] = True
print("p10_b10_basis_pba40_boc.is_vacant:")
display(p10_b10_basis_pba40_boc["is_vacant"].value_counts())

# building age by year-built
p10_b10_basis_pba40_boc['building_age'] = 'missing'
p10_b10_basis_pba40_boc.loc[ p10_b10_basis_pba40_boc.year_built >= 2000, 'building_age' ] = 'after 2000'
p10_b10_basis_pba40_boc.loc[ p10_b10_basis_pba40_boc.year_built <  2000, 'building_age' ] = '1980-2000'
p10_b10_basis_pba40_boc.loc[ p10_b10_basis_pba40_boc.year_built <  1980, 'building_age' ] = '1930-1980'
p10_b10_basis_pba40_boc.loc[ p10_b10_basis_pba40_boc.year_built <  1930, 'building_age' ] = 'before 1930'
print("p10_b10_basis_pba40_boc.building_age:")
display(p10_b10_basis_pba40_boc["building_age"].value_counts())

p10_b10_basis_pba40_boc has 1,956,208 rows; head():


Unnamed: 0,PARCEL_ID,LAND_VALUE,ACRES,COUNTY_ID,geom_id_s,jurisdiction,pda_id,zoningmodcat,zoning_id,nodev_pba40,...,plu_jurisdiction_basis,improvement_value,residential_units,residential_sqft,non_residential_sqft,building_sqft,redfin_sale_price,year_built,building_id,building_vacant
0,229116.0,0.0,3.36052,1,10305106092872,41992,,41992NANANANA,60126.0,0.0,...,Livermore,0.0,0.0,0.0,0.0,0.0,0.0,,,1.0
1,244166.0,0.0,1.294423,1,11107351665227,41992,,41992NANANANA,11903.0,0.0,...,Livermore,0.0,0.0,0.0,0.0,0.0,0.0,,,1.0
2,202378.0,6036500.0,14.993605,1,11030175960628,33000,,33000NANANANA,11803.0,0.0,...,Hayward,0.0,20.0,101000.0,0.0,101000.0,1007250.0,2009.0,15681.0,0.0
3,2004420.0,179954.0,316.247146,97,6381677629073,97,,00097NANANANA,12975.0,0.0,...,Unincorporated Sonoma,146211.0,0.0,0.0,0.0,0.0,0.0,1965.0,17798.0,0.0
4,340332.0,0.0,0.621275,1,314875459798,26000,,26000NAb1NANA,2511.0,0.0,...,Fremont,0.0,0.0,0.0,0.0,0.0,0.0,,,1.0


PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
COUNTY_ID                        int32
geom_id_s                       object
jurisdiction                     int64
pda_id                          object
zoningmodcat                    object
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                 

p10_b10_basis_pba40_boc.is_vacant:


False    1422902
True      533306
Name: is_vacant, dtype: int64

p10_b10_basis_pba40_boc.building_age:


1930-1980      860879
missing        526540
1980-2000      269563
before 1930    174043
after 2000     125183
Name: building_age, dtype: int64

### 1.5 Bring in zoning modification (by scenario) configuration

In [9]:
## planned zoning scenarios
zmod_file = os.path.join(BOX_URBAN_MODELING_PBA50,"Current PBA50 Large General Input Data","2020_04_14_parcels_geography.csv")
zmod = pd.read_csv(zmod_file)
print("Read {:,} rows from {}".format(len(zmod), zmod_file))
display(zmod.head())

# append _zmod to column names to clarify source of these columns
rename_cols = dict((col, col+"_zmod") for col in zmod.columns.values)
zmod.rename(columns=rename_cols, inplace=True)
display(zmod.dtypes)

# merge parcel data with zoning mods
p10_b10_basis_pba40_boc["geom_id"] = pd.to_numeric(p10_b10_basis_pba40_boc.geom_id_s)
print(p10_b10_basis_pba40_boc.dtypes)

p10_b10_basis_pba40_boc_zmod       = pd.merge(left=p10_b10_basis_pba40_boc, right=zmod, 
                                              left_on='geom_id', right_on='geom_id_zmod', how = 'left')
print("Created p10_b10_basis_pba40_boc_zmod:")
display(p10_b10_basis_pba40_boc_zmod.dtypes)

  interactivity=interactivity, compiler=compiler, result=result)


Read 1,956,208 rows from C:\Users\lzorn\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\PBA50\Current PBA50 Large General Input Data\2020_04_14_parcels_geography.csv


Unnamed: 0.1,Unnamed: 0,geom_id,PARCEL_ID,urbanized,pda_id,tpp_id,exp_id,exp_score,opp_id,zoningmodcat,...,gg_id,tra_id,sesit_id,ppa_id,exp2020_id,exsfd_id,pba50chcat,pba50zoningmodcat,nodev,jurisdiction_id
0,0,10305106092872,229116,1,,,,0.0,,41992NANANANA,...,,,HRADR,,in,,NANAHRADRNA,livrNANAHRADRNAinNA,0,41992
1,1,11107351665227,244166,1,,,,0.0,,41992NANANANA,...,,,DR,,in,,NANADRNA,livrNANADRNAinNA,0,41992
2,2,11030175960628,202378,0,,,,0.0,,33000NANANANA,...,,,,,in,,NANANANA,haywNANANANAinNA,0,33000
3,3,6381677629073,2004420,0,,,,0.0,,00097NANANANA,...,,,DR,,out,,NANADRNA,usonNANADRNAoutNA,0,97
4,4,314875459798,340332,1,,b1,,0.0,,26000NAb1NANA,...,,,HRADR,,in,,NANAHRADRNA,fremNANAHRADRNAinNA,1,26000


Unnamed: 0_zmod             int64
geom_id_zmod                int64
PARCEL_ID_zmod              int64
urbanized_zmod              int64
pda_id_zmod                object
tpp_id_zmod                object
exp_id_zmod                object
exp_score_zmod            float64
opp_id_zmod                object
zoningmodcat_zmod          object
perffoot_zmod               int64
perfarea_zmod               int64
tpa_id_zmod                object
perfarea2_zmod              int64
alt_zoning_zmod            object
zonetype_zmod              object
pubopp_id_zmod             object
puboppuse_zmod              int64
hra_id_zmod                object
trich_id_zmod              object
cat_id_zmod                object
chcat_zmod                 object
zoninghzcat_zmod           object
juris_id_zmod              object
gg_id_zmod                 object
tra_id_zmod                object
sesit_id_zmod              object
ppa_id_zmod                object
exp2020_id_zmod            object
exsfd_id_zmod 

PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
COUNTY_ID                        int32
geom_id_s                       object
jurisdiction                     int64
pda_id                          object
zoningmodcat                    object
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                 

PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
COUNTY_ID                        int32
geom_id_s                       object
jurisdiction                     int64
pda_id                          object
zoningmodcat                    object
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                 

### 1.6 Export PLU BOC data to csv

In [10]:
output_columns = [
    'PARCEL_ID','COUNTY_ID',
    'juris_id_zmod',
    
    # intensity
    'max_far_basis',   'max_far_pba40',
    'max_dua_basis',   'max_dua_pba40',
    'max_height_basis','max_height_pba40',
    'nodev_zmod',      'nodev_pba40',

    # BASIS metadata
    'building_types_source_basis','source_basis',
    'plu_id_basis','plu_jurisdiction_basis','plu_description_basis'
]
# allowed building types
for btype in ALLOWED_BUILDING_TYPE_CODES:
    output_columns.append(btype + "_basis")
    output_columns.append(btype + "_pba40")

plu_boc_output = p10_b10_basis_pba40_boc_zmod[output_columns]

plu_boc_output.to_csv(os.path.join(OUTPUT_DIR, 'p10_plu_boc_allAttrs.csv'), index = False)

In [11]:
# Missing HM or MR but having 'nodev' as 0
missingHM = plu_boc_output.loc[(plu_boc_output.HM_basis.isnull()) & (plu_boc_output.nodev_zmod == 0),
                               ['PARCEL_ID','COUNTY_ID','juris_id_zmod','HM_pba40','HM_basis',
                                'building_types_source_basis','source_basis']]
missingHM.to_csv(os.path.join(OUTPUT_DIR, 'missingHM_basis.csv'),index = False)

missingMR = plu_boc_output.loc[(plu_boc_output.MR_basis.isnull()) & (plu_boc_output.nodev_zmod == 0),
                               ['PARCEL_ID','COUNTY_ID','juris_id_zmod','MR_pba40','MR_basis',
                                'building_types_source_basis','source_basis']]
missingMR.to_csv(os.path.join(OUTPUT_DIR, 'missingMR_basis.csv'),index = False)

## 2 Capacity statistics

In [12]:
# county_id value counts
print("p10_b10_basis_pba40_boc_zmod.COUNTY_ID.value_counts()")
display(p10_b10_basis_pba40_boc_zmod.COUNTY_ID.value_counts())

# nodev_zmod value counts
print("p10_b10_basis_pba40_boc_zmod.nodev_zmod.value_counts()")
display(p10_b10_basis_pba40_boc_zmod.nodev_zmod.value_counts())

display(p10_b10_basis_pba40_boc_zmod.dtypes)

p10_b10_basis_pba40_boc_zmod.COUNTY_ID.value_counts()


85    436446
1     389884
13    325208
81    195487
97    171997
75    153355
95    137961
41     96816
55     49053
0          1
Name: COUNTY_ID, dtype: int64

p10_b10_basis_pba40_boc_zmod.nodev_zmod.value_counts()


0    1893087
1      63121
Name: nodev_zmod, dtype: int64

PARCEL_ID                      float64
LAND_VALUE                     float64
ACRES                          float64
COUNTY_ID                        int32
geom_id_s                       object
jurisdiction                     int64
pda_id                          object
zoningmodcat                    object
zoning_id                      float64
nodev_pba40                    float64
juris_pba40                    float64
city_pba40                      object
max_far_pba40                  float64
max_height_pba40               float64
max_dua_pba40                  float64
max_du_per_parcel_pba40        float64
HS_pba40                       float64
HT_pba40                       float64
HM_pba40                       float64
OF_pba40                       float64
HO_pba40                       float64
SC_pba40                       float64
IL_pba40                       float64
IW_pba40                       float64
IH_pba40                       float64
RS_pba40                 

### 2.1 Allowed Building Type Statistics

In [13]:
COUNTY_DICT = {
    'Unknown'      :  0, # this shouldn't be needed but...
    'Alameda'      :  1,
    'Contra Costa' : 13,
    'Marin'        : 41,
    'Napa'         : 55,
    'San Francisco': 75,
    'San Mateo'    : 81,
    'Santa Clara'  : 85,
    'Solano'       : 95,
    'Sonoma'       : 97
}
COUNTY_MAP = pd.DataFrame(COUNTY_DICT.items(), columns=['county_name', 'COUNTY_ID'])

# aggregate allowed building type to county
aggregate_map = {}
for dev_type in ALLOWED_BUILDING_TYPE_CODES:
    aggregate_map[dev_type+"_basis"] = "sum"
    aggregate_map[dev_type+"_pba40"] = "sum"
# print(aggregate_map)

plu_boc_county = p10_b10_basis_pba40_boc_zmod.groupby(['COUNTY_ID']).agg(aggregate_map).reset_index()

for dev_type in ALLOWED_BUILDING_TYPE_CODES:
    # diff: basis - pba40
    plu_boc_county[dev_type+'_diff'    ] = plu_boc_county[dev_type+'_basis'] - plu_boc_county[dev_type+'_pba40']
    # pct diff: diff / pba40
    plu_boc_county[dev_type+'_diff_pct'] = plu_boc_county[dev_type+'_diff'] / plu_boc_county[dev_type+'_pba40']

display(plu_boc_county)

Unnamed: 0,COUNTY_ID,HS_basis,HS_pba40,HT_basis,HT_pba40,HM_basis,HM_pba40,OF_basis,OF_pba40,HO_basis,...,RS_diff,RS_diff_pct,RB_diff,RB_diff_pct,MR_diff,MR_diff_pct,MT_diff,MT_diff_pct,ME_diff,ME_diff_pct
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,,0.0,,0.0,,0.0,,0.0,
1,1,279178.0,276322.0,98626.0,153370.0,95443.0,111300.0,148745.0,23002.0,25626.0,...,41784.0,0.722032,15928.0,2.545629,41088.0,2.267299,53894.0,2.581625,65148.0,5.620083
2,13,216027.0,245540.0,108716.0,104544.0,116364.0,32862.0,118874.0,15538.0,84033.0,...,85175.0,5.118998,77129.0,20.711332,47778.0,11.465803,82413.0,12.939708,74099.0,13.980943
3,41,66989.0,80289.0,22470.0,25842.0,24469.0,14291.0,27271.0,3092.0,1949.0,...,21322.0,6.803446,9367.0,12.292651,13935.0,7.373016,15442.0,6.571064,15261.0,6.612218
4,55,39667.0,39684.0,28283.0,12947.0,26506.0,10671.0,4894.0,2294.0,4533.0,...,5872.0,3.752077,3699.0,3.059553,801.0,1.675732,724.0,0.68431,842.0,0.807287
5,75,131900.0,11887.0,57022.0,143283.0,28380.0,71920.0,8094.0,5052.0,2099.0,...,6212.0,0.575985,-1239.0,-0.706788,3730.0,0.277365,594.0,0.042158,-2578.0,-0.748983
6,81,159317.0,151693.0,53410.0,77237.0,58605.0,49922.0,61042.0,10407.0,28446.0,...,44132.0,5.309432,10484.0,2.044462,18417.0,2.554369,24240.0,5.350993,15178.0,3.973298
7,85,365739.0,201742.0,263258.0,147974.0,88990.0,28259.0,79491.0,10869.0,8860.0,...,-33711.0,-0.329431,19253.0,11.59121,22563.0,1.867025,22832.0,1.428607,40688.0,8.253144
8,95,79414.0,54594.0,46444.0,28699.0,18394.0,7992.0,25797.0,2555.0,2829.0,...,45418.0,12.515293,7750.0,3.628277,13260.0,12.64061,9459.0,6.501031,10738.0,7.610206
9,97,150437.0,77230.0,98194.0,40716.0,88942.0,32733.0,62823.0,3375.0,27399.0,...,67486.0,2.542803,24688.0,14.139748,73974.0,26.513978,53757.0,17.631027,71059.0,25.082598


### 2.2 Calculate Build Out Capacity for each parcel

In [14]:
def calculate_capacity(full_df,boc_source,nodev_source):
    """
    full_df:      parcel data frame with PBA40("_pba40") and BASIS("_basis") BOC attributes 
    boc_source:   BOC source, 'pba40' or 'basis' (for intensity/allowed building types)
    nodev_source: nodev source, 'pba40' or 'zmod'
    
    returns df with PARCEL_ID, ACRES and the following new columns:
     - allow_res_[boc_source]
     - calc_dua_[boc_source]
     - status_dua_[boc_source]
     - units[boc_source]
     
     - allow_nonres_[boc_source]
     - calc_far_[boc_source]
     - status_far_[boc_source]
     - sqft_[boc_source]
     - Ksqft_[boc_source]
     - emp_[boc_source]
    """
    # copy the df so we don't alter it
    df = full_df.copy()
    
    # Fill 'NaN' in allowed development types with 0 in order to calculate 'allow_res' and 'allow_nonres_'
    for dev_type in ALLOWED_BUILDING_TYPE_CODES:
        df[dev_type+"_"+boc_source].fillna(value=0.0, inplace = True)
    
    # allow_res is sum of allowed building types that are residential
    res_allowed_columns = [btype+'_'+boc_source for btype in RES_BUILDING_TYPE_CODES]
    df['allow_res_' +boc_source] = df[res_allowed_columns].sum(axis=1)
    df['status_dua_'+boc_source] = "good"

    # fill in missing values for max_DUA assuming a HU is 1200 square feet and a floor is 11 feet high
    missing_dua_idx = ((df['allow_res_'+boc_source  ] > 0     ) &
                       (df['max_dua_'  +boc_source  ].isnull()) &
                       (df['nodev_'    +nodev_source] == 0    ))
    print("Developable residential parcels missing 'max_dua_{}': {:,}".format(
        boc_source, len(df.loc[missing_dua_idx])))
    df.loc[missing_dua_idx,'max_dua_'   +boc_source] = SQUARE_FEET_PER_ACRE / SQUARE_FEET_PER_DU
    df.loc[missing_dua_idx,'calc_dua_'  +boc_source] = 'Yuqi'
    df.loc[missing_dua_idx,'status_dua_'+boc_source] = "calculated"

    # zero out max_DUA should be 0 for 'nodev' parcels or parcels that don't allow residential
    zero_dua_idx = (df['allow_res_'  +boc_source] == 0) | (df['nodev_'+nodev_source] == 1)
    df.loc[zero_dua_idx,'max_dua_'   +boc_source] = 0
    df.loc[zero_dua_idx,'calc_dua_'  +boc_source] = 'Yuqi'
    df.loc[zero_dua_idx,'status_dua_'+boc_source] = "calculated"
   
    # DUA calculations apply to parcels 'allowRes' and not marked as "nodev"
    df['units_'+boc_source] = df['ACRES'] * df['max_dua_'+boc_source]    

    # recalculate missing_dua_idx
    missing_dua_idx = ((df['allow_res_'+boc_source  ] > 0     ) &
                       (df['max_dua_'  +boc_source  ].isnull()) &
                       (df['nodev_'    +nodev_source] == 0    ))
    print("After filling in missing values, developable residential parcels missing 'max_dua_{}': {:,}\n".format(
         boc_source, len(df.loc[missing_dua_idx])))
    df.loc[missing_dua_idx,'status_dua_'+boc_source] = "missing"  # this shouldn't happen

    # allow_nonres is the sum of allowed building types that are non-residential
    nonres_allowed_columns = [btype+'_'+boc_source for btype in NONRES_BUILDING_TYPE_CODES]
    df['allow_nonres_'+boc_source] = df[nonres_allowed_columns].sum(axis=1)
    df['status_far_'  +boc_source] = "good"
    
    # fill in missing values for max_far assuming a floor is eleven feet tall and land coverage 50%
    missing_far_idx = ((df['allow_nonres_'+boc_source  ] > 0      ) & 
                       (df['max_far_'     +boc_source  ].isnull() ) & 
                       (df['nodev_'       +nodev_source] == 0     ))
    print("Developable non-residential parcels missing 'max_far_{}: {:,}".format(
        boc_source, len(df.loc[missing_far_idx])))
    
    # we can only fill in missing if max_height is not null
    calc_far_idx = missing_far_idx & df['max_height_' +boc_source].notnull()
    df.loc[calc_far_idx,'max_far_'   +boc_source] = 0.5 * (df.loc[calc_far_idx,'max_height_'+boc_source] / FEET_PER_STORY)
    df.loc[calc_far_idx,'calc_far_'  +boc_source] = 'Yuqi'
    df.loc[calc_far_idx,'status_far_'+boc_source] = "calculated"

    # Max FAR should be 0 for 'nodev' parcels or parcels that don't allow non-residential
    zero_far_idx = (df['allow_nonres_'+boc_source] == 0) | (df['nodev_'+nodev_source] == 1)
    df.loc[zero_far_idx,'max_far_'   +boc_source] = 0
    df.loc[zero_far_idx,'calc_far_'  +boc_source] = 'Yuqi'
    df.loc[zero_far_idx,'status_far_'+boc_source] = "calculated"

    # recalculate missing_far_idx
    missing_far_idx = ((df['allow_nonres_'+boc_source  ] > 0      ) & 
                       (df['max_far_'     +boc_source  ].isnull() ) & 
                       (df['nodev_'       +nodev_source] == 0     ))
    print("After filling in missing values: developable non-residential parcels missing 'max_far_{}': {:,}\n".format(
        boc_source, len(df.loc[missing_far_idx])))
    df.loc[missing_far_idx,'status_far_'+boc_source] = "missing"

    # FAR calculations apply to parcels 'allowNonRes' and not marked as "nodev"
    df['sqft_' +boc_source] = df['ACRES'] * df['max_far_'+boc_source] * SQUARE_FEET_PER_ACRE 
    df['Ksqft_'+boc_source] = df['sqft_'+boc_source]*0.001
    
    # of nonresidential uses, only office allowed
    office_idx   = (df['OF_'+boc_source] == 1) & (df['allow_nonres_'+boc_source]== 1)
    # of nonresidential uses, only industrial allowed
    allow_indust = df[['IL_'+boc_source,'IW_'+boc_source,'IH_'+boc_source]].sum(axis = 1)
    indust_idx   = (allow_indust > 0) & (df['allow_nonres_'+boc_source] == allow_indust)
    # calculate non-residential capacity in employment
    df[               'emp_'+boc_source] = df['sqft_'+boc_source] / SQUARE_FEET_PER_EMPLOYEE
    df.loc[office_idx,'emp_'+boc_source] = df['sqft_'+boc_source] / SQUARE_FEET_PER_EMPLOYEE_OFFICE
    df.loc[indust_idx,'emp_'+boc_source] = df['sqft_'+boc_source] / SQUARE_FEET_PER_EMPLOYEE_INDUSTRIAL
    
    return df[["PARCEL_ID", "ACRES",
               "allow_res_"    +boc_source,
               "calc_dua_"     +boc_source,
               "status_dua_"   +boc_source,
               "units_"        +boc_source,
               "allow_nonres_" +boc_source,
               "calc_far_"     +boc_source,
               "status_far_"   +boc_source,
               "sqft_"         +boc_source,
               "Ksqft_"        +boc_source,
               "emp_"          +boc_source]]

In [15]:
# Calculate PBA40 capacity 
cap_pba40 = calculate_capacity(p10_b10_basis_pba40_boc_zmod, "pba40", "pba40")
# Calculate PBA50 BOC capacity 
cap_basis = calculate_capacity(p10_b10_basis_pba40_boc_zmod, "basis", "zmod")

print("cap_pba40 has {:,} rows; head:".format(len(cap_pba40)))
display(cap_pba40.head())

print("cap_basis has {:,} rows; head:".format(len(cap_pba40)))
display(cap_basis.head())

Developable residential parcels missing 'max_dua_pba40': 134,406
After filling in missing values, developable residential parcels missing 'max_dua_pba40': 0

Developable non-residential parcels missing 'max_far_pba40: 194,622
After filling in missing values: developable non-residential parcels missing 'max_far_pba40': 40,142

Developable residential parcels missing 'max_dua_basis': 69,586
After filling in missing values, developable residential parcels missing 'max_dua_basis': 0

Developable non-residential parcels missing 'max_far_basis: 10,832
After filling in missing values: developable non-residential parcels missing 'max_far_basis': 10,681

cap_pba40 has 1,956,208 rows; head:


Unnamed: 0,PARCEL_ID,ACRES,allow_res_pba40,calc_dua_pba40,status_dua_pba40,units_pba40,allow_nonres_pba40,calc_far_pba40,status_far_pba40,sqft_pba40,Ksqft_pba40,emp_pba40
0,229116.0,3.36052,1.0,,good,6.721041,0.0,Yuqi,calculated,0.0,0.0,0.0
1,244166.0,1.294423,1.0,,good,3.883268,0.0,Yuqi,calculated,0.0,0.0,0.0
2,202378.0,14.993605,2.0,,good,130.444362,0.0,Yuqi,calculated,0.0,0.0,0.0
3,2004420.0,316.247146,1.0,,good,1.318751,0.0,Yuqi,calculated,0.0,0.0,0.0
4,340332.0,0.621275,3.0,,good,14.289334,1.0,Yuqi,calculated,63966.515861,63.966516,182.761474


cap_basis has 1,956,208 rows; head:


Unnamed: 0,PARCEL_ID,ACRES,allow_res_basis,calc_dua_basis,status_dua_basis,units_basis,allow_nonres_basis,calc_far_basis,status_far_basis,sqft_basis,Ksqft_basis,emp_basis
0,229116.0,3.36052,0.0,Yuqi,calculated,0.0,0.0,Yuqi,calculated,0.0,0.0,0.0
1,244166.0,1.294423,0.0,Yuqi,calculated,0.0,0.0,Yuqi,calculated,0.0,0.0,0.0
2,202378.0,14.993605,0.0,Yuqi,calculated,0.0,7.0,,good,0.0,0.0,0.0
3,2004420.0,316.247146,4.0,,good,5.268677,8.0,,good,0.0,0.0,0.0
4,340332.0,0.621275,0.0,Yuqi,calculated,0.0,0.0,Yuqi,calculated,0.0,0.0,0.0


In [16]:
# output some subset of capacity data?

capacity = pd.merge(left=cap_pba40, right=cap_basis, how="inner", on=["PARCEL_ID","ACRES"])
print("capacity has {:,} rows; head:".format(len(capacity)))
display(capacity.head())

## export data // will visualize in Tableau
# capacity.to_csv(os.path.join(OUTPUT_DIR, "devCapacity_allAttrs.csv"), index = False)

capacity has 1,956,208 rows; head:


Unnamed: 0,PARCEL_ID,ACRES,allow_res_pba40,calc_dua_pba40,status_dua_pba40,units_pba40,allow_nonres_pba40,calc_far_pba40,status_far_pba40,sqft_pba40,...,allow_res_basis,calc_dua_basis,status_dua_basis,units_basis,allow_nonres_basis,calc_far_basis,status_far_basis,sqft_basis,Ksqft_basis,emp_basis
0,229116.0,3.36052,1.0,,good,6.721041,0.0,Yuqi,calculated,0.0,...,0.0,Yuqi,calculated,0.0,0.0,Yuqi,calculated,0.0,0.0,0.0
1,244166.0,1.294423,1.0,,good,3.883268,0.0,Yuqi,calculated,0.0,...,0.0,Yuqi,calculated,0.0,0.0,Yuqi,calculated,0.0,0.0,0.0
2,202378.0,14.993605,2.0,,good,130.444362,0.0,Yuqi,calculated,0.0,...,0.0,Yuqi,calculated,0.0,7.0,,good,0.0,0.0,0.0
3,2004420.0,316.247146,1.0,,good,1.318751,0.0,Yuqi,calculated,0.0,...,4.0,,good,5.268677,8.0,,good,0.0,0.0,0.0
4,340332.0,0.621275,3.0,,good,14.289334,1.0,Yuqi,calculated,63966.515861,...,0.0,Yuqi,calculated,0.0,0.0,Yuqi,calculated,0.0,0.0,0.0


In [17]:
# Sum missing data by parcels and acres
for status_type in ['status_dua_pba40', 'status_far_pba40', 'status_dua_basis', 'status_far_basis']:
    status_df = capacity.groupby([status_type])['ACRES'].agg(['sum','count']).reset_index()
    status_df.rename(columns = {'sum':'total Acres','count':'count of parcels'}, inplace=True)
    status_df['total Acres'] = status_df['total Acres'].apply(lambda x: f'{int(x):,}')
    display(status_df)

Unnamed: 0,status_dua_pba40,total Acres,count of parcels
0,calculated,2994145,724387
1,good,1493957,1231821


Unnamed: 0,status_far_pba40,total Acres,count of parcels
0,calculated,4169318,1438213
1,good,299443,477853
2,missing,19340,40142


Unnamed: 0,status_dua_basis,total Acres,count of parcels
0,calculated,2722316,470504
1,good,1765786,1485704


Unnamed: 0,status_far_basis,total Acres,count of parcels
0,calculated,3268941,1094475
1,good,1213020,851052
2,missing,6140,10681


### 2.3 Zoning build-out-capacity at jurisdiction and county levels

In [None]:
"""
# BOC by jurisdiction function
def boc_j(df):
    boc_j = df.groupby(['JURIS'])['ACRES','units_10','units_18','sqft_10','sqft_18'].sum()

    boc_j['unit_diff'] = boc_j['units_18'] - boc_j['units_10']
    boc_j['sqft_diff'] = boc_j['sqft_18'] - boc_j['sqft_10']
    boc_j['unit_diff_pct'] = boc_j['unit_diff'] / boc_j['units_10']
    boc_j['sqft_diff_pct'] = boc_j['sqft_diff'] / boc_j['sqft_10']

    for i in ['units_10','units_18','unit_diff','sqft_10','sqft_18','sqft_diff']:
        boc_j[i] = boc_j[i].apply(lambda x: f'{int(x):,}')
    return boc_j

# BOC by county function
def boc_c(df):
    boc_cty = df.groupby(['COUNTY_ID'])['ACRES','units_10','units_18','sqft_10','sqft_18'].sum()
    boc_cty['unit_diff'] = boc_cty['units_18'] - boc_cty['units_10']
    boc_cty['sqft_diff'] = boc_cty['sqft_18'] - boc_cty['sqft_10']
    boc_cty['unit_diff_pct'] = boc_cty['unit_diff'] / boc_cty['units_10']
    boc_cty['sqft_diff_pct'] = boc_cty['sqft_diff'] / boc_cty['sqft_10']

    for i in ['units_10','units_18','unit_diff','sqft_10','sqft_18','sqft_diff']:
        boc_cty[i] = boc_cty[i].apply(lambda x: f'{int(x):,}')

    boc_cty = boc_cty.reset_index()
    boc_cty = boc_cty.loc[boc_cty['COUNTY_ID'] > 0]
    return boc_cty

# all parcels statistics
all_boc_j = boc_j(p10_capacity)
all_boc_j.to_csv('outputs/all_boc_jurisdiction.csv')

all_boc_c = boc_c(p10_capacity)
all_boc_c.to_csv('outputs/all_boc_county.csv')

# vacant parcel statistics
p_vac = p10_capacity.loc[p10_capacity.VACANT == 'vacant']

vac_boc_j = boc_j(p_vac)
vac_boc_j.to_csv('outputs/vac_boc_jurisdiction.csv')

vac_boc_c = boc_c(p_vac)
vac_boc_c.to_csv('outputs/vac_boc_county.csv')

# low ILR parcel statistics (threadhold 0.2)
p10_capacity.ILR = pd.to_numeric(p10_capacity.ILR, errors='coerce')
p_low_ILR = p10_capacity.loc[p10_capacity.ILR < 0.2]

low_ILR_boc_j = boc_j(p_low_ILR)
low_ILR_boc_j.to_csv('outputs/low_ILR_boc_jurisdiction.csv')

low_ILR_boc_c = boc_c(p_low_ILR)
low_ILR_boc_c.to_csv('outputs/low_ILR_boc_county.csv')

# Old building parcel statistics (1930-1980)
p10_capacity.year_built = pd.to_numeric(p10_capacity.YEAR_BUILT, errors='coerce')
p_old = p10_capacity.loc[(p10_capacity.YEAR_BUILT < 1980) & (p10_capacity.YEAR_BUILT >= 1930)]

old_boc_j = boc_j(p_old)
old_boc_j.to_csv('outputs/old_boc_jurisdiction.csv')

old_boc_c = boc_c(p_old)
old_boc_c.to_csv('outputs/old_boc_county.csv')
"""

## 3 PLU BOC Mapping

In [None]:
p10_pluboc_allAttrs = p10_capacity.merge(
    p10_geo[['PARCEL_ID','geometry']], on = 'PARCEL_ID', how = 'left').merge(
    p10_plu50_raw[['parcel_id','plu_jurisdiction','plu_code','plu_description']], left_on = 'PARCEL_ID', right_on = 'parcel_id', how = 'left')

p10_pluboc_allAttrs.rename(columns={'plu_jurisdiction': 'PLU_JURIS',
                             'plu_code': 'PLU_CODE', 
                             'plu_description': 'PLU_DESC', 
                             'geometry': 'GEOMETRY',
                             'BUILDING_TYPES_SOURCE_18':'B_TYPE_SRC',
                             'MAX_HEIGHT_10': 'MAX_H_10',
                             'MAX_HEIGHT_18': 'MAX_H_18',
                             'calc_DUA_10': 'cal_dua_10', 
                             'calc_DUA_18': 'cal_dua_18',
                             'calc_FAR_10': 'cal_far_10',
                             'calc_FAR_18': 'cal_far_18',
                             'allowNonRes_10': 'nonRes1_10', 
                             'allowRes_10': 'res1_10',
                             'allowNonRes_18': 'nonRes1_18', 
                             'allowRes_18': 'res1_18',
                             'NODEV_PBA40': 'NODEV_40', 
                             'NODEV_PBA50': 'NODEV_50',
                             'SOURCE_18': 'SRC'}, inplace = True)

p10_pluboc_allAttrs = p10_pluboc_allAttrs.where(pd.notnull(p10_pluboc_allAttrs), None)

pAttr = ['PARCEL_ID','ACRES','CTYNAME', 'COUNTY_ID', 'GEOMETRY','JURIS','PLU_JURIS', 'PLU_CODE', 'PLU_DESC']
pCond = ['B_AGE', 'YEAR_BUILT', 'ILR', 'VACANT']
devType = ['HS','HT','HM','OF','HO','SC','IL','IW','IH','RS','RB','MR','MT','ME']
noDev = ['NODEV']
allowType = ['res1','nonRes1']
intens = ['MAX_H','MAX_DUA','MAX_FAR']
capacity = ['units','sqft']
srs = ['B_TYPE_SRC', 'SRC','cal_dua_10','cal_dua_18','cal_far_10','cal_far_18']

In [None]:
def byCounty(df,cty_id):
    df_cty = df.loc[df.CTYNAME == cty_id]
    return df_cty

def exportData(df,fname):
    if (df.shape[0] > 0):
        df_geo = gpd.GeoDataFrame(df, geometry='GEOMETRY')
        df_geo.to_file(output_dir + '\\mapping\\' + fname + '.shp')
        df_csv = df.drop(columns = ['GEOMETRY'])
        df_csv.to_csv(output_dir + '\\mapping\\' + fname + '.csv', index = False)
    else:
        print('no records')

### 3.1 Export All Attributes

In [None]:
%%time
# export all attributes
"""
p10_pluboc_clean = p10_pluboc_allAttrs[pAttr + \
                              [x + '_10' for x in devType] + [x + '_18' for x in devType] + \
                              [x + '_10' for x in intens] + [x + '_18' for x in intens] + \
                              [x + '_10' for x in allowType] + [x + '_18' for x in allowType] + \
                              ['NODEV_40','NODEV_50'] + \
                              [x + '_10' for x in capacity] + [x + '_18' for x in capacity] + \
                              srs]

exportData(p10_pluboc_clean,'p10_pluboc_allAttrs')

for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(p10_pluboc_clean,i)
    print('exporting ' + i)
    fname = 'pluboc_' + i
    exportDate(df,fname)
"""

In [None]:
%%time

## export intensity
intensity = p10_pluboc_allAttrs[pAttr + [x + '_10' for x in intens] + [x + '_18' for x in intens] + srs]
exportData(intensity,'intensity')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(intensity,i)
    print('exporting ' + i)
    fname = 'devIntensity_' + i
    exportData(df,fname)
"""
# exportallowed development type
allowDevType = p10_pluboc_allAttrs[pAttr + [x + '_10' for x in devType] + [x + '_18' for x in devType] + srs[:2]]
exportData(allowDevType,'allowDevType')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(allowDevType,i)
    print('exporting ' + i)
    fname = 'allowDevType_' + i
    exportData(df,fname)
"""

### 3.2 Selected Attributes

In [None]:
%%time
## HM allowed in BASIS BOC
hm_boc = p10_pluboc_allAttrs.loc[(p10_pluboc_allAttrs.HM_18 == 1) & (p10_pluboc_allAttrs.NODEV_50 == 0)][pAttr + ['HM_18'] + srs[:2]]
exportData(hm_boc,'HM_BOC_allow')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(hm_boc,i)
    print('exporting ' + i)
    fname = 'HM_BOC_allow_' + i
    exportData(df,fname)
"""

In [None]:
%%time
## HM allowed: 4 categories, in pba40 plu only, in BASIS only, in both, in neither
p10_pluboc_allAttrs['HM_comp'] = np.nan
p10_pluboc_allAttrs['HM_comp'] = np.where((p10_pluboc_allAttrs.HM_10 == 1) & (p10_pluboc_allAttrs.HM_18 == 1),'both allow',np.where(
        (p10_pluboc_allAttrs.HM_10 == 1) & (p10_pluboc_allAttrs.HM_18 == 0),'only pba40 allow',np.where(
        (p10_pluboc_allAttrs.HM_10 == 0) & (p10_pluboc_allAttrs.HM_18 == 1),'only BASIS allow',np.where(
        (p10_pluboc_allAttrs.HM_10 == 0) & (p10_pluboc_allAttrs.HM_18 == 0),'both not allow',np.where(
        (p10_pluboc_allAttrs.HM_10.notnull()) & (p10_pluboc_allAttrs.HM_18.isnull()),'missing BASIS BOC',np.where(
        p10_pluboc_allAttrs.NODEV_50 == 1, 'NoDev','other'))))))
            

hm_comp = p10_pluboc_allAttrs[pAttr + ['HM_comp'] + srs[:2]]
exportData(hm_comp,'HM_comp')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(hm_comp,i)
    print('exporting ' + i)
    fname = 'HM_comp_' + i
    exportData(df,fname)
"""

In [None]:
%%time
## MR allowed but HM no allowed in BASIS BOC 
p10_pluboc_allAttrs['MR_noHM_boc'] = np.nan
p10_pluboc_allAttrs['MR_noHM_boc'] = np.where((p10_pluboc_allAttrs.HM_18 == 0) & 
                                              (p10_pluboc_allAttrs.MR_18 == 1),'MR_noHM_boc',np.where((
                                                  p10_pluboc_allAttrs.HM_18.isnull()) & 
                                              (p10_pluboc_allAttrs.MR_18 == 1),'MR_nanHM_boc','other'))

MR_noHM_boc = p10_pluboc_allAttrs.query("MR_noHM_boc == 'MR_noHM_boc' | MR_noHM_boc == 'MR_nanHM_boc'")
MR_noHM_boc = MR_noHM_boc[pAttr + ['MR_noHM_boc'] + srs[:2]]
exportData(MR_noHM_boc,'MR_noHM_boc')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(MR_noHM_boc,i)
    print('exporting ' + i)
    fname = 'MR_noHM_boc_' + i
    exportData(df,fname)
"""

In [None]:
%%time

## integrated residential capacity (HM HS HT or MR allowed AND what is DUA: raw or calculated from FAR or height)
resOnly = p10_pluboc_allAttrs.loc[((p10_pluboc_allAttrs.res1_10 == True) | (p10_pluboc_allAttrs.res1_18 == True)) & (p10_pluboc_allAttrs.NODEV_50 == 0)]

## chk if correctly categorized parcels that allow residential
display(resOnly.loc[resOnly.res1_18 == True][['HM_18','HS_18','HT_18','MR_18']].sum(axis = 1).value_counts())
display(resOnly.loc[resOnly.res1_10 == True][['HM_10','HS_10','HT_10','MR_10']].sum(axis = 1).value_counts())

## chk if missing units value
display(resOnly.loc[(resOnly.units_18 == 0) & (resOnly.res1_18 == True) & (resOnly.NODEV_50 == 0)].shape)
display(resOnly.loc[(resOnly.units_10 == 0) & (resOnly.res1_10 == True) & (resOnly.NODEV_40 == 0)].shape)

## export
res_cap_18 = resOnly[pAttr + ['units_18'] + pCond + srs[:2] + ['cal_dua_18']]
exportData(res_cap_18,'res_capacity_18')

res_cap_10 = resOnly[pAttr + ['units_10'] + pCond + srs[:2] + ['cal_dua_10']]
exportData(res_cap_10,'res_capacity_10')

res_DUA_18 = resOnly[pAttr + ['MAX_DUA_18'] + pCond + srs[:2] + ['cal_dua_18']]
exportData(res_DUA_18,'res_DUA_18')

res_DUA_10 = resOnly[pAttr + ['MAX_DUA_10'] + pCond + srs[:2] + ['cal_dua_10']]
exportData(res_DUA_10,'res_DUA_10')

"""
for i in ['Alameda', 'Contra Costa','Marin','Napa','San Francisco','San Mateo','Santa Clara','Solano', 'Sonoma']:
    df = byCounty(resOnly,i)
    print('exporting ' + i)
    fname = 'resCapacity_' + i
    exportData(df,fname)
"""

In [None]:
%%time
## Employment capacity in thousands of square feet: calculate for all categories that allow employment 
## Employment capacity in employees
    ## Assume a parcel that only allows office is 175 sqft per employee; 
    ## Assume a parcel that only allows IH IL or IW is 500 sqft per employee; 
    ## assume any other parcels with comm is 350 per employee

nonResOnly = p10_pluboc_allAttrs.loc[((p10_pluboc_allAttrs.nonRes1_10 == True) | (p10_pluboc_allAttrs.nonRes1_18 == True)) & (p10_pluboc_allAttrs.NODEV_50 == 0)]

## chk if correctly categorized parcels that allow non-residential
display(nonResOnly.loc[nonResOnly.nonRes1_18 == True][[x + '_18' for x in nonRes]].sum(axis = 1).value_counts())
display(nonResOnly.loc[nonResOnly.nonRes1_10 == True][[x + '_10' for x in nonRes]].sum(axis = 1).value_counts())

## chk: if missing sqft value
display(nonResOnly.loc[(nonResOnly.Ksqft_18 == 0) & (nonResOnly.nonRes1_18 == True) & (nonResOnly.NODEV_50 == 0)].shape)
display(nonResOnly.loc[(nonResOnly.Ksqft_10 == 0) & (nonResOnly.nonRes1_10 == True) & (nonResOnly.NODEV_40 == 0)].shape)

display(nonResOnly.loc[(nonResOnly.Ksqft_18 == 0) & (nonResOnly.nonRes1_18 == True) & (nonResOnly.NODEV_50 == 0)]['MAX_H_18'].value_counts())
display(nonResOnly.loc[(nonResOnly.Ksqft_10 == 0) & (nonResOnly.nonRes1_10 == True) & (nonResOnly.NODEV_40 == 0)]['MAX_H_10'].value_counts())

## export
nonRes_cap_18 = nonResOnly[pAttr + ['Ksqft_18'] + pCond + srs[:2] + ['cal_far_18']]
exportData(nonRes_cap_18,'nonRes_capacity_18')

nonRes_cap_10 = nonResOnly[pAttr + ['Ksqft_10'] + pCond + srs[:2] + ['cal_far_10']]
exportData(nonRes_cap_10,'nonRes_capacity_10')

nonRes_emp_18 = nonResOnly[pAttr + ['emp_18'] + pCond + srs[:2] + ['cal_far_18']]
exportData(nonRes_emp_18,'nonRes_emp_18')

nonRes_emp_10 = nonResOnly[pAttr + ['emp_10'] + pCond + srs[:2] + ['cal_far_10']]
exportData(nonRes_emp_10,'nonRes_emp_10')