In [1]:
import pandas as pd

In [2]:
# Open the output workbook once per session (do not re-run this cell);
# the final cell writes the current run's summary sheet into this writer.
workbook_path = "C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/CalEEMod_LandUseVars_PBA50EIR.xlsx"
writer = pd.ExcelWriter(workbook_path)

In [66]:
# Select the input file to summarize: one EIR run + year (2015 or 2050).
# The name is appended to the CalEEMod folder path when the building data is read.
file = "run182_building_data_2050_FBP.csv"
# Name of the Excel sheet that receives this run's county summary.
sheet = "FBP 2050"

In [67]:
# Read the selected building table and keep only the fields used downstream.
building_cols = ['zone_id', 'building_id', 'parcel_id', 'source', 'building_type',
                 'residential_units', 'non_residential_sqft']
b = pd.read_csv(
    "C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/" + file
)[building_cols]

In [68]:
# Read the parcel geography table; only the parcel id and acreage are needed
# (acreage is the denominator of the dwelling-units-per-acre calculation).
parcel_cols = ['PARCEL_ID', 'ACRES']
p = pd.read_csv(
    "C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/2021_02_25_parcels_geography.csv"
)[parcel_cols]

In [69]:
# Read the TAZ-to-county crosswalk; keep the zone id and its county code.
xwalk_cols = ['zone', 'county']
xwalk = pd.read_csv(
    "C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/taz_geography.csv"
)[xwalk_cols]

In [70]:
# Replace numeric county codes with county names, one code at a time.
# Codes outside 1-9 (if any) are left unchanged.
county_names = {
    1: "San Francisco",
    2: "San Mateo",
    3: "Santa Clara",
    4: "Alameda",
    5: "Contra Costa",
    6: "Solano",
    7: "Napa",
    8: "Sonoma",
    9: "Marin",
}
for county_code, county_name in county_names.items():
    xwalk.loc[xwalk.county == county_code, "county"] = county_name

In [71]:
# Join parcel acres to buildings (left join: buildings without a matching
# parcel are kept and get a null ACRES).
# validate="many_to_one" makes pandas raise a MergeError if PARCEL_ID is
# duplicated in the parcel table; without it a duplicate parcel row would
# silently duplicate building rows and inflate every unit / sqft total below.
b = b.merge(p, left_on='parcel_id', right_on='PARCEL_ID', how='left', validate='many_to_one')

In [72]:
# Join county to buildings using zone_id (left join keeps every building).
# validate="many_to_one" makes pandas raise a MergeError if a zone appears more
# than once in the crosswalk; without it a duplicated zone would silently
# duplicate building rows and double count them in the county summary.
b = b.merge(xwalk, left_on='zone_id', right_on='zone', how='left', validate='many_to_one')

In [73]:
# Dwelling units per acre (DUA): the building's residential units divided by
# the acreage of its parcel. Null wherever the parcel join found no ACRES.
b["dua"] = b["residential_units"].div(b["ACRES"])

In [74]:
# building types
#HS: single family residential
#HT: townhomes
#HM: multi family residential
#OF: office
#HO: hotel
#SC: school
#IL: light industrial
#IW: warehouse
#IH: heavy industrial
#RS: retail general
#RB: retail big box
#MR: mixed use residential focused
#MT: mixed use industrial focused
#ME: mixed use employment focused

In [75]:
# Copy residential units into a single-family (SF) or multifamily (MF) column
# according to building type; rows of any other type stay null in both.
sf_types = ['HS', 'HT']
mf_types = ['HM', 'MR', 'PA2', 'RS', 'OF', 'IL', 'HO', 'SC']
b.loc[b.building_type.isin(sf_types), 'SF'] = b.residential_units
b.loc[b.building_type.isin(mf_types), 'MF'] = b.residential_units

In [76]:
# Split multifamily units into low / mid / high rise by DUA:
#   low: dua < 32, mid: 32 <= dua < 50, high: dua >= 50
# Rows with a null dua match none of the three conditions.
has_mf_units = b.MF > 0
is_low_density = b.dua < 32
is_mid_density = (b.dua >= 32) & (b.dua < 50)
is_high_density = b.dua >= 50
b.loc[has_mf_units & is_low_density, 'MF_low'] = b.residential_units
b.loc[has_mf_units & is_mid_density, 'MF_mid'] = b.residential_units
b.loc[has_mf_units & is_high_density, 'MF_high'] = b.residential_units

In [77]:
# Copy non-residential sqft into one column per non-residential building type.
# Keys are building_type codes, values are the output column names; columns are
# created in the order listed here.
sqft_column_by_type = {
    'OF': 'office',
    'ME': 'mixed_use_emp',
    'RS': 'retail_general',
    'RB': 'retail_big_box',
    'MR': 'mixed_use_res',
    'IL': 'industrial_light',
    'IH': 'industrial_heavy',
    'IW': 'warehouse',
    'MT': 'mixed_use_industrial',
    'HO': 'hotel',
    'SC': 'school',
}
for type_code, sqft_column in sqft_column_by_type.items():
    b.loc[b.building_type == type_code, sqft_column] = b.non_residential_sqft

In [78]:
# debug check - residential: buildings that have units but were assigned to
# neither SF nor MF, counted by building type (an empty result means none)
unclassified_res = b.residential_units.gt(0) & b[['SF', 'MF']].isnull().all(axis=1)
debug_res = b[unclassified_res]
debug_res['building_type'].value_counts()

Series([], Name: building_type, dtype: int64)

In [79]:
# debug check - multifamily: MF buildings that did not land in any of the
# low / mid / high rise columns
no_rise_class = b[['MF_low', 'MF_mid', 'MF_high']].isnull().all(axis=1)
debug_mf = b[(b.MF > 0) & no_rise_class]
debug_mf

Unnamed: 0,zone_id,building_id,parcel_id,source,building_type,residential_units,non_residential_sqft,PARCEL_ID,ACRES,zone,...,mixed_use_emp,retail_general,retail_big_box,mixed_use_res,industrial_light,industrial_heavy,warehouse,mixed_use_industrial,hotel,school
1807128,353,1843295,2054505,h5_inputs,HM,1655.0,0.0,,,353,...,,,,,,,,,,


In [80]:
# The building flagged by the multifamily check has no PARCEL_ID/ACRES match
# (so no DUA); assign its units to MF_high by hand.
unmatched_building_id = 1843295
b.loc[b.building_id == unmatched_building_id, 'MF_high'] = b.residential_units
# re-check: this should now return no rows
still_unclassified = b[['MF_low', 'MF_mid', 'MF_high']].isnull().all(axis=1)
debug_mf = b[(b.MF > 0) & still_unclassified]
debug_mf

Unnamed: 0,zone_id,building_id,parcel_id,source,building_type,residential_units,non_residential_sqft,PARCEL_ID,ACRES,zone,...,mixed_use_emp,retail_general,retail_big_box,mixed_use_res,industrial_light,industrial_heavy,warehouse,mixed_use_industrial,hotel,school


In [81]:
# debug check - non-residential: buildings with non-res sqft that was not
# copied into any of the category columns, counted by building type
nonres_category_cols = ['office', 'mixed_use_emp', 'retail_general', 'retail_big_box',
                        'mixed_use_res', 'industrial_light', 'industrial_heavy',
                        'warehouse', 'mixed_use_industrial', 'hotel', 'school']
in_no_category = b[nonres_category_cols].isnull().all(axis=1)
debug_nonres = b[(b.non_residential_sqft > 0) & in_no_category]
print(debug_nonres.building_type.value_counts())
# remaining non res sqft are in HS, HM, HT

HS    18404
HM     3441
HT       12
Name: building_type, dtype: int64


In [82]:
# Sum every unit and sqft category by county (one row per county).
summary_cols = ["SF", 'MF', "MF_low", "MF_mid", "MF_high", "office", "mixed_use_emp",
                "retail_general", "retail_big_box", "mixed_use_res", "industrial_light",
                "industrial_heavy", "warehouse", "mixed_use_industrial", "hotel", "school"]
county_summary = b.groupby('county')[summary_cols].sum()

In [83]:
# display the county-level totals (rows: county; columns: unit and sqft categories)
county_summary

Unnamed: 0_level_0,SF,MF,MF_low,MF_mid,MF_high,office,mixed_use_emp,retail_general,retail_big_box,mixed_use_res,industrial_light,industrial_heavy,warehouse,mixed_use_industrial,hotel,school
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Alameda,370969.0,522819.0,116356.0,89996.0,316467.0,189136400.0,1247090.0,64720403.0,8507673.0,15718792.0,74638163.0,10500357.0,133418922.0,0.0,11163305.0,21156689.0
Contra Costa,379336.0,206268.0,44445.0,44800.0,117023.0,85884480.0,0.0,47842933.0,4176959.0,3232822.0,34727026.0,6312963.0,25569076.0,0.0,4571760.0,16917210.0
Marin,91410.0,58978.0,21941.0,20497.0,16540.0,27929930.0,0.0,8359741.0,549700.4,2108715.0,0.0,0.0,4832766.0,0.0,834981.0,2893389.0
Napa,47255.0,15086.0,8275.0,3298.0,3513.0,3151911.0,0.0,11121400.0,0.0,238394.0,16074505.0,5050.0,9596063.0,0.0,1752606.0,0.0
San Francisco,107850.0,488245.0,25899.0,29599.0,432747.0,190102300.0,2121450.0,25168623.0,5663007.0,49415699.0,1643409.0,0.0,25170852.0,0.0,21278428.0,11076191.0
San Mateo,189842.0,218669.0,36884.0,47546.0,134239.0,106616700.0,0.0,29260539.0,1956800.0,4320592.0,13057907.0,1063516.0,35353390.0,0.0,10518816.0,14440778.0
Santa Clara,429596.0,695590.0,93268.0,96893.0,505429.0,254247100.0,9820000.0,126189993.0,8832801.0,28726655.0,179170651.0,709796.0,44406098.0,0.0,467922.0,19309848.0
Solano,144378.0,50649.0,26856.0,8845.0,14948.0,21590060.0,0.0,28736171.0,30623.0,1231327.0,37809985.0,0.0,23255941.0,0.0,2398765.0,722745.0
Sonoma,166240.0,76438.0,42563.0,14088.0,19787.0,62176590.0,96278.0,25978400.0,278015.8,2117385.0,1190180.0,5509.0,33804203.0,0.0,4420954.0,972325.0


In [84]:
# run checks: each pair of printed numbers is a building-level total followed
# by the same quantity rebuilt from the county summary
checked_sqft_cols = ['office', 'mixed_use_emp', 'retail_general', 'retail_big_box',
                     'mixed_use_res', 'industrial_light', 'industrial_heavy',
                     'warehouse', 'mixed_use_industrial', 'hotel', 'school']
# accumulate the per-category totals left to right, in the listed order
categorized_sqft = sum(county_summary[col].sum() for col in checked_sqft_cols)
rise_units = county_summary.MF_low.sum() + county_summary.MF_mid.sum() + county_summary.MF_high.sum()

print("compare total res units to SF + MF")
print(b.residential_units.sum())
print(county_summary.SF.sum() + county_summary.MF.sum())
print("compare total MF units to low/mid/high")
print(b.MF.sum())
print(rise_units)
print("compare total non-res sqft to non-res sqft by category")
print(round(b.non_residential_sqft.sum()))
print(round(categorized_sqft))

compare total res units to SF + MF
4259618.0
4259618.0
compare total MF units to low/mid/high
2332742.0
2332742.0
compare total non-res sqft to non-res sqft by category
2432883388.0
2315817516.0


In [85]:
# confirm that the non-res sqft discrepancy equals the non-res sqft sitting in
# residential building types (the rows collected in debug_nonres)
sqft_category_cols = ['office', 'mixed_use_emp', 'retail_general', 'retail_big_box',
                      'mixed_use_res', 'industrial_light', 'industrial_heavy',
                      'warehouse', 'mixed_use_industrial', 'hotel', 'school']
# accumulate the per-category totals left to right, in the listed order
sqft_in_categories = sum(county_summary[col].sum() for col in sqft_category_cols)
print(debug_nonres.non_residential_sqft.sum())
print(round(b.non_residential_sqft.sum()) - round(sqft_in_categories))

117065872.0
117065872.0


In [86]:
# write this run's county summary to its own sheet in the workbook
county_summary.to_excel(writer, sheet_name=sheet)
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in pandas 2.0,
# where calling it raises AttributeError. Keep using save() where it exists so
# behaviour on older pandas is unchanged; otherwise fall back to close(),
# which also writes the workbook to disk.
# NOTE(review): after close() the writer is finalized, so on pandas >= 2.0 the
# writer cell presumably has to be re-run before adding another sheet - confirm.
if hasattr(writer, "save"):
    writer.save()
else:
    writer.close()