In [1]:
import pandas as pd

In [2]:
# Create the Excel writer for the output workbook that collects the summary sheets.
# Per the original note: do not re-run this cell once it has been executed.
caleemod_workbook = "C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/CalEEMod_LandUseVars_PBA50EIR.xlsx"
writer = pd.ExcelWriter(caleemod_workbook)

In [63]:
# Choose the building-data CSV to summarize (one EIR run + year, 2015 or 2050)
# together with the name of the Excel sheet that will receive its summary.
file, sheet = "run182_building_data_2050_FBP.csv", "FBP 2050"

In [64]:
# Read the building table for the selected run and keep only the fields used below.
building_fields = ['zone_id', 'building_id', 'parcel_id', 'source', 'building_type',
                   'residential_units', 'non_residential_sqft']
b = pd.read_csv(
    "C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/" + file
)[building_fields]

In [65]:
# Read the parcel geography table; only the parcel id and its acreage are needed.
parcel_fields = ['PARCEL_ID', 'ACRES']
p = pd.read_csv(
    "C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/2021_02_25_parcels_geography.csv"
)[parcel_fields]

In [66]:
# Read the TAZ-to-county crosswalk, keeping just the zone and county columns.
crosswalk_fields = ['zone', 'county']
xwalk = pd.read_csv(
    "C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/taz_geography.csv"
)[crosswalk_fields]

In [67]:
# Replace county numbers with county names.
# A single lookup table + Series.replace is used instead of nine masked .loc
# assignments: writing strings into a (presumably numeric) column element-wise is
# a dtype-incompatible setitem, which recent pandas versions deprecate.
# Codes that are not listed in the table are left unchanged, as before.
county_names = {
    1: "San Francisco",
    2: "San Mateo",
    3: "Santa Clara",
    4: "Alameda",
    5: "Contra Costa",
    6: "Solano",
    7: "Napa",
    8: "Sonoma",
    9: "Marin",
}
xwalk["county"] = xwalk["county"].replace(county_names)

In [68]:
# Join parcel acres to buildings (left join: buildings without a matching parcel keep NaN acres).
# validate='many_to_one' makes pandas raise a MergeError if PARCEL_ID is not unique in
# the parcel table; without it a duplicated parcel would silently duplicate building
# rows and inflate every unit / sqft total computed afterwards.
b = b.merge(p, left_on='parcel_id', right_on='PARCEL_ID', how='left', validate='many_to_one')

In [69]:
# Join county to buildings using zone_id (left join: every building row is kept).
# validate='many_to_one' makes pandas raise a MergeError if a zone appears more than
# once in the crosswalk; without it such a zone would silently duplicate building
# rows and inflate the county totals.
b = b.merge(xwalk, left_on='zone_id', right_on='zone', how='left', validate='many_to_one')

In [70]:
# Derive "dua" for each building row: its residential units divided by the
# acreage of the parcel it was joined to.
b["dua"] = b["residential_units"].div(b["ACRES"])

In [71]:
# building types
#HS: single family residential
#HT: townhomes
#HM: multi family residential
#OF: office
#HO: hotel
#SC: school
#IL: light industrial
#IW: warehouse
#IH: heavy industrial
#RS: retail general
#RB: retail big box
#MR: mixed use residential focused
#MT: mixed use industrial focused
#ME: mixed use employment focused

In [72]:
# Copy residential units into a single-family (SF) or multifamily (MF) column,
# depending on which list the row's building type belongs to.
single_family_types = ['HS', 'HT']
multifamily_types = ['HM', 'MR', 'PA2', 'RS', 'OF', 'IL', 'HO', 'SC']

b.loc[b.building_type.isin(single_family_types), 'SF'] = b.residential_units
b.loc[b.building_type.isin(multifamily_types), 'MF'] = b.residential_units

In [73]:
# Split multifamily units into low / mid / high rise columns using dua thresholds:
#   low: dua < 32, mid: 32 <= dua < 50, high: dua >= 50
has_mf_units = b.MF > 0
low_density = b.dua < 32
mid_density = (b.dua >= 32) & (b.dua < 50)
high_density = b.dua >= 50

b.loc[has_mf_units & low_density, 'MF_low'] = b.residential_units
b.loc[has_mf_units & mid_density, 'MF_mid'] = b.residential_units
b.loc[has_mf_units & high_density, 'MF_high'] = b.residential_units

In [74]:
# Copy non-residential sqft into one category column per row, chosen by building type.
# The mapping lists, for each output column, the building types that feed it.
nonres_categories = {
    'office': ['OF', 'ME'],
    'retail': ['RS', 'RB', 'MR'],
    'industrial': ['IL', 'IH', 'IW', 'MT'],
    'commercial_other': ['HO', 'SC'],
}
for category, type_codes in nonres_categories.items():
    b.loc[b.building_type.isin(type_codes), category] = b.non_residential_sqft

In [75]:
# Debug check (residential): rows that have units but landed in neither SF nor MF,
# tallied by building type.
unassigned_res = (b.residential_units > 0) & b.SF.isnull() & b.MF.isnull()
debug_res = b[unassigned_res]
debug_res.building_type.value_counts()

Series([], Name: building_type, dtype: int64)

In [76]:
# Debug check (multifamily): MF rows that did not receive a low/mid/high rise value.
unassigned_mf = (b.MF > 0) & b.MF_low.isnull() & b.MF_mid.isnull() & b.MF_high.isnull()
debug_mf = b[unassigned_mf]
debug_mf

Unnamed: 0,zone_id,building_id,parcel_id,source,building_type,residential_units,non_residential_sqft,PARCEL_ID,ACRES,zone,...,dua,SF,MF,MF_low,MF_mid,MF_high,office,retail,industrial,commercial_other
1807128,353,1843295,2054505,h5_inputs,HM,1655.0,0.0,,,353,...,,,1655.0,,,,,,,


In [77]:
# Building 1843295 has no PARCEL_ID/ACRES, so it gets no dua-based class;
# place it in MF_high by hand.
is_override_building = b.building_id == 1843295
b.loc[is_override_building, 'MF_high'] = b.residential_units

# Re-run the multifamily check to see whether any unclassified MF rows remain.
still_unassigned_mf = (b.MF > 0) & b.MF_low.isnull() & b.MF_mid.isnull() & b.MF_high.isnull()
debug_mf = b[still_unassigned_mf]
debug_mf

Unnamed: 0,zone_id,building_id,parcel_id,source,building_type,residential_units,non_residential_sqft,PARCEL_ID,ACRES,zone,...,dua,SF,MF,MF_low,MF_mid,MF_high,office,retail,industrial,commercial_other


In [78]:
# Debug check (non-residential): rows with sqft that fell into none of the four
# non-res category columns, tallied by building type.
unassigned_nonres = (
    (b.non_residential_sqft > 0)
    & b.retail.isnull()
    & b.office.isnull()
    & b.industrial.isnull()
    & b.commercial_other.isnull()
)
debug_nonres = b[unassigned_nonres]
debug_nonres.building_type.value_counts()
# remaining non res sqft are in HS, HM, HT

HS    18404
HM     3441
HT       12
Name: building_type, dtype: int64

In [79]:
# Total every land-use column per county.
summary_columns = ["SF", 'MF', "MF_low", "MF_mid", "MF_high", "office", "retail", "industrial",
                   "commercial_other"]
county_summary = b.groupby('county')[summary_columns].sum()

In [80]:
# show the per-county totals for visual inspection before they are checked and exported
county_summary

Unnamed: 0_level_0,SF,MF,MF_low,MF_mid,MF_high,office,retail,industrial,commercial_other
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Alameda,370969.0,522819.0,116356.0,89996.0,316467.0,190383500.0,88946870.0,218557442.0,32319994.0
Contra Costa,379336.0,206268.0,44445.0,44800.0,117023.0,85884480.0,55252710.0,66609065.0,21488970.0
Marin,91410.0,58978.0,21941.0,20497.0,16540.0,27929930.0,11018160.0,4832766.0,3728370.0
Napa,47255.0,15086.0,8275.0,3298.0,3513.0,3151911.0,11359790.0,25675618.0,1752606.0
San Francisco,107850.0,488245.0,25899.0,29599.0,432747.0,192223800.0,80247330.0,26814261.0,32354619.0
San Mateo,189842.0,218669.0,36884.0,47546.0,134239.0,106616700.0,35537930.0,49474813.0,24959594.0
Santa Clara,429596.0,695590.0,93268.0,96893.0,505429.0,264067100.0,163749400.0,224286545.0,19777770.0
Solano,144378.0,50649.0,26856.0,8845.0,14948.0,21590060.0,29998120.0,61065926.0,3121510.0
Sonoma,166240.0,76438.0,42563.0,14088.0,19787.0,62272870.0,28373800.0,34999892.0,5393279.0


In [81]:
# Consistency checks: print building-level totals next to the totals rebuilt from
# the county summary so each pair can be compared by eye.
total_res_units = b.residential_units.sum()
sf_plus_mf_units = county_summary.SF.sum() + county_summary.MF.sum()

total_mf_units = b.MF.sum()
mf_units_by_rise = county_summary.MF_low.sum() + county_summary.MF_mid.sum() + county_summary.MF_high.sum()

total_nonres_sqft = b.non_residential_sqft.sum()
nonres_sqft_by_category = (county_summary.office.sum() + county_summary.retail.sum() +
                           county_summary.industrial.sum() + county_summary.commercial_other.sum())

print("compare total res units to SF + MF")
print(total_res_units)
print(sf_plus_mf_units)
print("compare total MF units to low/mid/high")
print(total_mf_units)
print(mf_units_by_rise)
print("compare total non-res sqft to non-res sqft by category")
print(round(total_nonres_sqft))
print(round(nonres_sqft_by_category))

compare total res units to SF + MF
4259618.0
4259618.0
compare total MF units to low/mid/high
2332742.0
2332742.0
compare total non-res sqft to non-res sqft by category
2432883388.0
2315817516.0


In [82]:
# Write the county summary to its own sheet in the output workbook.
county_summary.to_excel(writer, sheet_name=sheet)
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in pandas 2.0.
# Keep calling it where it still exists (unchanged behavior on older pandas);
# otherwise fall back to close(), which writes the workbook to disk.
# NOTE(review): close() also releases the file handle, so the writer presumably
# cannot take further sheets afterwards - confirm against the multi-run workflow.
if hasattr(writer, "save"):
    writer.save()
else:
    writer.close()