In [1]:
import pandas as pd

In [2]:
# establish excel writer / workbook (and don't re-run)
writer = pd.ExcelWriter("C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/CalEEMod_LandUseVars_PBA50EIR.xlsx")

In [67]:
# select input file for summarization- will be an EIR run + year (2015 or 2050)
file = "run314_building_data_2050_NP.csv"
# name output excel sheet 
sheet = "NP 2050"

In [68]:
#import building data and subset columns
b = pd.read_csv("C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/"+file)
b = b[['zone_id', 'building_id', 'parcel_id', 'source', 'building_type', 'residential_units', 'non_residential_sqft']]

In [69]:
# import parcel info and subset columns
p = pd.read_csv("C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/2021_02_25_parcels_geography.csv")
p = p[['PARCEL_ID', 'ACRES']]

In [70]:
# import TAZ-county crosswalk
xwalk = pd.read_csv("C:/Users/etheocharides/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50/CalEEMod/taz_geography.csv")
xwalk = xwalk[['zone', 'county']]

In [71]:
# replace county numbers with county names
xwalk.loc[xwalk.county == 1, "county"] = "San Francisco"
xwalk.loc[xwalk.county == 2, "county"] = "San Mateo"
xwalk.loc[xwalk.county == 3, "county"] = "Santa Clara"
xwalk.loc[xwalk.county == 4, "county"] = "Alameda"
xwalk.loc[xwalk.county == 5, "county"] = "Contra Costa"
xwalk.loc[xwalk.county == 6, "county"] = "Solano"
xwalk.loc[xwalk.county == 7, "county"] = "Napa"
xwalk.loc[xwalk.county == 8, "county"] = "Sonoma"
xwalk.loc[xwalk.county == 9, "county"] = "Marin"

In [72]:
# join parcel acres to buildings
b = b.merge(p, left_on='parcel_id', right_on='PARCEL_ID', how='left')

In [73]:
# join county to buildings using zone_id
b = b.merge(xwalk, left_on='zone_id', right_on='zone', how='left')

In [74]:
# create DUA column from building and parcel data
b["dua"] = b.residential_units / b.ACRES

In [75]:
# building types
#HS: single family residential
#HT: townhomes
#HM: multi family residential
#OF: office
#HO: hotel
#SC: school
#IL: light industrial
#IW: warehouse
#IH: heavy industrial
#RS: retail general
#RB: retail big box
#MR: mixed use residential focused
#MT: mixed use industiral focused
#ME: mixed use employment focused

In [76]:
# add single family and multifamily building vars
b.loc[(b.building_type == 'HS') | (b.building_type == 'HT'), 'SF'] = b.residential_units
b.loc[(b.building_type == 'HM') | (b.building_type == 'MR') | (b.building_type == 'PA2') | (b.building_type == 'RS') |
      (b.building_type == 'OF') | (b.building_type == 'IL') | (b.building_type == 'HO') | (b.building_type == 'SC'), 
      'MF'] = b.residential_units

In [77]:
# add multifamily low, mid, and high rise building vars
b.loc[(b.MF > 0) & (b.dua < 32), 'MF_low'] = b.residential_units
b.loc[(b.MF > 0) & (b.dua >= 32)& (b.dua < 50), 'MF_mid'] = b.residential_units
b.loc[(b.MF > 0) & (b.dua >= 50), 'MF_high'] = b.residential_units

In [78]:
# add office, retail, industrial, and commercial_other building vars
b.loc[(b.building_type == 'OF') | (b.building_type == 'ME'), 'office'] = b.non_residential_sqft
b.loc[(b.building_type == 'RS') | (b.building_type == 'RB')| (b.building_type == 'MR'), 
      'retail'] = b.non_residential_sqft
b.loc[(b.building_type == 'IL') | (b.building_type == 'IH') | (b.building_type == 'IW') | (b.building_type == 'MT'), 
      'industrial'] = b.non_residential_sqft
b.loc[(b.building_type == 'HO') | (b.building_type == 'SC'), 'commercial_other'] = b.non_residential_sqft

In [79]:
county_summary = b[["SF", 'MF', "MF_low", "MF_mid", "MF_high", "office", "retail", "industrial", 
                        "commercial_other", "county"]].groupby('county').sum()

In [80]:
county_summary

Unnamed: 0_level_0,SF,MF,MF_low,MF_mid,MF_high,office,retail,industrial,commercial_other
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Alameda,420305.0,420748.0,99211.0,59970.0,261567.0,180728400.0,97085740.0,204154857.0,32303114.0
Contra Costa,498344.0,211507.0,41942.0,69507.0,100058.0,77737610.0,59011310.0,51874952.0,21488970.0
Marin,92460.0,41967.0,21657.0,9789.0,10521.0,28598520.0,11089920.0,4427530.0,2986257.0
Napa,51215.0,15520.0,8416.0,3635.0,3469.0,3480191.0,12528620.0,25248262.0,1752606.0
San Francisco,116480.0,405905.0,28202.0,46519.0,331184.0,201383000.0,102981800.0,29963292.0,32354619.0
San Mateo,210612.0,194335.0,31616.0,55248.0,107471.0,108608400.0,33848080.0,44081895.0,23852232.0
Santa Clara,499778.0,617680.0,73740.0,64405.0,477880.0,291583800.0,173656700.0,216044678.0,19777770.0
Solano,152705.0,47532.0,24286.0,5895.0,17351.0,17707210.0,27945680.0,44623827.0,3015491.0
Sonoma,178936.0,78219.0,39462.0,8101.0,30656.0,58613630.0,28706290.0,32147996.0,5174078.0


In [81]:
# run checks
print("compare total res units to SF + MF")
print(b.residential_units.sum())
print(county_summary.SF.sum() + county_summary.MF.sum())
print("compare total MF units to low/mid/high")
print(b.MF.sum())
print(county_summary.MF_low.sum() + county_summary.MF_mid.sum() + county_summary.MF_high.sum())
print("compare total non-res sqft to non-res sqft by category")
print(round(b.non_residential_sqft.sum()))
print(round(county_summary.office.sum() + county_summary.retail.sum() + county_summary.industrial.sum() + 
            county_summary.industrial.sum()))

compare total res units to SF + MF
4254248.0
4254248.0
compare total MF units to low/mid/high
2033413.0
2031758.0
compare total non-res sqft to non-res sqft by category
2437371936.0
2820429470.0


In [82]:
# debug check- res
debug_res = b[(b.residential_units > 0) & (b.SF.isnull()) & (b.MF.isnull())]
debug_res.building_type.value_counts()

Series([], Name: building_type, dtype: int64)

In [83]:
# debug check- multifamlily 
debug_mf = b[(b.MF > 0) & (b.MF_low.isnull()) & (b.MF_mid.isnull()) & (b.MF_high.isnull())]
debug_mf

Unnamed: 0,zone_id,building_id,parcel_id,source,building_type,residential_units,non_residential_sqft,PARCEL_ID,ACRES,zone,...,dua,SF,MF,MF_low,MF_mid,MF_high,office,retail,industrial,commercial_other
1821000,353,1843295,2054505,h5_inputs,HM,1655.0,0.0,,,353,...,,,1655.0,,,,,,,


In [84]:
# move building without a PARCEL_ID/ACRES to MF_high manually
b.loc[(b.building_id == 1843295), 'MF_high'] = b.residential_units
# re-check
debug_mf = b[(b.MF > 0) & (b.MF_low.isnull()) & (b.MF_mid.isnull()) & (b.MF_high.isnull())]
debug_mf

Unnamed: 0,zone_id,building_id,parcel_id,source,building_type,residential_units,non_residential_sqft,PARCEL_ID,ACRES,zone,...,dua,SF,MF,MF_low,MF_mid,MF_high,office,retail,industrial,commercial_other


In [85]:
# debug check- nonres
debug_nonres = b[(b.non_residential_sqft > 0) & (b.retail.isnull()) & (b.office.isnull()) & (b.industrial.isnull()) & 
           (b.commercial_other.isnull())]
debug_nonres.building_type.value_counts()
# remaining non res sqft are in HS, HM, HT

HS    19519
HM     4851
HT       12
Name: building_type, dtype: int64

In [86]:
# write to excel workbook
county_summary.to_excel(writer, sheet_name=sheet)
writer.save()