In [1]:
import glob
from pathlib import Path

import pandas as pd
import plotly.graph_objects as go

# =====================================
# FILE LOCATIONS

In [2]:
# Simulation output folders: one run set without and one with occupant
# variability.
folder_woOccVar, folder_wOccVar = (
    "C:/a/BldgSync/Simulation_Files_woOCC",
    "C:/a/BldgSync/Simulation_Files",
)

# Measured data: building metadata table and the meter time series.
measureddata_meta, measureddata_timeseries = (
    "../../the-building-data-genome-project/data/raw/meta_open.csv",
    "../../the-building-data-genome-project/data/raw/temp_open_utc.csv",
)

# =====================================
# CREATE SUMMARY FROM METADATA

In [3]:
# Load only the metadata fields used in this notebook and expose the
# building key under the name 'buildingid'.
metadata_fields = [
    'uid',
    'primaryspaceusage',
    'heatingtype',
    'numberoffloors',
    'sqft',
    'timezone',
    'yearbuilt',
]
df_summary = (
    pd.read_csv(measureddata_meta, usecols=metadata_fields)
    .rename(columns={"uid": "buildingid"})
)
df_summary

Unnamed: 0,buildingid,heatingtype,numberoffloors,primaryspaceusage,sqft,timezone,yearbuilt
0,PrimClass_Everett,,,Primary/Secondary Classroom,105530.00000,America/New_York,
1,UnivClass_Clifford,,,College Classroom,56969.00000,America/New_York,1967
2,Office_Elizabeth,,,Office,294651.00000,America/Los_Angeles,
3,Office_Ellie,,,Office,496517.00000,America/Los_Angeles,
4,PrimClass_Elisabeth,,,Primary/Secondary Classroom,233062.00000,America/New_York,
5,Office_Cristina,,,Office,92271.00000,America/New_York,2001
6,PrimClass_Jolie,Gas,2.0,Primary/Secondary Classroom,31505.93530,Europe/London,pre 1919
7,PrimClass_Jaylin,Gas,2.0,Primary/Secondary Classroom,47070.53470,Europe/London,1919-1945
8,Office_Jesus,Gas,1.0,Office,42646.57180,Europe/London,Post 1976
9,PrimClass_Esmeralda,,,Primary/Secondary Classroom,276443.00000,America/New_York,


In [4]:
# Conversion factor applied to the summed meter readings.
# NOTE(review): 3.412 is the kWh -> kBtu factor, so this assumes the readings
# are in kWh -- confirm against the data source.
KWH_TO_KBTU = 3.412

# The time series has a 'timestamp' column plus one column per building id.
df_timeseries = pd.read_csv(measureddata_timeseries)

# Drop the text 'timestamp' column BEFORE summing. Summing it along with the
# numeric columns concatenates every timestamp string and turns the result
# into an object-dtype Series; dropping it first keeps the totals float64.
summary_table_measured = (
    df_timeseries
    .drop(columns='timestamp')
    .sum()
    .mul(KWH_TO_KBTU)
    .to_frame(name='AnnualTotalElectricity_kBtu')
)

# Attach each building's measured total to the summary table; buildings with
# no matching time-series column get NaN.
df_summary['elec_kbtu_measured'] = df_summary['buildingid'].map(
    summary_table_measured['AnnualTotalElectricity_kBtu']
)
df_summary

Unnamed: 0,buildingid,heatingtype,numberoffloors,primaryspaceusage,sqft,timezone,yearbuilt,elec_kbtu_measured
0,PrimClass_Everett,,,Primary/Secondary Classroom,105530.00000,America/New_York,,71451.7
1,UnivClass_Clifford,,,College Classroom,56969.00000,America/New_York,1967,42251.2
2,Office_Elizabeth,,,Office,294651.00000,America/Los_Angeles,,262366
3,Office_Ellie,,,Office,496517.00000,America/Los_Angeles,,460770
4,PrimClass_Elisabeth,,,Primary/Secondary Classroom,233062.00000,America/New_York,,225613
5,Office_Cristina,,,Office,92271.00000,America/New_York,2001,99076.6
6,PrimClass_Jolie,Gas,2.0,Primary/Secondary Classroom,31505.93530,Europe/London,pre 1919,35196.3
7,PrimClass_Jaylin,Gas,2.0,Primary/Secondary Classroom,47070.53470,Europe/London,1919-1945,58960.5
8,Office_Jesus,Gas,1.0,Office,42646.57180,Europe/London,Post 1976,53859.7
9,PrimClass_Esmeralda,,,Primary/Secondary Classroom,276443.00000,America/New_York,,386029


# =====================================
# READ/SUMMARIZE SIMULATED DATA: WITHOUT OCC VAR

In [5]:
# Unit conversions for the simulation reports (values are in Joules).
J_TO_KWH = 0.000000277778
KWH_TO_KBTU = 3.412

# Without recursive=True each '**' behaves like '*', so this matches report
# files exactly three directory levels below the simulation folder.
allfiles = glob.glob(folder_woOccVar + "/**/**/**/hourly_consumption_by_fuel_to_csv_report.csv")

simulated_rows = []
for filename in allfiles:
    report = pd.read_csv(filename)
    elec = report['Electricity'].sum() * J_TO_KWH * KWH_TO_KBTU  # J to kBtu
    ng = report['Natural Gas'].sum() * J_TO_KWH * KWH_TO_KBTU  # J to kBtu

    # The building id is the first directory below the simulation folder.
    # Taking it from the relative path works with both '\\' and '/'
    # separators, unlike splitting the raw string on a backslash.
    bldg = Path(filename).relative_to(folder_woOccVar).parts[0]
    simulated_rows.append([bldg, elec, ng])

# Passing `columns` to the constructor keeps the expected columns even when no
# report files were found (assigning .columns afterwards fails on an empty frame).
df_simulated1 = pd.DataFrame(
    simulated_rows,
    columns=['bldg', 'elec_kbtu_simulated_woOccVar', 'ng_kbtu_simulated_woOccVar'],
)
df_simulated1

Unnamed: 0,bldg,elec_kbtu_simulated_woOccVar,ng_kbtu_simulated_woOccVar
0,Office_Abbey,4.645003e+06,1.948350e+05
1,Office_Abigail,7.172108e+05,4.241893e+04
2,Office_Al,6.616578e+06,2.535166e+05
3,Office_Alannah,6.844596e+05,4.031032e+04
4,Office_Aliyah,1.215807e+07,4.673834e+05
5,Office_Allyson,1.745076e+06,8.143633e+04
6,Office_Alyson,8.562370e+06,3.192206e+05
7,Office_Amelia,4.479675e+05,3.045683e+04
8,Office_Amelie,2.488549e+06,1.089415e+05
9,Office_Anastasia,8.161488e+06,3.057468e+05


# =====================================
# READ/SUMMARIZE SIMULATED DATA: WITH OCC VAR

In [6]:
# Unit conversions for the simulation reports (values are in Joules).
J_TO_KWH = 0.000000277778
KWH_TO_KBTU = 3.412

# Without recursive=True each '**' behaves like '*', so this matches report
# files exactly three directory levels below the simulation folder.
allfiles = glob.glob(folder_wOccVar + "/**/**/**/hourly_consumption_by_fuel_to_csv_report.csv")

simulated_rows = []
for filename in allfiles:
    report = pd.read_csv(filename)
    elec = report['Electricity'].sum() * J_TO_KWH * KWH_TO_KBTU  # J to kBtu
    ng = report['Natural Gas'].sum() * J_TO_KWH * KWH_TO_KBTU  # J to kBtu

    # The building id is the first directory below the simulation folder.
    # Taking it from the relative path works with both '\\' and '/'
    # separators, unlike splitting the raw string on a backslash.
    bldg = Path(filename).relative_to(folder_wOccVar).parts[0]
    simulated_rows.append([bldg, elec, ng])

# Passing `columns` to the constructor keeps the expected columns even when no
# report files were found (assigning .columns afterwards fails on an empty frame).
df_simulated2 = pd.DataFrame(
    simulated_rows,
    columns=['bldg', 'elec_kbtu_simulated_wOccVar', 'ng_kbtu_simulated_wOccVar'],
)
df_simulated2

Unnamed: 0,bldg,elec_kbtu_simulated_wOccVar,ng_kbtu_simulated_wOccVar
0,Office_Abbey,4.111074e+06,1.702918e+05
1,Office_Abigail,5.817519e+05,4.034952e+04
2,Office_Al,6.025884e+06,2.104459e+05
3,Office_Alannah,5.807501e+05,3.721372e+04
4,Office_Aliyah,1.118817e+07,3.779078e+05
5,Office_Allyson,1.563937e+06,7.143396e+04
6,Office_Alyson,7.938095e+06,2.553963e+05
7,Office_Amelia,3.775902e+05,2.858943e+04
8,Office_Amelie,2.244152e+06,9.403608e+04
9,Office_Anastasia,7.505338e+06,2.498290e+05


# =====================================
# COMBINED MEASURED AND SIMULATED DATA

In [7]:
# Each simulated result column is looked up by building id and attached to the
# summary table under the same column name. Buildings without a simulation
# result get NaN.
simulated_sources = [
    (df_simulated1, 'elec_kbtu_simulated_woOccVar'),
    (df_simulated1, 'ng_kbtu_simulated_woOccVar'),
    (df_simulated2, 'elec_kbtu_simulated_wOccVar'),
    (df_simulated2, 'ng_kbtu_simulated_wOccVar'),
]
for sim_frame, sim_column in simulated_sources:
    by_building = sim_frame.set_index('bldg')[sim_column]
    df_summary[sim_column] = df_summary['buildingid'].map(by_building)
df_summary

Unnamed: 0,buildingid,heatingtype,numberoffloors,primaryspaceusage,sqft,timezone,yearbuilt,elec_kbtu_measured,elec_kbtu_simulated_woOccVar,ng_kbtu_simulated_woOccVar,elec_kbtu_simulated_wOccVar,ng_kbtu_simulated_wOccVar
0,PrimClass_Everett,,,Primary/Secondary Classroom,105530.00000,America/New_York,,71451.7,5.007922e+06,1.147553e+06,4.608727e+06,7.446663e+05
1,UnivClass_Clifford,,,College Classroom,56969.00000,America/New_York,1967,42251.2,2.666000e+06,1.129812e+06,2.432724e+06,7.925181e+05
2,Office_Elizabeth,,,Office,294651.00000,America/Los_Angeles,,262366,1.194510e+07,1.413213e+06,1.176233e+07,9.502757e+05
3,Office_Ellie,,,Office,496517.00000,America/Los_Angeles,,460770,1.820609e+07,6.345435e+06,1.890271e+07,4.271398e+06
4,PrimClass_Elisabeth,,,Primary/Secondary Classroom,233062.00000,America/New_York,,225613,9.828621e+06,2.798449e+06,9.759971e+06,1.942151e+06
5,Office_Cristina,,,Office,92271.00000,America/New_York,2001,99076.6,4.194106e+06,1.440274e+06,3.869703e+06,8.959043e+05
6,PrimClass_Jolie,Gas,2.0,Primary/Secondary Classroom,31505.93530,Europe/London,pre 1919,35196.3,1.441702e+06,3.620026e+05,1.254286e+06,2.643773e+05
7,PrimClass_Jaylin,Gas,2.0,Primary/Secondary Classroom,47070.53470,Europe/London,1919-1945,58960.5,2.180356e+06,3.897456e+05,1.942940e+06,3.217674e+05
8,Office_Jesus,Gas,1.0,Office,42646.57180,Europe/London,Post 1976,53859.7,1.848933e+06,2.086721e+05,1.712623e+06,1.969562e+05
9,PrimClass_Esmeralda,,,Primary/Secondary Classroom,276443.00000,America/New_York,,386029,1.122722e+07,3.643869e+06,1.112047e+07,2.538360e+06


# =====================================
# COMPARE RESULTS

In [10]:
# Measured vs. simulated annual electricity (simulation without occupant
# variability), one marker trace per primary space usage.
usage_types = df_summary['primaryspaceusage'].unique()

fig = go.Figure()
for usage in usage_types:
    usage_rows = df_summary[df_summary['primaryspaceusage'] == usage]
    usage_trace = go.Scatter(
        x=usage_rows['elec_kbtu_measured'],
        y=usage_rows['elec_kbtu_simulated_woOccVar'],
        mode='markers',
        name=usage,
    )
    fig.add_trace(usage_trace)

# Both axes share the same fixed range so the plot is square in data units.
x_axis = {
    'range': [0, 16000000],
    'title_text': 'Annual measured electricity consumption [kBtu]',
    'nticks': 10,
}
y_axis = {
    'range': [0, 16000000],
    'title_text': 'Annual simulated electricity consumption [kBtu]',
    'nticks': 10,
}
fig.update_layout(
    title_text="without occupant variability",
    width=800,
    height=600,
    xaxis=x_axis,
    yaxis=y_axis,
)

fig.show()

In [11]:
# Measured vs. simulated annual electricity (simulation with occupant
# variability), one marker trace per primary space usage.
usage_types = df_summary['primaryspaceusage'].unique()

fig = go.Figure()
for usage in usage_types:
    usage_rows = df_summary[df_summary['primaryspaceusage'] == usage]
    usage_trace = go.Scatter(
        x=usage_rows['elec_kbtu_measured'],
        y=usage_rows['elec_kbtu_simulated_wOccVar'],
        mode='markers',
        name=usage,
    )
    fig.add_trace(usage_trace)

# Both axes share the same fixed range so the plot is square in data units.
x_axis = {
    'range': [0, 16000000],
    'title_text': 'Annual measured electricity consumption [kBtu]',
    'nticks': 10,
}
y_axis = {
    'range': [0, 16000000],
    'title_text': 'Annual simulated electricity consumption [kBtu]',
    'nticks': 10,
}
fig.update_layout(
    title_text="with occupant variability",
    width=800,
    height=600,
    xaxis=x_axis,
    yaxis=y_axis,
)

fig.show()

In [9]:
# Measured vs. simulated annual electricity, one marker trace per timezone.
#
# The summary table has no plain 'elec_kbtu_simulated' column; the simulated
# electricity lives in 'elec_kbtu_simulated_woOccVar' and
# 'elec_kbtu_simulated_wOccVar'. Pick the scenario to plot here.
simulated_column = 'elec_kbtu_simulated_woOccVar'

entries = df_summary.timezone.unique()
fig = go.Figure()

for entry in entries:
    df_filtered = df_summary[df_summary['timezone'] == entry]

    fig.add_trace(go.Scatter(
        x=df_filtered['elec_kbtu_measured'],
        y=df_filtered[simulated_column],
        mode='markers',
        name=entry,
    ))


fig.update_layout(
    # Title added so the figure states which simulation scenario it shows.
    title_text = "by timezone, without occupant variability",
    width = 800,
    height = 600,
    xaxis = dict(
        range = [0, 16000000],
        title_text = 'Annual measured electricity consumption [kBtu]',
        nticks=10,
    ),
    yaxis = dict(
        range = [0, 16000000],
        title_text = 'Annual simulated electricity consumption [kBtu]',
        nticks=10,
    ),
)

fig.show()

In [20]:
# Distinct timezone labels present in the summary table.
df_summary['timezone'].unique()

array(['America/New_York', 'America/Los_Angeles', 'Europe/London',
       'America/Denver', 'America/Chicago', 'Asia/Singapore',
       'Europe/Zurich', 'America/Phoenix'], dtype=object)