Compare specific energy use across buildings, looking at different features.

In [134]:
import pandas as pd
import plotly.express as px
import os

In [135]:
metadata = pd.read_excel("../data/raw/Potential objects.xlsx")
metadata.columns

Index(['Building', 'Address', 'Use Type', 'Addr meter', 'Energy', 'Unit',
       'Heat', 'measurement interval', 'Note', 'Data for DKSR',
       'Energy source', 'Heating output (kW)', 'Heating output - numeric',
       'Energy consumption (m3, GJ)', 'School hours, holiday calendar',
       'Variable or fixed gas pricing (Kč without DPH)',
       'Energy reference surface (m2)', 'Volume (m3)', 'EPC', 'EPC - numeric',
       'Number of floors', 'Floors above ground', 'Floors below ground',
       'Number of classrooms', 'Number of gyms', 'Number of kitchen',
       'Building height (m)', 'Year of construction', 'Materials',
       'Heat insulation', 'Windows', 'Windows area (m2)'],
      dtype='object')

In [136]:
metadata[['Heating output - numeric']]

Unnamed: 0,Heating output - numeric
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,


In [137]:
all_buildings_yearly_kwh = pd.read_parquet("../data/processed/all_buildings_yearly_kwh.parquet")

merge energy demand into metadata

In [138]:
demand_yearly_kwh = all_buildings_yearly_kwh.T
demand_yearly_kwh.rename(columns={c:f"{c}_kWh" for c in demand_yearly_kwh.columns}, inplace=True)
demand_yearly_kwh.fillna(0, inplace=True)
demand_yearly_kwh
## Delete 10.10PF1 - invalid data for 2022

Unnamed: 0,2021_kWh,2022_kWh,2023_kWh
10.10-PF1,0.0,57384.004,147694.8
10.25-PF1,797269.782,716797.432,619822.1
10.26-TF1,0.0,0.0,530504.2
10.28-PF1,756792.992,639437.036,657379.4
10.33-PF1,683271.742,591185.708,536475.2
10.4-TF1,0.0,0.0,444586.9
10.6-PF2,283316.142,547596.964,316734.9
10.9.2-PF1,0.0,0.0,145568.9
2.2-PF1,0.0,376257.696,369424.2
2.3.-PF1,886222.474,997653.954,826100.8


In [139]:
def get_demand(row, col):
    try:
        return demand_yearly_kwh.loc[row["Addr meter"].replace("/", "-"), col] 
    except: 
        return None

for c in demand_yearly_kwh.columns:
    metadata[c] = metadata.apply(lambda row: get_demand(row, c), axis=1)


In [140]:
metadata["Energy reference surface (m2)"].notna().sum() 

11

In [143]:
metadata["23-22-21"] = metadata["2021_kWh"] - metadata["2022_kWh"] - metadata["2021_kWh"]
metadata.sort_values("23-22-21", inplace=True)
metadata["Year of construction"] = metadata["Year of construction"].astype(str).str.replace(".0", "").replace("nan", "-")
metadata["plot_name"] = metadata["Building"] + "<br>" + metadata["Use Type"] + "<br>" + metadata["Year of construction"]
metadata = metadata.dropna(subset=demand_yearly_kwh.columns, how="all").reset_index()
metadata.loc[:, list(demand_yearly_kwh.columns) + ["plot_name"]]


Unnamed: 0,2021_kWh,2022_kWh,2023_kWh,plot_name
0,1725412.736,1345743.654,974415.892,DS Chodov<br>Social (Elderly home)<br>1990
1,0.0,780202.158,671037.806,DS Elišky Purkyňové<br>Social<br>2012
2,797269.782,716797.432,619822.1012,ZŠ Karla Čapka<br>School<br>1915
3,756792.992,639437.036,657379.4292,ZŠ Gutova<br>School<br>1970
4,683271.742,591185.708,536475.204,ZŠ U Vršovického nádraží<br>School<br>1930
5,283316.142,547596.964,316734.892,Areál Gutova<br>Sport areal<br>-
6,0.0,376257.696,369424.23,Gymnázium Na Vítězné pláni<br>School<br>1961
7,0.0,57384.004,147694.834,MŠ Kodaňská<br>School<br>-
8,0.0,0.0,530504.244,ZŠ Brigádníků<br>School<br>1960
9,0.0,0.0,145568.8668,MŠ Chmelová<br>School<br>-


In [145]:
fig = px.bar(metadata.loc[:, list(demand_yearly_kwh.columns) + ["plot_name"]].set_index("plot_name"), 
       barmode="group",
       title="Annual Energy Demand (kWh)",
       template="plotly_white",
       #orientation="h",
       width=1000,
       height=600)

fig.update_layout(yaxis_title="kWh",
                  xaxis_title="",
                  legend_title="Sensor")
fig.write_image("../reports/annual_energy_demand.png")
fig.show()

Specific Demand

In [8]:
for c in demand_yearly_kwh.columns:
    metadata[f"{c}/m²"] = metadata[c] / metadata["Energy reference surface (m2)"]

In [31]:
uses = pd.DataFrame(metadata["Use Type"].value_counts())
fig = px.bar(uses, 
             #names=uses.index, 
             #values=uses["count"], 
             width=600, height=400,
             title="Building Use Types",
             template="plotly_white",
             text_auto=True,
             orientation="h",
             barmode="stack")
fig.update_layout(showlegend=False,
                  xaxis_title="Count",
                  yaxis_title="",)

fig.write_image("../reports/building_types.png")
fig.show()

In [10]:
metadata.sample(3).T

Unnamed: 0,15,16,9
Building,Gymnázium Na Vítězné pláni,Gymnázium a Hudební škola hlavního města Prahy,Areál Gutova
Address,"Na Vítězné pláni 1160/1, Praha 4","Komenského nám. 400/9, Praha 3",Gutova 1987/39
Use Type,School,School,Sport areal
Addr meter,2.2/PF1,2.3/PF1,10.6/PF1
Energy,gas,gas,gas
Unit,"m3, Nm3","m3, Nm3","m3, Nm3"
Heat,Heat,Heat,Heat
measurement interval,1h,1h,1h
Note,,,Delete!!
Data for DKSR,YES,YES,NO


In [11]:
metadata["Use Type"]

0                    School
1                    School
2                    School
3                    School
4                    School
5                    School
6                    School
7                    School
8                    School
9               Sport areal
10              Sport areal
11    Social (Elderly home)
12                   Social
13                   Social
14                  Culture
15                   School
16                   School
Name: Use Type, dtype: object

In [12]:
specific_demand_cols = [c for c in metadata.columns if c.endswith("kWh/m²")]
temp_df = metadata.loc[:, specific_demand_cols + ["Building", "EPC - numeric", "Use Type"]].set_index("Building")

temp_df.rename(columns={"EPC - numeric": "EPC kWh/m²"}, inplace=True)
temp_df["valid"] = temp_df.drop("Use Type", axis=1).sum(axis=1) - temp_df["EPC kWh/m²"]

temp_df = temp_df.loc[temp_df["valid"]>0, :]
temp_df.drop("valid", axis=1, inplace=True)
temp_df.index = temp_df.index + "<br>" + temp_df["Use Type"]
temp_df

Unnamed: 0,2021_kWh/m²,2022_kWh/m²,2023_kWh/m²,EPC kWh/m²,Use Type
ZŠ Brigádníků<br>School,,,71.189512,243.9,School
ZŠ U Vršovického nádraží<br>School,118.029321,102.12225,92.671481,207.5,School
ZŠ Švehlova<br>School,,,50.463892,105.0,School
DS Chodov<br>Social (Elderly home),153.889827,120.027083,86.908303,224.7,Social (Elderly home)
DS Elišky Purkyňové<br>Social,,106.862369,91.910397,181.2,Social
Gymnázium Na Vítězné pláni<br>School,,60.404189,59.307149,96.1,School


In [14]:
fig = px.bar(temp_df.drop("Use Type", axis=1),
       barmode="group",
       template="plotly_white",
       title="Annual Primary Energy Demand (kWh/m²)<br>Measured vs. Label",
       width=800,
       height=400)
fig.update_layout(
       xaxis_title = "Building",
       yaxis_title = "kWh/m²"   
)
fig.show()
fig.write_image("../reports/annual_primary_energy_demand.png")