# Summarise and Present Building Load Data

In [None]:
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
def _summarise_load_dir(load_dir, load_column):
    """Summarise all CSV files found directly inside ``load_dir``.

    Parameters
    ----------
    load_dir : str
        Directory expected to contain CSV files with a ``datetime`` column
        and a ``load_column`` column.
    load_column : str
        Name of the column whose mean is reported.

    Returns
    -------
    tuple
        ``(has_data, mean_load, n_years)``. ``has_data`` is False, and the
        other two values are 0, when the directory is missing or holds no
        CSV files.
    """
    if not os.path.isdir(load_dir):
        return False, 0, 0

    csv_paths = [
        os.path.join(load_dir, name)
        for name in os.listdir(load_dir)
        if name.endswith('.csv') and os.path.isfile(os.path.join(load_dir, name))
    ]
    if not csv_paths:
        # pd.concat raises ValueError on an empty list, so a directory
        # without any CSV files is reported as "no data" instead.
        return False, 0, 0

    load_data = pd.concat([pd.read_csv(path) for path in csv_paths], ignore_index=True)
    timestamps = pd.to_datetime(load_data['datetime'])

    mean_load = load_data[load_column].mean()
    n_years = len(timestamps.dt.year.unique())
    return True, mean_load, n_years


def get_building_summary_data(building_id, data_dir_path):
    """Collect summary statistics for one building.

    Reads the CSV files under
    ``<data_dir_path>/UCam_Building_b<building_id>/electricity`` and
    ``.../gas`` (either may be absent) and looks up the building's GIA in
    ``<data_dir_path>/building_floor_roof_areas.csv``.

    Parameters
    ----------
    building_id : int or str
        Identifier used to build the directory name ``UCam_Building_b<id>``
        and the ``b<id>`` key looked up in the ``Building ID`` column.
    data_dir_path : str
        Directory containing the per-building folders and the areas CSV.

    Returns
    -------
    dict
        Keys ``building_id``, ``n_elec_years``, ``mean_elec_load``,
        ``has_gas``, ``n_gas_years``, ``mean_gas_load``, ``building_gia``
        and ``has_elec``. Means are taken over the
        ``equipment load [kWh]`` and ``heating load [kWh]`` columns; the
        year counts are the number of distinct calendar years in the
        ``datetime`` column. Missing data is reported as 0.

    Raises
    ------
    IndexError
        If the areas CSV has no row for ``b<building_id>``.
    """
    building_dir = os.path.join(data_dir_path, f'UCam_Building_b{building_id}')

    has_elec, mean_elec_load, n_elec_years = _summarise_load_dir(
        os.path.join(building_dir, 'electricity'), 'equipment load [kWh]'
    )
    has_gas, mean_gas_load, n_gas_years = _summarise_load_dir(
        os.path.join(building_dir, 'gas'), 'heating load [kWh]'
    )

    gia_path = os.path.join(data_dir_path, 'building_floor_roof_areas.csv')
    gia_data = pd.read_csv(gia_path)
    building_gia = gia_data[gia_data['Building ID'] == f'b{building_id}']['GIA (m2)'].values[0]

    return {
        'building_id': building_id,
        'n_elec_years': n_elec_years,
        'mean_elec_load': mean_elec_load,
        'has_gas': has_gas,
        'n_gas_years': n_gas_years,
        'mean_gas_load': mean_gas_load,
        'building_gia': building_gia,
        # Appended last so the order of the pre-existing keys is unchanged.
        'has_elec': has_elec,
    }

In [None]:
# Discover the IDs of all buildings that have a data directory.
data_dir_path = 'processed_data'
building_dirs = [f for f in os.listdir(data_dir_path) if os.path.isdir(os.path.join(data_dir_path, f))]

# Only accept directories named exactly 'UCam_Building_b<N>' (the name the
# summary function rebuilds from the ID). Unrelated directories such as
# '.ipynb_checkpoints' contain no digits and would otherwise be turned into a
# phantom building 0 by a bare int() call.
regex = re.compile(r'UCam_Building_b(\d+)')
building_ids = sorted(
    int(match.group(1))
    for match in map(regex.fullmatch, building_dirs)
    if match is not None
)

In [None]:
# Build one summary record per discovered building, in ascending ID order.
summary_data = [
    get_building_summary_data(building_id, data_dir_path)
    for building_id in building_ids
]

In [None]:
# plot summary data
import matplotlib.ticker
from matplotlib.lines import Line2D

# Scatter of mean electricity load against GIA, one marker per building.
# Marker colour encodes whether gas data exists; marker area grows with the
# total number of data years (electricity + gas).
mean_elec_loads = [d['mean_elec_load'] for d in summary_data]
# NOTE: despite the name these are colour codes: 0 = has gas data, 1 = no gas data.
has_gas = [0 if d['has_gas'] else 1 for d in summary_data]
n_data_years = [d['n_elec_years'] + d['n_gas_years'] for d in summary_data]
building_gias = [d['building_gia'] for d in summary_data]

size_scale_exp = 1.5
marker_alpha = 0.33
fig, ax = plt.subplots()
sc = ax.scatter(
    building_gias,
    mean_elec_loads,
    c=has_gas,
    cmap='tab20c',
    # Pin the colour range so each code maps to the same colour even when
    # only one of the two categories is present in the data.
    vmin=0,
    vmax=1,
    s=np.array(n_data_years)**size_scale_exp,
    alpha=marker_alpha,
)
ax.set_xlabel('GIA (m^2)')
ax.set_ylabel('Mean electricity load (kWh)')

# Legend handles are built explicitly. PathCollection.legend_elements drops
# entries that fall outside the plotted data range, which would leave fewer
# handles than labels and attach labels to the wrong markers.
size_ticks = [10,20,30]
size_handles = [
    Line2D(
        [], [],
        linestyle='',
        marker='o',
        color='k',
        alpha=marker_alpha,
        # scatter's `s` is an area in points^2; Line2D wants a diameter.
        markersize=np.sqrt(years**size_scale_exp),
    )
    for years in size_ticks
]
size_leg = ax.legend(size_handles, size_ticks, loc='upper right', title='No. data years')
# Keep the size legend alive: the next ax.legend call replaces the axes' legend.
ax.add_artist(size_leg)

gas_code_labels = {0: 'Has gas', 1: 'No gas'}
color_handles = [
    Line2D(
        [], [],
        linestyle='',
        marker='o',
        color=sc.cmap(sc.norm(code)),
        alpha=marker_alpha,
    )
    for code in gas_code_labels
]
color_leg = ax.legend(color_handles, list(gas_code_labels.values()), loc='upper left', title='Gas data?')
fig.savefig('building_data_summary.png')

# Second export, zoomed in on the smaller buildings.
ax.set_ylim(0, 200)
ax.set_xlim(0, 8000)
fig.savefig('building_data_summary_zoomed.png')

TODO: convert the summary scatter plot above into an interactive Plotly scatter plot with helpful hover information.