# Summarise and Present Building Load Data

In [None]:
import os
import re
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from plotly.offline import plot
import plotly.graph_objects as go

In [None]:
def _summarise_load_dir(load_dir, load_column):
    """Summarise the CSV load data stored in a single energy-type directory.

    Args:
        load_dir: Directory expected to contain one or more ``.csv`` files,
            each with a ``datetime`` column and a ``load_column`` column.
        load_column: Name of the column holding the load values.

    Returns:
        Tuple ``(has_data, mean_load, n_years)``. When the directory is
        missing or holds no CSV files the result is ``(False, 0, 0)``.
    """
    if not os.path.isdir(load_dir):
        return False, 0, 0

    # Sorted so the concatenation order does not depend on the filesystem.
    csv_paths = [
        os.path.join(load_dir, f) for f in sorted(os.listdir(load_dir))
        if f.endswith('.csv') and os.path.isfile(os.path.join(load_dir, f))
    ]
    if not csv_paths:
        # pd.concat raises ValueError on an empty list, so an empty
        # directory is treated the same as a missing one.
        return False, 0, 0

    load_data = pd.concat([pd.read_csv(p) for p in csv_paths], ignore_index=True)
    load_data['datetime'] = pd.to_datetime(load_data['datetime'])

    mean_load = load_data[load_column].mean()
    n_years = len(load_data['datetime'].dt.year.unique())
    return True, mean_load, n_years


def get_building_summary_data(building_id, data_dir_path):
    """Collect summary statistics and geometry for one building.

    Electricity data is read from
    ``<data_dir_path>/UCam_Building_b<building_id>/electricity`` and gas data
    from the sibling ``gas`` directory. Geometry is looked up in
    ``<data_dir_path>/building_floor_roof_areas.csv``.

    Args:
        building_id: Numeric building ID (without the ``b`` prefix).
        data_dir_path: Root directory of the processed dataset.

    Returns:
        Dict with keys ``building_id``, ``n_elec_years``, ``mean_elec_load``,
        ``has_gas``, ``n_gas_years``, ``mean_gas_load``, ``building_gia``,
        ``building_no_floors`` and ``building_roof_area``. Mean loads are the
        means of the ``equipment load [kWh]`` / ``heating load [kWh]``
        columns; year counts are the numbers of distinct calendar years in
        the ``datetime`` column. Missing energy data yields 0 for both.

    Raises:
        IndexError: If the building has no row in the floor/roof area table.
    """
    building_dir = os.path.join(data_dir_path, f'UCam_Building_b{building_id}')

    # The "has electricity" flag is not part of the returned summary.
    _, mean_elec_load, n_elec_years = _summarise_load_dir(
        os.path.join(building_dir, 'electricity'), 'equipment load [kWh]')
    has_gas, mean_gas_load, n_gas_years = _summarise_load_dir(
        os.path.join(building_dir, 'gas'), 'heating load [kWh]')

    gia_path = os.path.join(data_dir_path, 'building_floor_roof_areas.csv')
    gia_data = pd.read_csv(gia_path)
    # Filter the table once; index per column so each value keeps its dtype.
    building_rows = gia_data[gia_data['Building ID'] == f'b{building_id}']
    building_gia = building_rows['GIA (m2)'].values[0]
    building_no_floors = building_rows['Number of floors'].values[0]
    building_roof_area = building_rows['Approx RA (m2)'].values[0]

    return {
        'building_id': building_id,
        'n_elec_years': n_elec_years,
        'mean_elec_load': mean_elec_load,
        'has_gas': has_gas,
        'n_gas_years': n_gas_years,
        'mean_gas_load': mean_gas_load,
        'building_gia': building_gia,
        'building_no_floors': building_no_floors,
        'building_roof_area': building_roof_area
    }

In [None]:
# Discover the numeric IDs of all buildings that have a data directory.
data_dir_path = os.path.join('processed_data')
building_dirs = [f for f in os.listdir(data_dir_path) if os.path.isdir(os.path.join(data_dir_path, f))]
# Match the full 'UCam_Building_b<id>' naming scheme. Extracting "any digits"
# would turn a digit-less directory (e.g. '.ipynb_checkpoints') into a bogus
# building ID 0, and would raise TypeError on names with two digit groups.
regex = re.compile(r'UCam_Building_b(\d+)')
building_dir_matches = (regex.fullmatch(f) for f in building_dirs)
building_ids = sorted(int(m.group(1)) for m in building_dir_matches if m)

In [None]:
# Build one summary record per building, following the order of building_ids.
summary_data = [
    get_building_summary_data(b_id, data_dir_path)
    for b_id in building_ids
]

In [None]:
# Sanity check: show the summary record of the first building in summary_data.
print(summary_data[0])

In [None]:
# Total number of data years across all buildings: electricity, then gas.
print(sum(d['n_elec_years'] for d in summary_data))
print(sum(d['n_gas_years'] for d in summary_data))

In [None]:
# Static scatter plot of the summary data: GIA against mean electricity load.
# Marker colour encodes gas-data availability; marker area grows with the
# total number of data years (electricity + gas).
mean_elec_loads = [d['mean_elec_load'] for d in summary_data]
# Colour codes: 0 = has gas, 1 = no gas (matches the legend label order below).
has_gas = [int(not d['has_gas']) for d in summary_data]
n_data_years = [d['n_elec_years'] + d['n_gas_years'] for d in summary_data]
building_gias = [d['building_gia'] for d in summary_data]

# Exponent applied to the year count to get the marker size.
size_scale_exp = 1.5
fig, ax = plt.subplots()
sc = ax.scatter(
    building_gias,
    mean_elec_loads,
    c=has_gas,
    cmap='tab20c',
    s=np.power(n_data_years, size_scale_exp),
    alpha=0.33,
)
ax.set_xlabel('GIA (m^2)')
ax.set_ylabel('Mean electricity load (kW)')

# Size legend: sample markers for 10, 20 and 30 data years.
size_ticks = [10,20,30]
size_handles = sc.legend_elements("sizes", num=np.power(size_ticks, size_scale_exp))[0]
size_leg = ax.legend(size_handles, size_ticks, loc='upper right', title='No. data years')
# Colour legend: creating it replaces the size legend as the axes legend,
# so both are re-attached explicitly afterwards.
color_handles = sc.legend_elements("colors")[0]
color_leg = ax.legend(color_handles, ['Has gas','No gas'], loc='upper left', title='Gas data?')
ax.add_artist(size_leg)
ax.add_artist(color_leg)

ax.set_ylim(0, 200)
ax.set_xlim(0, 8000)
plt.show()

In [None]:
# create interactive plot of summary data

# URL template of the per-building data plot page; {b_id} is the building ID.
base_plot_url = 'https://eeci.github.io/Cambridge-Estates-Building-Energy-Archive/building_plots/UCam_Building_b{b_id}.html'

# HTML template for the hover label; placeholders are the summary dict keys.
bsum_text = (
    '<b>Building ID: {building_id}</b><br>'
    'No. electricity data years: {n_elec_years}<br>'
    'No. gas data years: {n_gas_years}<br>'
    'Mean electricity load: {mean_elec_load:.1f} kW<br>'
    'Mean gas load: {mean_gas_load:.1f} kW<br>'
    'GIA: {building_gia:0.0f} m^2<br>'
    'No. floors: {building_no_floors}<br>'
    'Approx. roof area: {building_roof_area:0.0f} m^2<br>'
    '<i>Click to view data</i>'
)

# One hover label per building, in the same order as summary_data.
hover_text = [bsum_text.format(**d) for d in summary_data]

# Build the interactive scatter plot: one trace per gas-availability group,
# marker area scaled by the total number of data years.
fig = go.Figure()

# (has_gas flag, legend name, marker colour). 'No gas' is added first; the
# legend order is reversed below so that 'Has gas' is listed first.
trace_specs = [
    (False, 'No gas', 'rgba(229, 0, 104, .75)'),
    (True, 'Has gas', 'rgba(0, 144, 112, .75)'),
]
for gas_flag, trace_name, trace_color in trace_specs:
    # Pair each building with its hover label, keeping only this trace's group.
    members = [
        (d, label) for d, label in zip(summary_data, hover_text)
        if bool(d['has_gas']) is gas_flag
    ]
    fig.add_trace(go.Scatter(
        x=[d['building_gia'] for d, _ in members],
        y=[d['mean_elec_load'] for d, _ in members],
        text=[label for _, label in members],
        marker_size=[d['n_elec_years'] + d['n_gas_years'] for d, _ in members],
        name=trace_name,
        marker_color=trace_color,
        # URL of the building's own plot page, read by the click handler
        # in the exported HTML.
        customdata=[base_plot_url.format(b_id=d['building_id']) for d, _ in members]
    ))

fig.update_traces(
    mode='markers',
    marker=dict(sizemode='area',sizeref=0.04,line_width=1),
    hoverinfo='text'
)

fig.update_layout(
    hoverlabel=dict(
        bgcolor="white",
        font_size=14
    )
)

fig.update_layout(
    #title='Summary data for Cambridge University Estates building energy usage archive',
    xaxis=dict(
        title='Building Gross Internal Area (m^2)',
        type='log'
    ),
    # On a log axis plotly ranges are given as log10 exponents:
    # [2.3, 4.7] is roughly 200 to 50,000 m^2.
    xaxis_range=[2.3,4.7],#[-500,50000],
    yaxis=dict(
        title='Building Mean Electricity Load (kW)',
        type='log'
    ),
    # [0.3, 3.31] is roughly 2 to 2,040 kW.
    yaxis_range=[0.3,3.31],#[-75,1250],
    #paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(235, 241, 247)',
    legend_traceorder='reversed'
)

# Make sure the output directory exists so the export cannot fail on a
# fresh checkout.
os.makedirs('plots', exist_ok=True)
#fig.write_html(os.path.join('plots','building_data_summary_interactive.html'))
fig.write_image(os.path.join('plots','dataset_summary.pdf'), width=1080, height=650, scale=2)
fig.show()


# magic to open data plots via link on click
# taken from https://community.plotly.com/t/hyperlink-to-markers-on-map/17858/6

# Get HTML representation of plotly.js and this figure
plot_div = plot(fig, output_type='div', include_plotlyjs=True)

# Get id of html div element that looks like
# <div id="301d22ab-bfba-4621-8f5d-dc4fd855bb33" ... >
res = re.search('<div id="([^"]*)"', plot_div)
if res is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError('Could not find the plot <div> id in the plotly HTML output')
div_id = res.group(1)

# Build JavaScript callback for handling clicks
# and opening the URL in the trace's customdata
# (doubled braces are literal braces in the str.format template)
js_callback = """
<script>
var plot_element = document.getElementById("{div_id}");
plot_element.on('plotly_click', function(data){{
    console.log(data);
    var point = data.points[0];
    if (point) {{
        console.log(point.customdata);
        window.open(point.customdata);
    }}
}})
</script>
""".format(div_id=div_id)

# Build HTML string; the charset is declared so that browsers decode the
# file with the same encoding it is written with below.
html_str = """
<html>
<head><meta charset="utf-8" /></head>
<body>
{plot_div}
{js_callback}
</body>
</html>
""".format(plot_div=plot_div, js_callback=js_callback)

# Write out HTML file. The encoding is set explicitly because the platform
# default may be unable to encode non-ASCII characters in the page.
os.makedirs('plots', exist_ok=True)
with open(os.path.join('plots','building_data_summary_interactive.html'), 'w', encoding='utf-8') as f:
    f.write(html_str)

## Analysis for Building Design VoI project

In [None]:
# Find mean roof area of buildings of approx 100kW elec load for BD-VOI
# (here: mean electricity load strictly between 50 and 150).
approx_100kw_buildings = [
    d for d in summary_data if 50 < d['mean_elec_load'] < 150
]
for building in approx_100kw_buildings:
    print(building)

FAs = [building['building_gia'] for building in approx_100kw_buildings]  # floor areas (GIA)
RAs = [building['building_roof_area'] for building in approx_100kw_buildings]  # roof areas

# Mean roof area, then the GIA range spanned by the selected buildings.
print(np.mean(RAs))
print(np.min(FAs))
print(np.max(FAs))

In [None]:
# Electricity load statistics and mean roof area for BD-VOI, restricted to
# buildings that fall in both the GIA window and the approx. 100kW load band.
mean_elec_loads = []
roof_areas = []
for d in summary_data:
    # look at buildings with GIA close to that of buildings with approx. 100kW elec load
    if not (1725 < d['building_gia'] < 6000):
        continue
    print(d)
    # filter for buildings with 100kW \pm 50% elec load
    if 50 < d['mean_elec_load'] < 150:
        mean_elec_loads.append(d['mean_elec_load'])
        roof_areas.append(d['building_roof_area'])

# Mean and standard deviation of the selected loads, then the mean roof area.
print(np.mean(mean_elec_loads))
print(np.std(mean_elec_loads))
print(np.mean(roof_areas))

In [None]:
# Kernel density estimate of the values currently held in mean_elec_loads.
fig, ax = plt.subplots()
sns.kdeplot(
    mean_elec_loads,
    ax=ax,
    c='k',
    cut=0,
    levels=200,
)
ax.set(xlabel="Annual mean electrical load", ylabel="Density")
# Hide the y-axis ticks and their labels.
ax.set_yticks([])
ax.set_yticklabels([])
plt.show()