# Generating demo data for testing

In [1]:
import pandas as pd
import numpy as np
from thermo.utils.io import load_file, load_npy
from thermo.stages.preprocessing import mock_ventilation
from pathlib import Path

In [2]:
# Load the demo school's specification file from its building directory.
specifications = load_file(
    Path("../buildings/demo_school"),
    "specifications.yaml",
)

In [3]:
# Map each room name to its capacity, keeping the order in which the rooms
# are listed in the specifications.
rooms = dict(
    (description["name"], description["capacity"])
    for description in specifications["room_descriptions"]
)

In [4]:
# Show the room-name -> capacity mapping.
rooms

{'Room A': 30,
 'Room B': 20,
 'Room C': 10,
 'Room D': 30,
 'Room E': 15,
 'Room F': 30,
 'Room G': 15,
 'Room H': 30,
 'Room I': 10,
 'Room J': 30}

In [5]:
# Load the stored adjacency array of the demo school from its building directory.
adjacency = load_npy(
    Path("../buildings/demo_school"),
    "adjacency.npy",
)

In [6]:
# Show the loaded adjacency array.
adjacency

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
       [1, 0, 0, 1, 0, 1, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 1],
       [0, 1, 1, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 1, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [1, 0, 1, 0, 0, 0, 0, 0, 0, 0]])

In [7]:
# Hourly timestamps covering 10 full days (10 * 24 = 240 points), starting at
# midnight on 2022-03-03. The lowercase "h" alias is used because the uppercase
# "H" spelling is deprecated in recent pandas releases.
timestamps = pd.date_range(start="2022-03-03", periods=10 * 24, freq="h")

## Bookings

### Raw bookings


We simulate bookings by drawing, for every room and every hour, an independent booked / not-booked flag (booked with probability 0.1), and then setting all hours before 08:00 to zero.

In [8]:
# Draw an independent booked (1) / free (0) flag for every (hour, room) pair,
# with a 10% chance of being booked. A locally seeded generator keeps the demo
# data identical across runs; change the seed to obtain a different data set.
booking_rng = np.random.default_rng(seed=20220303)
bookings = booking_rng.choice([0, 1], size=(len(timestamps), len(rooms)), p=[0.9, 0.1])

In [9]:
# Wide table: one row per timestamp and one "<room>_booked" column per room
# (spaces in the room name are replaced by dashes).
raw_bookings = pd.DataFrame(
    bookings,
    index=timestamps,
    columns=[f'{name.replace(" ", "-")}_booked' for name in rooms],
)

In [10]:
# Clear every booking in the hours 00:00-07:59 of each day (in place).
raw_bookings.loc[raw_bookings.index.hour < 8, :] = 0

In [11]:
# Show the wide bookings table after the early-morning hours were zeroed.
raw_bookings

Unnamed: 0,Room-A_booked,Room-B_booked,Room-C_booked,Room-D_booked,Room-E_booked,Room-F_booked,Room-G_booked,Room-H_booked,Room-I_booked,Room-J_booked
2022-03-03 00:00:00,0,0,0,0,0,0,0,0,0,0
2022-03-03 01:00:00,0,0,0,0,0,0,0,0,0,0
2022-03-03 02:00:00,0,0,0,0,0,0,0,0,0,0
2022-03-03 03:00:00,0,0,0,0,0,0,0,0,0,0
2022-03-03 04:00:00,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
2022-03-12 19:00:00,0,0,0,0,0,0,0,0,0,0
2022-03-12 20:00:00,0,0,0,0,0,1,0,1,1,0
2022-03-12 21:00:00,1,1,0,0,0,0,0,0,1,0
2022-03-12 22:00:00,0,0,1,0,0,1,0,0,0,0


### Reshaping bookings to input data format

In [12]:
# Convert the wide hour-by-room table into the long bookings input format:
# one row per (date, time, room) plus constant identifier columns.
bookings = (
    raw_bookings
    .rename(columns=lambda column: column[:-7])  # strip the 7-character "_booked" suffix
    .assign(
        DATO=lambda frame: frame.index.date,
        TIME=lambda frame: frame.index.time,
    )
    .reset_index(drop=True)  # the timestamp index is now carried by DATO / TIME
    .melt(id_vars=["DATO", "TIME"], var_name="ROOM_ID", value_name="BOOKED")
    .assign(
        MUNICIPALITY="demo_municipality",
        SCHOOL="demo_school",
        # Composite key built as <date>-<time>-<room>-<municipality>.
        MUNICIPALITY_DATE_TIME_ROOM=lambda frame: (
            frame.DATO.astype(str)
            + "-"
            + frame.TIME.astype(str)
            + "-"
            + frame.ROOM_ID
            + "-"
            + frame.MUNICIPALITY
        ),
    )
)

In [13]:
# Show the long-format bookings table.
bookings

Unnamed: 0,DATO,TIME,ROOM_ID,BOOKED,MUNICIPALITY,SCHOOL,MUNICIPALITY_DATE_TIME_ROOM
0,2022-03-03,00:00:00,Room-A,0,demo_municipality,demo_school,2022-03-03-00:00:00-Room-A-demo_municipality
1,2022-03-03,01:00:00,Room-A,0,demo_municipality,demo_school,2022-03-03-01:00:00-Room-A-demo_municipality
2,2022-03-03,02:00:00,Room-A,0,demo_municipality,demo_school,2022-03-03-02:00:00-Room-A-demo_municipality
3,2022-03-03,03:00:00,Room-A,0,demo_municipality,demo_school,2022-03-03-03:00:00-Room-A-demo_municipality
4,2022-03-03,04:00:00,Room-A,0,demo_municipality,demo_school,2022-03-03-04:00:00-Room-A-demo_municipality
...,...,...,...,...,...,...,...
2395,2022-03-12,19:00:00,Room-J,0,demo_municipality,demo_school,2022-03-12-19:00:00-Room-J-demo_municipality
2396,2022-03-12,20:00:00,Room-J,0,demo_municipality,demo_school,2022-03-12-20:00:00-Room-J-demo_municipality
2397,2022-03-12,21:00:00,Room-J,0,demo_municipality,demo_school,2022-03-12-21:00:00-Room-J-demo_municipality
2398,2022-03-12,22:00:00,Room-J,0,demo_municipality,demo_school,2022-03-12-22:00:00-Room-J-demo_municipality


In [14]:
# Persist the long-format bookings as the demo bookings asset (row index included).
bookings.to_csv(path_or_buf="../assets/demo_bookings.csv")

## Electricity

We model the electricity as the sum of 5 contributions:
1. A constant electricity consumption of 0.535 kWh.
2. Additional 0.1123 kWh x room_capacity for each room that is booked or in day mode.
3. Additional 0.2123 kWh x room_capacity for each room that is booked (we do not distinguish if it is being used or not).
4. Additional non-negative noise: Gaussian noise of scale 0.3 kWh in which negative draws are clipped to zero.
5. A reduction in consumption of 0.015 kWh for every wall face shared by two rooms that are both active in the ventilation matrix, due to heat transfer.

The number of shared walls at time step $i$ (counting the two faces of each wall as two separate effects) is $\sum_{j,k} V_{ij}A_{jk}V_{ik}$, where $V$ is the binary ventilation matrix (time steps × rooms) and $A$ is the room adjacency matrix.

### Raw electricity
We generate the data from the raw bookings

In [15]:
# Build the per-room ventilation table from the raw bookings using the
# project's mock helper.
ventilation = mock_ventilation(
    dataf=raw_bookings,
    params={"is_on": True},
)

DEBUG  	 mock_ventilation running 	 parameters: params={'is_on': True}. 	 in file: preprocessing.py, function: wrapper
INFO  	 mock_ventilation completed 	 shape (240, 10)	 time 0.012s 	 in file: preprocessing.py, function: wrapper


In [16]:
# Show the mocked ventilation table.
ventilation

Unnamed: 0,Room-A_booked,Room-B_booked,Room-C_booked,Room-D_booked,Room-E_booked,Room-F_booked,Room-G_booked,Room-H_booked,Room-I_booked,Room-J_booked
2022-03-03 00:00:00,1,1,1,1,1,1,1,0,1,1
2022-03-03 01:00:00,1,1,1,1,1,1,1,0,1,1
2022-03-03 02:00:00,1,1,1,1,1,1,1,0,1,1
2022-03-03 03:00:00,1,1,1,1,1,1,1,0,1,1
2022-03-03 04:00:00,1,1,1,1,1,1,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...
2022-03-12 19:00:00,1,1,1,0,0,1,0,1,1,0
2022-03-12 20:00:00,1,1,1,0,0,1,0,1,1,0
2022-03-12 21:00:00,1,1,1,0,0,1,0,0,1,0
2022-03-12 22:00:00,0,1,1,0,0,1,0,0,0,0


In [17]:
# Room capacities as a 1-D numeric vector, in the insertion order of `rooms`
# (the same order used for the room columns). The dict view is materialised
# with list() first: np.array(rooms.values()) would wrap the view object in a
# 0-d object array instead of producing a vector of numbers.
capacities = np.array(list(rooms.values()))

# Per-hour consumption proportional to the total capacity of the rooms that are
# active in the ventilation table / booked, respectively.
ventilation_contributions = 0.1123 * (ventilation * capacities).sum(axis=1)
booked_contributions = 0.2123 * (raw_bookings * capacities).sum(axis=1)

# Non-negative noise: Gaussian draws of scale 0.3 with negative values clipped
# to zero. Seeded locally so the generated demo data is reproducible.
noise_rng = np.random.default_rng(seed=20220303)
noise_contributions = noise_rng.normal(scale=0.3, size=len(timestamps)).clip(min=0)

# For every hour i: sum over rooms j, k of V[i, j] * A[j, k] * V[i, k], i.e. the
# number of adjacency entries whose two rooms are both active at that hour.
ventilation_matrix = ventilation.to_numpy()
heat_transfer_contributions = 0.015 * np.einsum(
    "ij,jk,ik->i", ventilation_matrix, adjacency, ventilation_matrix
)

# Constant base load plus the variable terms, minus the heat-transfer saving.
raw_electricity = (
    0.535
    + ventilation_contributions
    + booked_contributions
    + noise_contributions
    - heat_transfer_contributions
)

In [18]:
# Show the simulated hourly electricity series.
raw_electricity

2022-03-03 00:00:00    21.572000
2022-03-03 01:00:00    21.572000
2022-03-03 02:00:00    21.749901
2022-03-03 03:00:00    21.572000
2022-03-03 04:00:00    21.753931
                         ...    
2022-03-12 19:00:00    15.044000
2022-03-12 20:00:00    29.905000
2022-03-12 21:00:00    24.682777
2022-03-12 22:00:00    15.815235
2022-03-12 23:00:00     7.095228
Freq: H, Length: 240, dtype: float64

### Reshaping electricity data into input format

In [19]:
# Convert the hourly series into the energy input format: one row per hour with
# the rounded value, split date / time columns and constant identifier columns.
electricity = (
    raw_electricity
    .round(decimals=2)
    .to_frame(name="VALUE")
    .assign(
        DATO=lambda frame: frame.index.date,
        TIME=lambda frame: frame.index.time,
    )
    .reset_index(drop=True)  # the timestamp index is now carried by DATO / TIME
    .assign(
        MUNICIPALITY="demo_municipality",
        SCHOOL="demo_school",
        MEASURE_POINT="12892834",
        MEASURE_TYPE="electricity",
        UNIT="kWh",
        # Composite key built as <date>-<municipality>.
        MUNICIPALITY_DATE=lambda frame: frame.DATO.astype(str) + "-" + frame.MUNICIPALITY,
    )
)

In [20]:
# Show the electricity table in its input format.
electricity

Unnamed: 0,VALUE,DATO,TIME,MUNICIPALITY,SCHOOL,MEASURE_POINT,MEASURE_TYPE,UNIT,MUNICIPALITY_DATE
0,21.57,2022-03-03,00:00:00,demo_municipality,demo_school,12892834,electricity,kWh,2022-03-03-demo_municipality
1,21.57,2022-03-03,01:00:00,demo_municipality,demo_school,12892834,electricity,kWh,2022-03-03-demo_municipality
2,21.75,2022-03-03,02:00:00,demo_municipality,demo_school,12892834,electricity,kWh,2022-03-03-demo_municipality
3,21.57,2022-03-03,03:00:00,demo_municipality,demo_school,12892834,electricity,kWh,2022-03-03-demo_municipality
4,21.75,2022-03-03,04:00:00,demo_municipality,demo_school,12892834,electricity,kWh,2022-03-03-demo_municipality
...,...,...,...,...,...,...,...,...,...
235,15.04,2022-03-12,19:00:00,demo_municipality,demo_school,12892834,electricity,kWh,2022-03-12-demo_municipality
236,29.90,2022-03-12,20:00:00,demo_municipality,demo_school,12892834,electricity,kWh,2022-03-12-demo_municipality
237,24.68,2022-03-12,21:00:00,demo_municipality,demo_school,12892834,electricity,kWh,2022-03-12-demo_municipality
238,15.82,2022-03-12,22:00:00,demo_municipality,demo_school,12892834,electricity,kWh,2022-03-12-demo_municipality


In [21]:
# Persist the electricity table as the demo energy asset (row index included).
electricity.to_csv(path_or_buf="../assets/demo_energy.csv")