This notebook uses the GPyTorch package to apply Gaussian Process regression to the multi-step energy consumption forecasting problem.

## Setup

In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gpytorch
import torch

In [37]:
# Fixed seed value intended for reproducibility.
# NOTE(review): nothing in the cells visible here passes this to torch / numpy —
# confirm it is actually applied to the RNGs further down the notebook.
random_seed = 1923

In [38]:
# Torch numerics: permit reduced-precision internals for float32 matmuls
# ('medium') and make float32 the default floating-point dtype
torch.set_float32_matmul_precision('medium')
torch.set_default_dtype(torch.float32)

In [39]:
# Plot settings: automatic tight layout, 100 dpi figures, seaborn dark grid
plt.rcParams.update({
    "figure.autolayout": True,
    "figure.dpi": 100,
})
sns.set_style("darkgrid")

In [40]:
# Directory holding the prepared data files, relative to the working directory.
# The trailing slash matters: file names are appended by plain string concatenation.
output_dir = "./OutputData/"

In [41]:
# Load the prepared dataset and parse the "time" strings
# (day:month:year:hour:minute) into datetimes
df = pd.read_csv(output_dir + "full_data.csv").assign(
    time = lambda frame: pd.to_datetime(frame["time"], format = "%d:%m:%Y:%H:%M")
)

In [42]:
# Drop generation columns: everything after the first two columns is removed
gen_cols = list(df.columns[2:])
df = df.drop(columns = gen_cols)

In [43]:
# Preview the frame after the generation columns have been dropped
df

Unnamed: 0,time,consumption_MWh
0,2018-01-01 00:00:00,27412.81
1,2018-01-01 01:00:00,26324.39
2,2018-01-01 02:00:00,24635.32
3,2018-01-01 03:00:00,23872.12
4,2018-01-01 04:00:00,23194.89
...,...,...
52579,2023-12-31 19:00:00,35090.93
52580,2023-12-31 20:00:00,33310.94
52581,2023-12-31 21:00:00,32083.96
52582,2023-12-31 22:00:00,30469.49


## Data prep

In [44]:
# Add time columns derived from the row index and the "time" datetimes:
#   trend     - the row index values
#   hour      - hour of day, shifted to 1..24
#   dayofweek - day of week, shifted to 1..7 (Monday = 1)
#   month     - calendar month, 1..12
df = df.assign(
    trend = lambda frame: frame.index.values,
    hour = lambda frame: frame["time"].dt.hour + 1,
    dayofweek = lambda frame: frame["time"].dt.dayofweek + 1,
    month = lambda frame: frame["time"].dt.month,
)

In [45]:
# Preview the frame with the added trend / hour / dayofweek / month columns
df

Unnamed: 0,time,consumption_MWh,trend,hour,dayofweek,month
0,2018-01-01 00:00:00,27412.81,0,1,1,1
1,2018-01-01 01:00:00,26324.39,1,2,1,1
2,2018-01-01 02:00:00,24635.32,2,3,1,1
3,2018-01-01 03:00:00,23872.12,3,4,1,1
4,2018-01-01 04:00:00,23194.89,4,5,1,1
...,...,...,...,...,...,...
52579,2023-12-31 19:00:00,35090.93,52579,20,7,12
52580,2023-12-31 20:00:00,33310.94,52580,21,7,12
52581,2023-12-31 21:00:00,32083.96,52581,22,7,12
52582,2023-12-31 22:00:00,30469.49,52582,23,7,12


In [46]:
# Build float32 tensors: y is the consumption target, X is every remaining
# column except the timestamp
y = torch.tensor(df["consumption_MWh"].to_numpy(), dtype = torch.float32)
X = torch.tensor(
    df.drop(columns = ["time", "consumption_MWh"]).to_numpy(),
    dtype = torch.float32,
)

In [50]:
# Evaluation settings, chosen to match the sequence2sequence testing scheme
horizon = 32  # Forecast horizon
stride = 24  # Number of timesteps between each prediction point

# First prediction point.
# NOTE(review): this is a pandas Index holding the label(s) of the matching
# row(s), not a plain int — confirm downstream code unwraps it before using it
# as a slice bound.
first_t = df.loc[df["time"] == '2022-10-18 16:00:00'].index

In [None]:
# Evaluation pseudocode:
# Create preds list
# Perform feature scaling
# Train until [:first_t]
# Predict on first_t + horizon
# Save preds
# For pred points in [first_t:] // stride:
#     Perform feature scaling
#     Expand training set & online train
#     Predict on first_t + eval index * stride + horizon
#     Save preds
# Concat & return preds, actual targets[first_t:]

In [None]:
GPyTorch model training wrapper similar to Lightning