This notebook uses the GPyTorch package to apply Gaussian Process regression to the multi-step energy consumption forecasting problem.

## Setup

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gpytorch
import torch

In [2]:
# Single seed shared by every random number generator used in the notebook
random_seed = 1923

# Apply the seed: declaring it alone does not make anything reproducible
np.random.seed(random_seed)
torch.manual_seed(random_seed)

In [3]:
# Torch numerics: floating-point tensors are created as float32 by default
torch.set_default_dtype(torch.float32)

# Allow faster, lower-precision float32 matrix multiplications
# NOTE(review): confirm "medium" precision is accurate enough for the GP
# covariance computations later in the notebook
torch.set_float32_matmul_precision("medium")

In [4]:
# Plot settings: automatic tight layout and 100 dpi for every figure,
# with seaborn's dark grid theme
plt.rcParams.update({"figure.autolayout": True, "figure.dpi": 100})
sns.set_style("darkgrid")

In [5]:
# Directory that holds the prepared data file read in the next cell
output_dir = "./OutputData/"

In [6]:
# Load the prepared dataset and parse the "day:month:year:hour:minute"
# timestamp strings into datetimes
df = pd.read_csv(output_dir + "full_data.csv").assign(
    time = lambda frame: pd.to_datetime(frame["time"], format = "%d:%m:%Y:%H:%M")
)

In [7]:
# Drop generation columns: everything after the first two columns
# (timestamp and consumption) is removed
gen_cols = list(df.columns[2:])
df = df.drop(columns = gen_cols)

In [8]:
# Display the frame left after dropping the generation columns
df

Unnamed: 0,time,consumption_MWh
0,2018-01-01 00:00:00,27412.81
1,2018-01-01 01:00:00,26324.39
2,2018-01-01 02:00:00,24635.32
3,2018-01-01 03:00:00,23872.12
4,2018-01-01 04:00:00,23194.89
...,...,...
52579,2023-12-31 19:00:00,35090.93
52580,2023-12-31 20:00:00,33310.94
52581,2023-12-31 21:00:00,32083.96
52582,2023-12-31 22:00:00,30469.49


## Data prep

We do not need to cyclically encode the seasonal features, as we will apply a periodic kernel to them.

In [9]:
# Add time columns in one step. The column order (trend, hour, dayofweek,
# month) is kept, because the feature tensor is built from these columns
# in this order.
df = df.assign(
    # Trend: running row index
    trend = df.index.values,
    # Hour of day, shifted to start at 1
    hour = df["time"].dt.hour + 1,
    # Day of week, shifted to start at 1
    dayofweek = df["time"].dt.dayofweek + 1,
    # Month of year (already starts at 1)
    month = df["time"].dt.month,
)

In [10]:
# Display the frame with the added time columns
df

Unnamed: 0,time,consumption_MWh,trend,hour,dayofweek,month
0,2018-01-01 00:00:00,27412.81,0,1,1,1
1,2018-01-01 01:00:00,26324.39,1,2,1,1
2,2018-01-01 02:00:00,24635.32,2,3,1,1
3,2018-01-01 03:00:00,23872.12,3,4,1,1
4,2018-01-01 04:00:00,23194.89,4,5,1,1
...,...,...,...,...,...,...
52579,2023-12-31 19:00:00,35090.93,52579,20,7,12
52580,2023-12-31 20:00:00,33310.94,52580,21,7,12
52581,2023-12-31 21:00:00,32083.96,52581,22,7,12
52582,2023-12-31 22:00:00,30469.49,52582,23,7,12


In [11]:
# Features are every column except the timestamp and the target;
# both features and target are stored as float32 tensors
X = torch.tensor(
    df.drop(columns = ["time", "consumption_MWh"]).to_numpy(),
    dtype = torch.float32,
)
y = torch.tensor(df["consumption_MWh"].to_numpy(), dtype = torch.float32)

In [17]:
# Evaluation parameters that match the sequence2sequence testing scheme
horizon = 32  # Forecast horizon, in timesteps
stride = 24   # Number of timesteps between each prediction point

# First prediction point: row label of the given timestamp
is_first_point = (df["time"] == "2022-10-18 16:00:00").to_numpy()
first_t = df.index[is_first_point][0]

In [18]:
# Initial train - test split at the first prediction point:
# rows before first_t are used for training, rows from first_t on for testing
X_train, y_train = X[:first_t], y[:first_t]
X_test, y_test = X[first_t:], y[first_t:]

## Model & wrapper definition

In [None]:
# ExactGP model class
class ExactGPModel(gpytorch.models.ExactGP):
    """Exact GP regression model with a constant mean and an additive kernel.

    The covariance is the sum of a linear kernel on the trend feature and one
    scaled periodic kernel for each seasonal feature:
    Linear(trend) + Periodic(hour) + Periodic(dayofweek) + Periodic(month).

    The feature columns are expected in the order
    [trend, hour, dayofweek, month], which is the order in which the data prep
    cells add them to the dataframe.

    Args:
        X_train: Training features, one row per timestep.
        y_train: Training targets.
        likelihood: GPyTorch likelihood passed on to the ExactGP base class.
    """

    def __init__(self, X_train, y_train, likelihood):
        super().__init__(X_train, y_train, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()

        # Each kernel only sees its own feature column through active_dims.
        # The linear kernel has its own variance parameter; the periodic
        # kernels are wrapped in ScaleKernel to get a learnable output scale.
        kernels = gpytorch.kernels
        self.covariance_module = (
            kernels.LinearKernel(active_dims = (0,))
            + kernels.ScaleKernel(kernels.PeriodicKernel(active_dims = (1,)))
            + kernels.ScaleKernel(kernels.PeriodicKernel(active_dims = (2,)))
            + kernels.ScaleKernel(kernels.PeriodicKernel(active_dims = (3,)))
        )

    def forward(self, x):
        """Return the GP prior distribution evaluated at the inputs x."""
        mean = self.mean_module(x)
        covar = self.covariance_module(x)
        return gpytorch.distributions.MultivariateNormal(mean, covar)

In [None]:
# ExactGP wrapper class
class ExactGP:
    """Wrapper that trains, updates and predicts with a GPyTorch exact GP.

    Args:
        model: GPyTorch exact GP model.
        likelihood: Likelihood used with the model.
        cuda: If True, run on the GPU. Falls back to the CPU (with a warning)
            when no CUDA device is available.
    """

    def __init__(self, model, likelihood, cuda = True):
        # Plain assignments: a trailing comma here would store 1-tuples and
        # break every later method call on the model and the likelihood
        self.model = model
        self.likelihood = likelihood

        # Only use the GPU if one is actually available
        use_cuda = bool(cuda)
        if use_cuda and not torch.cuda.is_available():
            import warnings
            warnings.warn("CUDA was requested but is not available, using the CPU instead.")
            use_cuda = False
        self.cuda = use_cuda

    # Training method
    def train(self, X_train, y_train, max_epochs, learning_rate = 1e-3, early_stop = 10, early_stop_tol = 1e-4):
        """Find the kernel hyperparameters by minimizing the negative marginal log likelihood.

        Args:
            X_train: Training features.
                NOTE(review): GPyTorch exact GPs expect training-mode calls on
                the data the model was created with; confirm this matches.
            y_train: Training targets.
            max_epochs: Maximum number of optimizer steps.
            learning_rate: Adam learning rate.
            early_stop: Stop after this many consecutive epochs without improvement.
            early_stop_tol: Minimum decrease in the loss that counts as an improvement.
        """

        # Put tensors on GPU if cuda is enabled
        if self.cuda:
            X_train = X_train.cuda()
            y_train = y_train.cuda()
            self.model = self.model.cuda()
            self.likelihood = self.likelihood.cuda()

        # Activate training mode
        self.model.train()
        self.likelihood.train()

        # Create Adam optimizer with model parameters
        optimizer = torch.optim.Adam(self.model.parameters(), lr = learning_rate)

        # Create marginal log likelihood loss
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)

        # Initialize the early stopping state before the loop, so the check at
        # the top of the first epoch does not read an undefined attribute
        self._best_loss = float("inf")
        self._epochs_no_improvement = 0

        # Training loop
        for epoch in range(max_epochs):

            # Early stop
            if self._epochs_no_improvement >= early_stop:
                print(f"Early stopping at epoch {epoch+1}")
                break

            # Reset gradients
            optimizer.zero_grad()

            # Get outputs from model
            output = self.model(X_train)

            # Calculate loss and perform backpropagation
            loss = -mll(output, y_train)
            loss.backward()

            # Keep the loss as a plain float, so the best loss does not hold on
            # to the computation graph
            loss_value = loss.item()

            # Print epoch info & update model parameters
            noise = self.model.likelihood.noise
            print(f"Epoch: {epoch+1}/{max_epochs}, Loss: {loss_value}, Noise: {noise}")
            optimizer.step()

            # An epoch counts as an improvement only if the loss drops by more
            # than the tolerance; anything else (including a plateau) counts
            # towards early stopping
            if loss_value < self._best_loss - early_stop_tol:
                self._best_loss = loss_value
                self._epochs_no_improvement = 0
            else:
                self._epochs_no_improvement += 1

    # Method to update model training data (kernel hyperparameters unchanged, no additional training)
    def update_train(self, X_update, y_update):
        """Add new observations to the model's training data without retraining.

        NOTE(review): GPyTorch's get_fantasy_model is documented to need a
        model in eval mode that has already made predictions; confirm that
        predict() is called before this method.

        Args:
            X_update: Features of the new observations.
            y_update: Targets of the new observations.
        """

        # Put tensors on GPU if cuda is enabled
        if self.cuda:
            X_update = X_update.cuda()
            y_update = y_update.cuda()

        # Update model training data
        self.model = self.model.get_fantasy_model(X_update, y_update)

    # Predict method
    def predict(self, X_test, cpu = True, fast_preds = False):
        """Predict the target distribution for new inputs.

        Args:
            X_test: Features to predict on.
            cpu: If True, move the returned mean and interval tensors to the CPU.
            fast_preds: Passed as the state of gpytorch.settings.fast_pred_var.

        Returns:
            Tuple (y_posterior, y_mean, y_lower, y_upper): the predictive
            distribution, its mean, and the bounds of its confidence region.
            The distribution object is returned as produced by the likelihood;
            only the three tensors are moved to the CPU.
        """

        # Test data to GPU, if cuda enabled
        if self.cuda:
            X_test = X_test.cuda()

        # Activate eval mode
        self.model.eval()
        self.likelihood.eval()

        # Make predictions without gradient calculation
        with torch.no_grad(), gpytorch.settings.fast_pred_var(state = fast_preds):

            # Returns the model posterior distribution over functions p(f*|x*, X, y)
            # Noise is not yet added to the functions
            f_posterior = self.model(X_test)

            # Returns the predictive posterior distribution p(y*|x*, X, y)
            # Noise is added to the functions
            y_posterior = self.likelihood(f_posterior)

            # Get posterior predictive mean & prediction intervals
            # By default, 2 standard deviations around the mean
            # `mean` is a property of the distribution, not a method
            y_mean = y_posterior.mean
            y_lower, y_upper = y_posterior.confidence_region()

        # Return tensors to CPU if desired
        # The distribution itself is not moved: it has no .cpu() method
        if cpu:
            y_mean = y_mean.cpu()
            y_lower = y_lower.cpu()
            y_upper = y_upper.cpu()

        return y_posterior, y_mean, y_lower, y_upper

Use the `get_fantasy_model` method to update the trained model's training data with new input sequences. Hyperparameters are not updated, which roughly mirrors how input sequences are used in NN models.
\
This can be too slow with exact inference. In that case, look into sparse variational inference & possibly minibatch training.

## Model testing

In [None]:
# Create model & likelihood


In [None]:
# Create scaler

In [None]:
# Evaluation pseudocode:
# Create preds list
# Perform feature scaling
# Train until [:first_t]
# Predict on first_t + horizon
# Save preds
# For pred points in [first_t:] // stride:
#     Perform feature scaling
#     Expand training set & online train
#     Predict on first_t + eval index * stride + horizon
#     Save preds
# Concat & return preds, actual targets[first_t:]

In [None]:
# Plot predicted vs. actual, entire test set

In [None]:
# Plot predicted vs. actual, select sequences

In [None]:
# Calculate performance metrics