### Imports

In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import trainer_lib as tl

# Fix every source of randomness up front so a fresh "Restart & Run All"
# starts from the same generator states.
random.seed(3009231410)            # Python's built-in generator
np.random.seed(2909231846)         # NumPy's legacy global generator
torch.manual_seed(310231551)       # PyTorch's default generator

# Stand-alone NumPy generator, independent of the global NumPy state above.
np_random_state = np.random.RandomState(seed=131002)

### Load data

In [2]:
# Read the dataset through the project helper.
df: pd.DataFrame = tl.load_country_wide_dataset('../data/country_data.csv')

# Targets: the 'el_load' column on its own.
y = df.loc[:, 'el_load'].to_numpy(dtype=np.float32)
# Inputs: every column of the frame, which includes 'el_load' itself.
X = df.to_numpy(dtype=np.float32)

print(X.shape)

(75960, 11)


### Define models

In [3]:
class LSTMModel(nn.Module):
    """LSTM encoder followed by a linear head fed with the final hidden states.

    The final hidden state of every layer and direction is flattened per
    sample and mapped to ``out_features`` values by one linear layer.

    Args:
        features: Number of input features per time step.
        hidden_size: Size of each LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        out_features: Number of values produced per sample (defaults to 3,
            the previously hard-coded output width).
    """

    def __init__(self, features=11, hidden_size=15, num_layers=2, bidirectional=True, out_features=3):
        super().__init__()
        self.hidden_size = hidden_size
        self.h_n_dim = 2 if bidirectional else 1  # number of directions
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=features,
            hidden_size=self.hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional,
        )
        # https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
        self.fc = nn.Sequential(
            nn.Flatten(),
            # One hidden_size-wide vector per (layer, direction) pair.
            nn.Linear(self.hidden_size * self.h_n_dim * self.num_layers, out_features),
        )

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: Batched input of shape [batch, seq_len, features]
                (the LSTM is built with ``batch_first=True``).

        Returns:
            Tensor of shape [batch, out_features].
        """
        # No explicit (h_0, c_0): nn.LSTM then starts from zero states that it
        # allocates on the input's own device and dtype. This yields the same
        # values as hand-built zero tensors, without tying the model to a
        # global device or to float32.
        _, (h_n, _) = self.lstm(x)
        # h_n is [num_layers * h_n_dim, batch, hidden_size] even with
        # batch_first=True; Flatten and Linear need batch as the first axis.
        h_n = torch.permute(h_n, (1, 0, 2))
        return self.fc(h_n)

### Grid search

I'll first compare different model architectures, then look into the training hyperparameters, dropout, input noise, and possibly longer sequence lengths.

In [4]:
# First search: lr, n_splits and val_mod are held at a single value while
# hidden_size, num_layers and bidirectional are varied.
grid = tl.Grid(dict(
    epochs=[1000],  # deliberately large; early stopping ends training sooner
    lr=[0.001],
    hidden_size=[15, 30],
    num_layers=[1, 2],
    bidirectional=[False, True],
    n_splits=[6],
    val_mod=[4],
))

wrapper = tl.MIMOTSWrapper(LSTMModel(), seq_len=24, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

[Grid search 001] BEGIN - params: {'epochs': 1000, 'lr': 0.001, 'hidden_size': 15, 'num_layers': 1, 'bidirectional': False, 'n_splits': 6, 'val_mod': 4}
[Fold 1] BEGIN
Early stopping... Epoch 065: train loss: 0.007148, val loss: 0.046734, test loss: 0.098145
[Fold 1] END - RMSE loss: 210.850 - Time: 0.6 min.
[Fold 2] BEGIN
Early stopping... Epoch 050: train loss: 0.007206, val loss: 0.017968, test loss: 0.025849
[Fold 2] END - RMSE loss: 115.334 - Time: 0.8 min.
[Fold 3] BEGIN
Early stopping... Epoch 101: train loss: 0.003927, val loss: 0.006934, test loss: 0.025280
[Fold 3] END - RMSE loss: 103.338 - Time: 2.3 min.
[Fold 4] BEGIN
Early stopping... Epoch 025: train loss: 0.008539, val loss: 0.055839, test loss: 0.049122
[Fold 4] END - RMSE loss: 179.626 - Time: 0.7 min.
[Fold 5] BEGIN
Early stopping... Epoch 071: train loss: 0.005660, val loss: 0.011917, test loss: 0.023227
[Fold 5] END - RMSE loss: 105.905 - Time: 2.6 min.
[Fold 6] BEGIN
Early stopping... Epoch 063: train loss: 0.0059

In [5]:
# Pin every parameter to the best value found so far (each wrapped in a
# one-element list), then replace only the 'val_mod' candidates.
n_p = {
    **{name: [best_value] for name, best_value in b_p.items()},
    'val_mod': list(range(2, 8)),  # 2, 3, 4, 5, 6, 7
}
grid = tl.Grid(n_p)

wrapper = tl.MIMOTSWrapper(LSTMModel(), seq_len=24, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

[Grid search 001] BEGIN - params: {'epochs': 1000, 'lr': 0.001, 'hidden_size': 15, 'num_layers': 1, 'bidirectional': True, 'n_splits': 6, 'val_mod': 2}
[Fold 1] BEGIN
Early stopping... Epoch 022: train loss: 0.018808, val loss: 0.157608, test loss: 0.196077
[Fold 1] END - RMSE loss: 258.578 - Time: 0.2 min.
[Fold 2] BEGIN
Early stopping... Epoch 107: train loss: 0.005722, val loss: 0.019984, test loss: 0.054572
[Fold 2] END - RMSE loss: 161.038 - Time: 1.5 min.
[Fold 3] BEGIN
Early stopping... Epoch 044: train loss: 0.006693, val loss: 0.014861, test loss: 0.029315
[Fold 3] END - RMSE loss: 116.487 - Time: 0.9 min.
[Fold 4] BEGIN
Early stopping... Epoch 049: train loss: 0.006178, val loss: 0.016257, test loss: 0.025864
[Fold 4] END - RMSE loss: 117.679 - Time: 1.4 min.
[Fold 5] BEGIN
Early stopping... Epoch 087: train loss: 0.005269, val loss: 0.026409, test loss: 0.026488
[Fold 5] END - RMSE loss: 101.697 - Time: 3.0 min.
[Fold 6] BEGIN
Early stopping... Epoch 035: train loss: 0.00719

In [6]:
# Same procedure as before: keep the current best parameters fixed and
# try a different set of 'val_mod' candidates, in descending order.
n_p = {
    **{name: [best_value] for name, best_value in b_p.items()},
    'val_mod': list(range(10, 6, -1)),  # 10, 9, 8, 7
}
grid = tl.Grid(n_p)

wrapper = tl.MIMOTSWrapper(LSTMModel(), seq_len=24, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

[Grid search 001] BEGIN - params: {'epochs': 1000, 'lr': 0.001, 'hidden_size': 15, 'num_layers': 1, 'bidirectional': True, 'n_splits': 6, 'val_mod': 10}
[Fold 1] BEGIN
Early stopping... Epoch 136: train loss: 0.004069, val loss: 0.010996, test loss: 0.137112
[Fold 1] END - RMSE loss: 260.330 - Time: 1.3 min.
[Fold 2] BEGIN
Early stopping... Epoch 132: train loss: 0.004086, val loss: 0.005525, test loss: 0.015442
[Fold 2] END - RMSE loss: 84.449 - Time: 2.1 min.
[Fold 3] BEGIN
Early stopping... Epoch 091: train loss: 0.004292, val loss: 0.004129, test loss: 0.020504
[Fold 3] END - RMSE loss: 99.479 - Time: 2.1 min.
[Fold 4] BEGIN
Early stopping... Epoch 104: train loss: 0.004584, val loss: 0.024279, test loss: 0.017973
[Fold 4] END - RMSE loss: 100.968 - Time: 3.1 min.
[Fold 5] BEGIN
Early stopping... Epoch 117: train loss: 0.004514, val loss: 0.009021, test loss: 0.032534
[Fold 5] END - RMSE loss: 133.433 - Time: 4.3 min.
[Fold 6] BEGIN
Early stopping... Epoch 071: train loss: 0.005727