### Imports

In [None]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import trainer_lib as tl
import torch_model_definitions as tmd

# Fixed seeds for every random number generator this notebook configures,
# so a fresh "Restart & Run All" starts from the same RNG state.
TORCH_SEED = 310231551
PYTHON_SEED = 3009231410
NUMPY_SEED = 2909231846
NUMPY_STATE_SEED = 131002

torch.manual_seed(TORCH_SEED)   # PyTorch global generator
random.seed(PYTHON_SEED)        # Python's built-in `random` module
np.random.seed(NUMPY_SEED)      # NumPy legacy global generator

# Stand-alone NumPy generator, independent of the global NumPy state seeded above.
np_random_state = np.random.RandomState(NUMPY_STATE_SEED)

### Load data

In [None]:
# Location of the dataset and the name of the column used as prediction target.
DATA_PATH = '../data/country_data.csv'
TARGET_COLUMN = 'el_load'
ARRAY_DTYPE = np.float32

df: pd.DataFrame = tl.load_country_wide_dataset(DATA_PATH)

# X holds every column of the frame (the target column included),
# y holds the target column alone; both are converted to float32 arrays.
X, y = (
    df.to_numpy(dtype=ARRAY_DTYPE),
    df[TARGET_COLUMN].to_numpy(dtype=ARRAY_DTYPE),
)

### Grid search

In [None]:
# First sweep: embedding size fixed at 24, varying the number of layers
# and whether the model is bidirectional.
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound: early stopping is used
    lr=[0.0005],
    model=[tmd.Seq2seq],
    embedding_size=[24],
    num_layers=[1, 2],
    dropout=[0.5],
    bidirectional=[True, False],
))  # n_splits defaulted to 6, val_mod to 8

# This is the only grid-search cell that passes teacher_forcing_decay explicitly.
wrapper = tl.S2STSWRAPPER(
    tmd.Seq2seq(),
    seq_len=24,
    pred_len=3,
    teacher_forcing_decay=0.01,
)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Second sweep: bidirectional only, trying embedding sizes 18 and 12
# with one or two layers.
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound: early stopping is used
    lr=[0.0005],
    model=[tmd.Seq2seq],
    embedding_size=[18, 12],
    num_layers=[1, 2],
    dropout=[0.5],
    bidirectional=[True],
))  # n_splits defaulted to 6, val_mod to 8

wrapper = tl.S2STSWRAPPER(
    tmd.Seq2seq(),
    seq_len=24,
    pred_len=3,
)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

A small embedding size and a low layer count seem to be the best options so far. Let's try even smaller embedding sizes, and also a much larger one.

In [None]:
# Third sweep: a single bidirectional layer, comparing two small embedding
# sizes (8, 10) against a much larger one (64).
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound: early stopping is used
    lr=[0.0005],
    model=[tmd.Seq2seq],
    embedding_size=[8, 10, 64],
    num_layers=[1],
    dropout=[0.5],
    bidirectional=[True],
))  # n_splits defaulted to 6, val_mod to 8

wrapper = tl.S2STSWRAPPER(
    tmd.Seq2seq(),
    seq_len=24,
    pred_len=3,
)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

The best parameters seem to be an embedding size of 12 and a single bidirectional layer.

### Adding noise

I'll try only a very small amount of input noise, since input noise has already thrown off most models too much.

In [None]:
# Noise sweep: architecture held fixed (embedding size 10, one bidirectional
# layer); each of in_noise and out_noise is tried both off (0.0) and on.
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound: early stopping is used
    lr=[0.0005],
    model=[tmd.Seq2seq],
    embedding_size=[10],
    num_layers=[1],
    dropout=[0.5],
    bidirectional=[True],
    in_noise=[0.0, 0.02],
    out_noise=[0.0, 0.05],
))  # n_splits defaulted to 6, val_mod to 8

wrapper = tl.S2STSWRAPPER(
    tmd.Seq2seq(),
    seq_len=24,
    pred_len=3,
)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

Applying noise to the input makes the model worse again, but noise applied right before the predictions improves its stability massively.
I noticed that training was taking too long, so I will increase the batch size and the learning rate. Increasing the batch size alongside the learning rate should not noticeably affect our results.

In [None]:
# Faster-training sweep: learning rate raised to 0.001 (the earlier grids in
# this notebook used 0.0005) with an explicit batch size of 1024, comparing
# embedding sizes 10 and 12 with output noise only.
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound: early stopping is used
    lr=[0.001],
    model=[tmd.Seq2seq],
    embedding_size=[10, 12],
    num_layers=[1],
    dropout=[0.5],
    bidirectional=[True],
    out_noise=[0.05],
    batch_size=[1024],
    es_p=[20],  # presumably the early-stopping patience - TODO confirm in trainer_lib
))  # n_splits defaulted to 6, val_mod to 8

wrapper = tl.S2STSWRAPPER(
    tmd.Seq2seq(),
    seq_len=24,
    pred_len=3,
)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Batch-size sweep: everything else held fixed, trying 1024, 2048 and 4096.
# Unlike the previous grid, no es_p entry is given here.
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound: early stopping is used
    lr=[0.001],
    model=[tmd.Seq2seq],
    embedding_size=[10],
    num_layers=[1],
    dropout=[0.5],
    bidirectional=[True],
    out_noise=[0.05],
    batch_size=[1024, 2048, 4096],
))  # n_splits defaulted to 6, val_mod to 8

wrapper = tl.S2STSWRAPPER(
    tmd.Seq2seq(),
    seq_len=24,
    pred_len=3,
)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Validate one explicit configuration (embedding size 10, one bidirectional
# layer, dropout 0.5, output noise 0.05) instead of running another grid.
# NOTE(review): the grid searches in this notebook use tl.S2STSWRAPPER, while
# this cell uses tl.MIMOTSWrapper - confirm the switch is intentional.
wrapper = tl.MIMOTSWrapper(
    tmd.Seq2seq(
        pred_len=3,
        embedding_size=10,
        num_layers=1,
        bidirectional=True,
        dropout=0.5,
        out_noise=0.05,
    ),
    seq_len=24,
    pred_len=3,
)

# The positional 1000 is presumably the epoch cap used in the grids - TODO confirm.
# NOTE(review): es_p is 10 here, whereas the one grid that set it used 20.
result = wrapper.validate_ts_strategy(
    X,
    y,
    1000,
    batch_size=2048,
    lr=0.001,
    n_splits=6,
    es_p=10,
)

In [None]:
# Average of result[3] over all of its entries, followed by the same average
# with the first entry left out.
# NOTE(review): presumably one entry per validation split - confirm against
# validate_ts_strategy's return value.
split_scores = result[3]
mean_all = sum(split_scores) / len(split_scores)
mean_without_first = sum(split_scores[1:]) / (len(split_scores) - 1)
print(mean_all, "-", mean_without_first)

# Predict on the trailing seventh of the samples and print the evaluation summary.
st = X.shape[0] // 7
tail_X, tail_y = X[-st:], y[-st:]
tl.MIMOTSWrapper.print_evaluation_info(*wrapper.predict(tail_X, tail_y))