### Imports

In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import trainer_lib as tl
import torch_model_definitions as tmd

# Seed the global RNGs of torch, the stdlib `random` module and NumPy.
torch.manual_seed(310231551)
random.seed(3009231410)
np.random.seed(2909231846)
# Separate, independently seeded NumPy generator object.
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
np_random_state = np.random.RandomState(131002)

### Load data

In [2]:
# Load the dataset through the project helper (annotated here as returning a DataFrame).
df: pd.DataFrame = tl.load_country_wide_dataset('../data/country_data.csv')

# Convert the whole frame to a float32 array. The target array is an independent
# copy of the inputs, so X and y hold the same columns.
X = df.to_numpy(dtype=np.float32)
y = X.copy()

### Define models

In [3]:
class LSTMRec(nn.Module):
    """LSTM encoder followed by a linear head that outputs one feature vector.

    The final hidden state of every layer and direction is flattened, passed
    through ``tmd.GaussianNoise`` and dropout, and projected back to
    ``features`` values by a single linear layer.

    Args:
        features: Number of input features per time step; also the size of
            the output vector.
        hidden_size: Hidden state size of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability used before the linear head and, when
            ``num_layers > 1``, between the LSTM layers.
        hid_noise: Value forwarded to ``tmd.GaussianNoise`` (presumably the
            noise standard deviation — confirm in ``torch_model_definitions``).
        bidirectional: Whether the LSTM runs in both directions.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, features=11, hidden_size=20, num_layers=2, dropout=0.0, hid_noise=0.0, bidirectional=True, **kwargs):
        super().__init__()
        self.hidden_size = hidden_size
        self.h_n_dim = 2 if bidirectional else 1
        self.num_layers = num_layers
        # nn.LSTM only applies dropout between stacked layers, so it is
        # disabled for a single-layer network.
        rec_drop = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size=features, hidden_size=self.hidden_size, num_layers=num_layers, batch_first=True, bidirectional=bidirectional, dropout=rec_drop)
        # https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
        self.fc = nn.Sequential(
            nn.Flatten(),
            tmd.GaussianNoise(hid_noise),
            nn.Dropout(dropout),
            nn.Linear(self.hidden_size * self.h_n_dim * self.num_layers, features)
        )

    def forward(self, x):
        """Encode a batch of sequences and project the final hidden states.

        Args:
            x: Batch-first input tensor, ``(batch, seq_len, features)``.

        Returns:
            Tensor of shape ``(batch, features)``.
        """
        # When no initial state is passed, nn.LSTM starts from all-zero h_0/c_0
        # created to match the input. This equals the previous explicit zeros
        # but no longer ties the model to one globally configured device.
        _, (h_n, _) = self.lstm(x)
        # (layers * directions, batch, hidden) -> (batch, layers * directions, hidden)
        # so that nn.Flatten keeps the batch dimension first.
        h_n = torch.permute(h_n, (1, 0, 2))
        return self.fc(h_n)
    
class GRURec(nn.Module):
    """GRU encoder followed by a linear head that outputs one feature vector.

    The final hidden state of every layer and direction is flattened, passed
    through ``tmd.GaussianNoise`` and dropout, and projected back to
    ``features`` values by a single linear layer.

    Args:
        features: Number of input features per time step; also the size of
            the output vector.
        hidden_size: Hidden state size of each GRU layer.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability used before the linear head and, when
            ``num_layers > 1``, between the GRU layers.
        hid_noise: Value forwarded to ``tmd.GaussianNoise`` (presumably the
            noise standard deviation — confirm in ``torch_model_definitions``).
        bidirectional: Whether the GRU runs in both directions.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, features=11, hidden_size=20, num_layers=2, dropout=0.0, hid_noise=0.0, bidirectional=True, **kwargs):
        super().__init__()
        self.hidden_size = hidden_size
        self.h_n_dim = 2 if bidirectional else 1
        self.num_layers = num_layers
        # nn.GRU only applies dropout between stacked layers, so it is
        # disabled for a single-layer network.
        rec_drop = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(input_size=features, hidden_size=self.hidden_size, num_layers=num_layers, batch_first=True, bidirectional=bidirectional, dropout=rec_drop)
        # https://pytorch.org/docs/stable/generated/torch.nn.GRU.html
        self.fc = nn.Sequential(
            nn.Flatten(),
            tmd.GaussianNoise(hid_noise),
            nn.Dropout(dropout),
            nn.Linear(self.hidden_size * self.h_n_dim * self.num_layers, features)
        )

    def forward(self, x):
        """Encode a batch of sequences and project the final hidden states.

        Args:
            x: Batch-first input tensor, ``(batch, seq_len, features)``.

        Returns:
            Tensor of shape ``(batch, features)``.
        """
        # When no initial state is passed, nn.GRU starts from an all-zero h_0
        # created to match the input. This equals the previous explicit zeros
        # but no longer ties the model to one globally configured device.
        _, hidden = self.gru(x)
        # (layers * directions, batch, hidden) -> (batch, layers * directions, hidden)
        # so that nn.Flatten keeps the batch dimension first.
        hidden = torch.permute(hidden, (1, 0, 2))
        return self.fc(hidden)

### Grid search

In [None]:
# Coarse sweep: both recurrent architectures over two widths and two depths.
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound; early stopping ends training sooner
    lr=[0.001],
    model=[LSTMRec, GRURec],
    hidden_size=[20, 40],
    num_layers=[2, 3],
    bidirectional=[True],
    batch_size=[2048],
    dropout=[0.5],
    hid_noise=[0.05],
))  # val_mod is default at 8, n_splits at 6

# NOTE(review): the model passed here looks like a placeholder, since the grid
# supplies its own 'model' entries — confirm against RECOneModelTSWrapper.
wrapper = tl.RECOneModelTSWrapper(LSTMRec(), seq_len=24, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

The largest layer count and hidden size performed best, so I will test even larger values next.
LSTM and GRU trade blows in this comparison; further testing is required.

In [None]:
# Second sweep: a wider hidden size (60) with deeper stacks, both architectures.
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound; early stopping ends training sooner
    lr=[0.001],
    model=[LSTMRec, GRURec],
    hidden_size=[60],
    num_layers=[3, 4],
    bidirectional=[True],
    batch_size=[2048],
    dropout=[0.5],
    hid_noise=[0.05],
))  # val_mod is default at 8, n_splits at 6

# NOTE(review): the model passed here looks like a placeholder, since the grid
# supplies its own 'model' entries — confirm against RECOneModelTSWrapper.
wrapper = tl.RECOneModelTSWrapper(LSTMRec(), seq_len=24, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Third sweep: GRU only, 4 layers, lower learning rate, hidden sizes around 60.
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound; early stopping ends training sooner
    lr=[0.0005],
    model=[GRURec],
    hidden_size=[50, 60, 70],
    num_layers=[4],
    bidirectional=[True],
    batch_size=[2048],
    dropout=[0.5],
    hid_noise=[0.05],
))  # val_mod is default at 8, n_splits at 6

# NOTE(review): the model passed here looks like a placeholder, since the grid
# supplies its own 'model' entries — confirm against RECOneModelTSWrapper.
wrapper = tl.RECOneModelTSWrapper(GRURec(), seq_len=24, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

The GRU model with hidden size 70 and 4 layers seems to work best.

In [None]:
# Fourth sweep: GRU only, 5 layers, hidden sizes 70 and 80.
grid = tl.Grid(dict(
    epochs=[1000],  # only an upper bound; early stopping ends training sooner
    lr=[0.001],
    model=[GRURec],
    hidden_size=[70, 80],
    num_layers=[5],
    bidirectional=[True],
    batch_size=[2048],
    dropout=[0.5],
    hid_noise=[0.05],
))  # val_mod is default at 8, n_splits at 6

# NOTE(review): the model passed here looks like a placeholder, since the grid
# supplies its own 'model' entries — confirm against RECOneModelTSWrapper.
wrapper = tl.RECOneModelTSWrapper(GRURec(), seq_len=24, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

### Final

In [4]:
# Final configuration: bidirectional 5-layer GRU with hidden size 70,
# dropout 0.5 and hidden noise 0.05. Keyword arguments make each value's role explicit.
wrapper = tl.RECOneModelTSWrapper(
    GRURec(
        features=11,
        hidden_size=70,
        num_layers=5,
        dropout=0.5,
        hid_noise=0.05,
        bidirectional=True,
    ),
    seq_len=24,
    pred_len=3,
)
# NOTE(review): the positional 1000 is presumably the epoch limit — confirm
# against validate_ts_strategy's signature.
result = wrapper.validate_ts_strategy(
    X, y, 1000,
    batch_size=2048,
    lr=0.001,
    n_splits=6,
)

[Fold 1] BEGIN
	Epoch 017: train loss: 0.235491, val loss: 0.372250, test loss: 0.728472  

KeyboardInterrupt: 

In [None]:
# NOTE(review): result[3] is presumably the list of per-fold scores returned by
# validate_ts_strategy — confirm against trainer_lib.
fold_scores = result[3]
mean_all_folds = sum(fold_scores) / len(fold_scores)
# Same average with the first fold left out.
mean_without_first = sum(fold_scores[1:]) / (len(fold_scores) - 1)
print(mean_all_folds, "-", mean_without_first)

# Evaluate predictions on the last seventh of the rows.
st = X.shape[0] // 7
predictions = wrapper.predict(X[-st:], y[-st:])
tl.TSMWrapper.print_evaluation_info(*predictions)