### Imports

In [None]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import trainer_lib as tl
from torch.nn.utils import weight_norm
import torch_model_definitions as tmd

# Seed every global RNG this notebook relies on (NumPy, the stdlib `random`
# module and PyTorch) so that a fresh Restart & Run All is repeatable.
np.random.seed(seed=2909231846)
random.seed(a=3009231410)
torch.manual_seed(seed=2412231200)

# Stand-alone NumPy generator, independent of the global NumPy state above.
np_random_state = np.random.RandomState(seed=131002)

### Load data

In [None]:
# Load the country-wide dataset through the project helper.
df: pd.DataFrame = tl.load_country_wide_dataset('../data/country_data.csv')

# Univariate setup: the 'el_load' column is both the input series and the
# target. `y` is an independent copy, so the two arrays do not share memory.
X = df['el_load'].to_numpy(dtype=np.float32)
y = np.copy(X, order='C')

### Define models

In [None]:
class TCN(nn.Module):
    """Temporal convolutional network followed by a linear prediction head.

    Args:
        seq_len: Length of each input window; used to reshape the input in
            ``forward``.
        pred_len: Number of values produced per sample by the linear head.
        num_channels: Channel sizes handed to ``tmd.TemporalConvNet``; the
            last entry is the input width of the linear head.
        kernel_size: Kernel size forwarded to ``tmd.TemporalConvNet``.
        dropout: Dropout value forwarded to ``tmd.TemporalConvNet``.
        hid_noise: Value handed to ``tmd.GaussianNoise``, which is applied to
            the TCN output before the linear head.
        **kwargs: Accepted and ignored. Presumably this lets a full
            hyper-parameter dict be splatted into the constructor; confirm
            against ``trainer_lib``.
    """

    def __init__(self, seq_len=24, pred_len=3, num_channels=(24,) * 2, kernel_size=3, dropout=0.5, hid_noise=0.0, **kwargs):
        super().__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len
        # Sub-modules are created in a fixed order so that parameter
        # initialisation consumes the torch RNG deterministically.
        self.tcn = tmd.TemporalConvNet(
            1,  # presumably the number of input channels (univariate series)
            num_channels,
            kernel_size=kernel_size,
            dropout=dropout,
        )
        self.hid_noise = tmd.GaussianNoise(hid_noise)
        self.fc = nn.Linear(num_channels[-1], pred_len)

    def forward(self, x):
        """Run the network on a batch of input windows.

        The input is reshaped to ``(-1, 1, seq_len)``, passed through the TCN
        and the noise layer, and the slice at the last index of the final axis
        is fed to the linear head.
        """
        windows = x.reshape(-1, 1, self.seq_len)
        features = self.hid_noise(self.tcn(windows))
        # Only the last position along the final axis feeds the head
        # (assumes the TCN output is (batch, channels, time) -- TODO confirm).
        last_step = features[:, :, -1]
        return self.fc(last_step)

### Train models

I'll use a 48-step lookback window (t-48), since 1D CNNs worked a lot better with that.

In [None]:
# First sweep: vary the kernel size and the channel layout, keeping the
# learning rate, window lengths and dropout fixed.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.0001],
    'seq_len': [48],
    'pred_len': [3],
    'model': [TCN],
    'kernel_size': [3, 5, 7],
    'num_channels': [(24,) * 2, (48,) * 3, (72,) * 4, (50,) * 5],
    'dropout': [0.3],
    'n_splits': [6],
    'val_mod': [8],
})

# Build the model with the same window lengths as the wrapper. The class
# default is seq_len=24, which disagrees with the wrapper's seq_len=48 and
# would make TCN.forward reshape each 48-step window into two samples.
wrapper = tl.MIMOTSWrapper(TCN(seq_len=48, pred_len=3), seq_len=48, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

The best configuration was (72,) * 4 channels with a kernel size of 5. Kernel size 7 wasn't far off, so it might be worth investigating further.
(50,) * 5 channels with a kernel size of 5 performed comparably as well.

In [None]:
# Second sweep: larger kernel sizes, plus growing and constant-width
# channel layouts.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.0001],
    'seq_len': [48],
    'pred_len': [3],
    'model': [TCN],
    'kernel_size': [5, 7, 9],
    'num_channels': [(24, 48, 72, 96), (50, 60, 70, 80, 90), (20,) * 4, (100,) * 4],
    'dropout': [0.3],
    'n_splits': [6],
    'val_mod': [8],
})

# Build the model with the same window lengths as the wrapper. The class
# default is seq_len=24, which disagrees with the wrapper's seq_len=48 and
# would make TCN.forward reshape each 48-step window into two samples.
wrapper = tl.MIMOTSWrapper(TCN(seq_len=48, pred_len=3), seq_len=48, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

It seems that larger kernel sizes and more filters per layer might help.

In [None]:
# Third sweep: fix the largest kernel and widest layout tried so far, and
# compare two dropout values.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.0001],
    'seq_len': [48],
    'pred_len': [3],
    'model': [TCN],
    'kernel_size': [9],
    'num_channels': [(100,) * 4],
    'dropout': [0.3, 0.5],
    'n_splits': [6],
    'val_mod': [8],
})

# Build the model with the same window lengths as the wrapper. The class
# default is seq_len=24, which disagrees with the wrapper's seq_len=48 and
# would make TCN.forward reshape each 48-step window into two samples.
wrapper = tl.MIMOTSWrapper(TCN(seq_len=48, pred_len=3), seq_len=48, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

### Noise

It seems that kernel size 5 was the most consistent, and 4 or 5 layers worked almost equally well.
Let's try adding noise to the hidden features fed into the fully connected layer; previous tests showed that input noise didn't work well.

In [None]:
# Noise sweep: kernel size 5 with 4 or 5 layers of 72 channels, comparing
# two values for the noise applied to the TCN output before the linear head.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.0001],
    'seq_len': [48],
    'pred_len': [3],
    'model': [TCN],
    'kernel_size': [5],
    'hid_noise': [0.05, 0.1],
    'num_channels': [(72,) * 4, (72,) * 5],
    'dropout': [0.3],
    'n_splits': [6],
    'val_mod': [8],
})

# Build the model with the same window lengths as the wrapper. The class
# default is seq_len=24, which disagrees with the wrapper's seq_len=48 and
# would make TCN.forward reshape each 48-step window into two samples.
wrapper = tl.MIMOTSWrapper(TCN(seq_len=48, pred_len=3), seq_len=48, pred_len=3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

### Final

In [None]:
# Final configuration: 4 layers of 72 channels, kernel size 5, dropout 0.3
# and hidden noise 0.05, on a 48-step window predicting 3 values.
wrapper = tl.MIMOTSWrapper(
    TCN(
        seq_len=48,
        pred_len=3,
        num_channels=(72, 72, 72, 72),
        kernel_size=5,
        dropout=0.3,
        hid_noise=0.05,
    ),
    seq_len=48,
    pred_len=3,
)

# Same learning rate, epoch cap and split count as the grid searches above.
result = wrapper.validate_ts_strategy(
    X,
    y,
    epochs=1000,
    lr=0.0001,
    n_splits=6,
    verbose=4,
)

In [None]:
# Arithmetic mean of result[3] (presumably one score per split -- confirm
# against tl.MIMOTSWrapper.validate_ts_strategy).
print(sum(result[3]) / len(result[3]))

# Evaluate the trained wrapper on the trailing seventh of the series.
st = len(X) // 7
predictions = wrapper.predict(X[-st:], y[-st:])
tl.MIMOTSWrapper.print_evaluation_info(*predictions)