### Imports

In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import trainer_lib as tl
from torch.nn.utils import weight_norm
import torch_model_definitions as tmd

# Pin every source of randomness used in this notebook so a fresh
# "Restart & Run All" yields the same results. The four calls are independent
# of one another, so their order does not matter.

# Dedicated NumPy generator object, separate from NumPy's global state.
np_random_state = np.random.RandomState(131002)

# Global generators: NumPy, Python's stdlib `random`, and PyTorch.
np.random.seed(2909231846)
random.seed(3009231410)
torch.manual_seed(2412231200)

### Load data

In [2]:
# Read the dataset through the project helper.
df: pd.DataFrame = tl.load_country_wide_dataset('../data/country_data.csv')

# The model input is the 'el_load' column as a float32 array.
X = df['el_load'].to_numpy(dtype=np.float32)
# The target starts out as an independent copy of the input array, so changing
# one never changes the other.
y = np.copy(X)

### Define models

In [3]:
class TCN(nn.Module):
    """Forecaster built from ``tmd.TemporalConvNet`` plus a linear output head.

    The input is reshaped to ``(batch, 1, seq_len)``, run through the temporal
    convolution stack, and the activations at the last time index are mapped
    to ``pred_len`` outputs by a fully connected layer.

    Args:
        seq_len: Length of one input window; ``forward`` reshapes to this size.
        pred_len: Number of values produced per input window.
        num_channels: Passed to ``tmd.TemporalConvNet``; its last entry is the
            input width of the linear head.
        kernel_size: Passed through to ``tmd.TemporalConvNet``.
        dropout: Passed through to ``tmd.TemporalConvNet``.
        **kwargs: Accepted and ignored, so the class can be constructed from a
            parameter dict that carries extra keys.
    """

    def __init__(self, seq_len=24, pred_len=3, num_channels=(24,) * 2, kernel_size=3, dropout=0.5, **kwargs):
        super().__init__()
        self.seq_len, self.pred_len = seq_len, pred_len

        # Sub-modules are created in the order tcn -> fc so that parameter
        # initialisation consumes the torch RNG in a fixed sequence.
        self.tcn = tmd.TemporalConvNet(1, num_channels, kernel_size=kernel_size, dropout=dropout)
        head_in_features = num_channels[-1]
        self.fc = nn.Linear(head_in_features, pred_len)

    def forward(self, x):
        """Return ``pred_len`` outputs for every ``seq_len``-long window in ``x``."""
        # Fold whatever leading layout `x` has into (batch, 1, seq_len).
        windows = x.reshape(-1, 1, self.seq_len)
        features = self.tcn(windows)
        # Only the final time index feeds the linear head.
        last_step = features[:, :, -1]
        return self.fc(last_step)

### Train models

I'll use a 48-step lookback window (t-48), since the 1D CNNs performed considerably better with it.

In [4]:
# Window sizes shared by the search space, the prototype model and the wrapper,
# so the three cannot drift apart.
SEQ_LEN = 48
PRED_LEN = 3

# Hyper-parameter search space: every combination of the listed values is
# evaluated (3 kernel sizes x 4 channel layouts here). In the recorded run each
# cross-validation fold took on the order of minutes, so this cell is expensive.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.0001],
    'seq_len': [SEQ_LEN],
    'pred_len': [PRED_LEN],
    'model': [TCN],
    'kernel_size': [3, 5, 7],
    'num_channels': [(24,) * 2, (48,) * 3, (72,) * 4, (50,) * 5],
    'dropout': [0.3],
    'n_splits': [6],
    'val_mod': [8],
})

# The prototype model must use the same window length as the wrapper. A bare
# `TCN()` defaults to seq_len=24, and its forward pass would silently reshape
# each 48-step window into two 24-step rows if that instance were ever run.
# NOTE(review): whether grid_search re-instantiates the model from `grid` is not
# visible in this notebook; keeping the prototype consistent is safe either way.
wrapper = tl.MIMOTSWrapper(
    TCN(seq_len=SEQ_LEN, pred_len=PRED_LEN),
    seq_len=SEQ_LEN,
    pred_len=PRED_LEN,
)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

[Grid search 001] BEGIN - params: {'epochs': 1000, 'lr': 0.0001, 'seq_len': 48, 'pred_len': 3, 'model': <class '__main__.TCN'>, 'kernel_size': 3, 'num_channels': (24, 24), 'dropout': 0.3, 'n_splits': 6, 'val_mod': 8}
[Fold 1] BEGIN - END - RMSE loss: 174.925 - Time: 1.4 min.
[Fold 2] BEGIN - END - RMSE loss: 142.911 - Time: 2.4 min.
[Fold 3] BEGIN - END - RMSE loss: 165.613 - Time: 2.8 min.
[Fold 4] BEGIN - END - RMSE loss: 166.768 - Time: 3.0 min.
[Fold 5] BEGIN - END - RMSE loss: 205.808 - Time: 2.5 min.
[Fold 6] BEGIN - END - RMSE loss: 231.886 - Time: 1.1 min.
[Grid search 001] END - Score: 181.31848894 *
[Grid search 002] BEGIN - params: {'epochs': 1000, 'lr': 0.0001, 'seq_len': 48, 'pred_len': 3, 'model': <class '__main__.TCN'>, 'kernel_size': 5, 'num_channels': (24, 24), 'dropout': 0.3, 'n_splits': 6, 'val_mod': 8}
[Fold 1] BEGIN - END - RMSE loss: 123.993 - Time: 2.8 min.
[Fold 2] BEGIN - END - RMSE loss: 117.556 - Time: 2.1 min.
[Fold 3] BEGIN - END - RMSE loss: 126.191 - Time

The best configuration was (72,) * 4 channels with a kernel size of 5. Kernel size 7 wasn't far off, so it might be worth investigating further.
(50,) * 5 channels with a kernel size of 5 performed comparably as well.