### Imports

In [None]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import trainer_lib as tl
from torch.nn.utils import weight_norm
from torch_model_definitions import GaussianNoise

# Pin every random number generator used in the notebook so that a
# "Restart Kernel -> Run All" starts from the same RNG state each time.
random.seed(3009231410)           # Python's built-in `random` module
np.random.seed(2909231846)        # NumPy's legacy global generator
torch.manual_seed(2412231200)     # PyTorch's default generator

# Dedicated NumPy generator, independent of the global NumPy state above.
np_random_state = np.random.RandomState(seed=131002)

### Load data

In [None]:
# Load the dataset through the project helper; the path is relative to the
# notebook's working directory.
df: pd.DataFrame = tl.load_country_wide_dataset('../data/country_data.csv')

# Univariate setup: the 'el_load' column is both the model input and the
# prediction target. `y` is an independent copy, so later changes to one
# array cannot leak into the other.
X = df['el_load'].to_numpy(dtype=np.float32)
y = X.copy()

### Define models

In [None]:
class ConvNetSmall(nn.Module):
    """Small two-stage 1D CNN mapping a univariate window to 3 outputs.

    Each stage is: left-only zero padding of ``kernel_size - 1`` (so the
    convolution keeps the sequence length), ``Conv1d``, ``BatchNorm1d``,
    ``ReLU`` and ``MaxPool1d(2)``. Two poolings reduce the length to
    ``seq_len // 4`` before the fully connected head.

    Args:
        dropout: Dropout probability applied before the final linear layer.
        seq_len: Length of the input window. Defaults to 24, which reproduces
            the previously hard-coded architecture exactly.
        **kwargs: Accepted and ignored, so a full hyper-parameter dict can be
            passed to the constructor.

    Raises:
        ValueError: If ``seq_len`` is smaller than 4, because nothing would
            be left after the two poolings.
    """

    def __init__(self, dropout=0.5, seq_len=24, **kwargs):
        super().__init__()
        if seq_len < 4:
            raise ValueError(f"seq_len must be at least 4, got {seq_len}")
        self.seq_len = seq_len
        self.conv = nn.Sequential(
            nn.ZeroPad2d((4, 0, 0, 0)),
            nn.Conv1d(1, 8, 5),
            nn.BatchNorm1d(8),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.ZeroPad2d((4, 0, 0, 0)),
            nn.Conv1d(8, 16, 5),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.MaxPool1d(2),
        )
        # Padding keeps the length through each convolution; each pooling
        # floors it by two, hence seq_len // 4 time steps with 16 channels.
        self.fc = nn.Sequential(
            nn.Flatten(1, -1),
            nn.Dropout(dropout),
            nn.Linear((seq_len // 4) * 16, 3),
        )

    def forward(self, x):
        """Return a ``(batch, 3)`` tensor for input reshaped to ``(batch, 1, seq_len)``."""
        # Use the configured window length instead of a literal 24, so a
        # different seq_len cannot silently re-batch the data.
        x = x.reshape(-1, 1, self.seq_len)
        x = self.conv(x)
        x = self.fc(x)
        return x
    
class ConvNetMedium(nn.Module):
    """Medium three-stage 1D CNN mapping a univariate window to 3 outputs.

    Each stage is: left-only zero padding of ``kernel_size - 1`` (so the
    convolution keeps the sequence length), ``Conv1d`` with kernel size 7,
    ``BatchNorm1d``, ``ReLU`` and ``MaxPool1d(2)``. A light dropout of
    ``dropout / 5`` follows the first two stages. Three poolings reduce the
    length to ``seq_len // 8`` before the fully connected head.

    Args:
        dropout: Dropout probability before the final linear layer; one fifth
            of it is used inside the convolutional stack.
        seq_len: Length of the input window. Defaults to 24, which reproduces
            the previously hard-coded architecture exactly.
        **kwargs: Accepted and ignored, so a full hyper-parameter dict can be
            passed to the constructor.

    Raises:
        ValueError: If ``seq_len`` is smaller than 8, because nothing would
            be left after the three poolings.
    """

    def __init__(self, dropout=0.5, seq_len=24, **kwargs):
        super().__init__()
        if seq_len < 8:
            raise ValueError(f"seq_len must be at least 8, got {seq_len}")
        self.seq_len = seq_len
        self.conv = nn.Sequential(
            nn.ZeroPad2d((6, 0, 0, 0)),
            nn.Conv1d(1, 16, 7),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(dropout / 5),
            nn.ZeroPad2d((6, 0, 0, 0)),
            nn.Conv1d(16, 32, 7),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(dropout / 5),
            nn.ZeroPad2d((6, 0, 0, 0)),
            nn.Conv1d(32, 32, 7),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2),
        )
        # Padding keeps the length through each convolution; each pooling
        # floors it by two, hence seq_len // 8 time steps with 32 channels.
        self.fc = nn.Sequential(
            nn.Flatten(1, -1),
            nn.Dropout(dropout),
            nn.Linear((seq_len // 8) * 32, 3),
        )

    def forward(self, x):
        """Return a ``(batch, 3)`` tensor for input reshaped to ``(batch, 1, seq_len)``."""
        # Use the configured window length instead of a literal 24, so a
        # different seq_len cannot silently re-batch the data.
        x = x.reshape(-1, 1, self.seq_len)
        x = self.conv(x)
        x = self.fc(x)
        return x
    
class ConvNetLarge(nn.Module):
    """Large three-stage 1D CNN with a two-layer head, producing 3 outputs.

    Each stage is: left-only zero padding of ``kernel_size - 1`` (so the
    convolution keeps the sequence length), ``Conv1d`` with kernel size 9,
    ``BatchNorm1d``, ``ReLU`` and ``MaxPool1d(2)``. A light dropout of
    ``dropout / 5`` follows the first two stages. Three poolings reduce the
    length to ``seq_len // 8``; the head is
    ``Linear -> ReLU -> Dropout -> Linear``.

    Args:
        dropout: Dropout probability before the last linear layer; half of it
            is applied before the first linear layer and one fifth inside the
            convolutional stack.
        seq_len: Length of the input window. Defaults to 24, which reproduces
            the previously hard-coded architecture exactly.
        **kwargs: Accepted and ignored, so a full hyper-parameter dict can be
            passed to the constructor.

    Raises:
        ValueError: If ``seq_len`` is smaller than 8, because nothing would
            be left after the three poolings.
    """

    def __init__(self, dropout=0.5, seq_len=24, **kwargs):
        super().__init__()
        if seq_len < 8:
            raise ValueError(f"seq_len must be at least 8, got {seq_len}")
        self.seq_len = seq_len
        self.conv = nn.Sequential(
            nn.ZeroPad2d((8, 0, 0, 0)),
            nn.Conv1d(1, 32, 9),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(dropout / 5),
            nn.ZeroPad2d((8, 0, 0, 0)),
            nn.Conv1d(32, 64, 9),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(dropout / 5),
            nn.ZeroPad2d((8, 0, 0, 0)),
            nn.Conv1d(64, 64, 9),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),
        )
        # Padding keeps the length through each convolution; each pooling
        # floors it by two, hence seq_len // 8 time steps with 64 channels.
        self.fc = nn.Sequential(
            nn.Flatten(1, -1),
            nn.Dropout(dropout / 2),
            nn.Linear((seq_len // 8) * 64, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 3),
        )

    def forward(self, x):
        """Return a ``(batch, 3)`` tensor for input reshaped to ``(batch, 1, seq_len)``."""
        # Use the configured window length instead of a literal 24, so a
        # different seq_len cannot silently re-batch the data.
        x = x.reshape(-1, 1, self.seq_len)
        x = self.conv(x)
        x = self.fc(x)
        return x
    
class ConvNetLessPad(nn.Module):
    """Configurable three-stage 1D CNN with partial left padding, 3 outputs.

    Every stage consists of a left-only zero pad, a ``Conv1d``, a ``ReLU``
    and a ``MaxPool1d(2)`` with its own padding. There is no batch
    normalisation. The size of the flattened feature vector is measured by
    pushing one dummy window through the convolutional stack.

    Args:
        dropout: Dropout probability before the final linear layer.
        conv_channels: Output channels of the three convolutions.
        kernel_sizes: Kernel sizes of the three convolutions.
        paddings: Left zero padding applied before each convolution.
        pool_pad: ``padding`` argument of each max-pooling layer.
        seq_len: Length of the input window.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, dropout=0.5, conv_channels=(32, 64, 32), kernel_sizes=(12, 6, 3), paddings=(8, 4, 0), pool_pad=(1, 0, 1), seq_len=24, **kwargs):
        super().__init__()

        # Input channels per stage: the raw series first, then the previous
        # stage's output channels.
        stage_inputs = (1, conv_channels[0], conv_channels[1])
        stack = []
        for stage in range(3):
            stack.append(nn.ZeroPad2d((paddings[stage], 0, 0, 0)))
            stack.append(nn.Conv1d(stage_inputs[stage], conv_channels[stage], kernel_sizes[stage]))
            stack.append(nn.ReLU())
            stack.append(nn.MaxPool1d(2, padding=pool_pad[stage]))
        self.conv = nn.Sequential(*stack)

        # Dry run on a single random window to find the remaining length.
        probe = torch.randn(1, 1, seq_len)
        remaining_steps = self.conv(probe).shape[-1]

        head = [
            nn.Flatten(1, -1),
            nn.Dropout(dropout),
            nn.Linear(conv_channels[2] * remaining_steps, 3),
        ]
        self.fc = nn.Sequential(*head)
        self.seq_len = seq_len

    def forward(self, x):
        """Reshape ``x`` to ``(batch, 1, seq_len)`` and return ``(batch, 3)`` predictions."""
        windows = x.reshape(-1, 1, self.seq_len)
        features = self.conv(windows)
        return self.fc(features)
    
class CNLP(ConvNetLessPad):
    """``ConvNetLessPad`` with architecture presets selectable by an integer.

    Args:
        param_group: ``1``, ``2`` or ``3`` selects one of the presets below;
            any other value builds ``ConvNetLessPad`` with its own defaults.
        **kwargs: Forwarded to ``ConvNetLessPad``. Passing a keyword that the
            selected preset already sets raises ``TypeError``.
    """

    def __init__(self, param_group=0, **kwargs):
        # (selector, constructor keywords) pairs, compared with == in order.
        presets = (
            (1, dict(conv_channels=(16, 24, 32), kernel_sizes=(8, 4, 3), paddings=(4, 2, 0), pool_pad=(1, 0, 1), seq_len=24)),
            (2, dict(conv_channels=(32, 64, 128), kernel_sizes=(14, 8, 5), paddings=(10, 6, 1), pool_pad=(1, 0, 0), seq_len=24)),
            (3, dict(conv_channels=(32, 48, 64), kernel_sizes=(12, 6, 3), paddings=(8, 4, 0), pool_pad=(1, 0, 1), seq_len=24)),
        )
        # No matching selector -> empty preset -> parent defaults.
        chosen = next((settings for selector, settings in presets if param_group == selector), {})
        super().__init__(**chosen, **kwargs)
            
class ConvNetNoise(nn.Module):
    """Three-stage 1D CNN with a ``GaussianNoise`` layer in front of the head.

    Every convolutional stage is a left-only zero pad, a ``Conv1d``, a
    ``ReLU`` and a ``MaxPool1d(2)``. The head applies ``GaussianNoise`` to
    the convolutional features, flattens them, applies dropout and maps to
    3 outputs. The flattened size is measured with one dummy forward pass.

    Args:
        seq_len: Length of the input window.
        noise_sigma: Value passed to ``GaussianNoise`` (defined in
            ``torch_model_definitions``; presumably the noise standard
            deviation - confirm there).
        dropout: Dropout probability before the final linear layer.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, seq_len=24, noise_sigma=0, dropout=0, **kwargs):
        super().__init__()

        # One row per stage: (left pad, in channels, out channels, kernel, pool padding)
        stage_specs = (
            (10, 1, 32, 14, 1),
            (6, 32, 64, 8, 0),
            (2, 64, 128, 5, 1),
        )
        stack = []
        for left_pad, c_in, c_out, kernel, pool_padding in stage_specs:
            stack.extend((
                nn.ZeroPad2d((left_pad, 0, 0, 0)),
                nn.Conv1d(c_in, c_out, kernel),
                nn.ReLU(),
                nn.MaxPool1d(2, padding=pool_padding),
            ))
        self.conv = nn.Sequential(*stack)

        # Dry run on a single random window to find the remaining length.
        probe = torch.randn(1, 1, seq_len)
        remaining_steps = self.conv(probe).shape[-1]

        self.fc = nn.Sequential(
            GaussianNoise(noise_sigma),
            nn.Flatten(1, -1),
            nn.Dropout(dropout),
            nn.Linear(128 * remaining_steps, 3),
        )
        self.seq_len = seq_len

    def forward(self, x):
        """Reshape ``x`` to ``(batch, 1, seq_len)`` and return ``(batch, 3)`` predictions."""
        windows = x.reshape(-1, 1, self.seq_len)
        features = self.conv(windows)
        return self.fc(features)
    
class ConvNetLong(nn.Module):
    """Two-stage weight-normalised 1D CNN for longer windows, 3 outputs.

    Both convolutions are wrapped in ``weight_norm`` and preceded by a
    left-only zero pad; each is followed by ``ReLU`` and ``MaxPool1d(2)``.
    The head applies ``GaussianNoise`` to the convolutional features,
    flattens them and runs ``Linear -> ReLU -> Dropout -> Linear``. The
    flattened size is measured with one dummy forward pass.

    Args:
        seq_len: Length of the input window.
        dropout: Dropout probability between the two linear layers.
        noise: Value passed to ``GaussianNoise`` (defined in
            ``torch_model_definitions``; presumably the noise standard
            deviation - confirm there).
        **kwargs: Accepted and ignored.
    """

    def __init__(self, seq_len=48, dropout=0, noise=0.05, **kwargs):
        super().__init__()

        hidden_units = 2 * 64

        # Create the convolutions in network order so parameter
        # initialisation draws from the RNG in the same sequence.
        first_conv = weight_norm(nn.Conv1d(1, 64, 16))
        second_conv = weight_norm(nn.Conv1d(64, 128, 14))
        self.conv = nn.Sequential(
            nn.ZeroPad2d((8, 0, 0, 0)),
            first_conv,
            nn.ReLU(),
            nn.MaxPool1d(2, padding=0),
            nn.ZeroPad2d((7, 0, 0, 0)),
            second_conv,
            nn.ReLU(),
            nn.MaxPool1d(2, padding=0),
        )

        # Dry run on a single random window to find the remaining length.
        probe = torch.randn(1, 1, seq_len)
        remaining_steps = self.conv(probe).shape[-1]

        self.fc = nn.Sequential(
            GaussianNoise(noise),
            nn.Flatten(1, -1),
            nn.Linear(128 * remaining_steps, hidden_units),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_units, 3),
        )
        self.seq_len = seq_len

    def forward(self, x):
        """Reshape ``x`` to ``(batch, 1, seq_len)`` and return ``(batch, 3)`` predictions."""
        windows = x.reshape(-1, 1, self.seq_len)
        features = self.conv(windows)
        return self.fc(features)

### Grid search

In [None]:
# Compare the three batch-norm architectures under otherwise identical
# settings; 'model' is the only entry with more than one candidate.
grid = tl.Grid({
    'epochs': [200],  # upper bound only; early stopping is used anyway
    'batch_size': [64], # single option to keep the search small; can be tested separately afterward
    'lr': [0.001],
    'model': [ConvNetSmall, ConvNetMedium, ConvNetLarge],
    'dropout': [0.5],
    'n_splits': [6],    
})

# NOTE(review): the ConvNetSmall() instance is presumably just an initial
# model that grid_search replaces per candidate - confirm in trainer_lib.
wrapper = tl.MIMOTSWrapper(ConvNetSmall(), seq_len=24, pred_len=3)
# b_p / b_s: best parameter combination and its score, as printed below.
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"Best params: {b_p}\nBest score: {b_s}")

### Trying a model with less padding

In [None]:
# Single validation run of ConvNetLessPad on 24-step windows with 3 outputs
# (pred_len=3), using a lower learning rate than the grid search above.
wrapper = tl.MIMOTSWrapper(ConvNetLessPad(dropout=0.5), seq_len=24, pred_len=3)
# `res` keeps whatever validate_ts_strategy returns; it is not used further
# in the visible cells.
res = wrapper.validate_ts_strategy(X, y, epochs=200, lr=0.0005, n_splits=6, batch_size=64, verbose=3)

This seems to work much better, so let's use grid search to tune the model further.

### Grid search for the new models

In [None]:
# Search over the CNLP architecture presets; 'param_group' is the only
# entry with more than one candidate.
grid = tl.Grid({
    'epochs': [200],  # upper bound only; early stopping is used anyway
    'batch_size': [64],
    'lr': [0.0005],
    'model': [CNLP],
    'param_group': [0, 1, 2, 3], # 0 is the default ConvNetLessPad architecture; 1, 2, 3 select the CNLP presets
    'n_splits': [6],  
})

# NOTE(review): the CNLP() instance is presumably just an initial model that
# grid_search replaces per candidate - confirm in trainer_lib.
wrapper = tl.MIMOTSWrapper(CNLP(), seq_len=24, pred_len=3)
# b_p / b_s: best parameter combination and its score, as printed below.
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"Best params: {b_p}\nBest score: {b_s}")

I'll try adding Gaussian noise (applied to the convolutional features, i.e. the input of the fully connected head) and using a longer input sequence to see if that helps training.

### Trying a longer sequence length and noise

In [None]:
# Validate ConvNetLong on 48-step windows (twice the earlier window length).
# The wrapper's seq_len matches ConvNetLong's default seq_len of 48.
wrapper = tl.MIMOTSWrapper(ConvNetLong(dropout=0.3, noise=0.02), seq_len=48, pred_len=3)
# The return value is not assigned, so the notebook displays it as cell output.
wrapper.validate_ts_strategy(X, y, epochs=200, lr=0.0005, n_splits=6, batch_size=64, verbose=3)

### Speeding up training

Try larger batch sizes (512, 1024 and 2048) to speed up training.

In [None]:
# Search over batch sizes for ConvNetLong; 'batch_size' is the only entry
# with more than one candidate.
grid = tl.Grid({
    'epochs': [200],  # upper bound only; early stopping is used anyway
    'batch_size': [512, 1024, 2048],
    'lr': [0.001],
    'model': [ConvNetLong],
    'noise': [0.02],
    'dropout': [0.5],
    'n_splits': [6],
    'es_p': [20],  # NOTE(review): presumably the early-stopping patience - confirm in trainer_lib
})

# NOTE(review): the ConvNetLong() instance is presumably just an initial
# model that grid_search replaces per candidate - confirm in trainer_lib.
wrapper = tl.MIMOTSWrapper(ConvNetLong(), seq_len=48, pred_len=3)
# b_p / b_s: best parameter combination and its score, as printed below.
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"Best params: {b_p}\nBest score: {b_s}")

In [None]:
# Longer validation run of ConvNetLong: batch size 1024 (one of the
# candidates searched above) and an epoch limit of 400 instead of 200.
wrapper = tl.MIMOTSWrapper(ConvNetLong(dropout=0.5, noise=0.02), seq_len=48, pred_len=3)
# `result` is read by the next cell (its element at index 3).
result = wrapper.validate_ts_strategy(X, y, epochs=400, lr=0.001, n_splits=6, batch_size=1024, es_p=20, verbose=3)

In [None]:
# Mean of the values in result[3].
# NOTE(review): presumably one score per validation split - confirm what
# validate_ts_strategy returns at index 3.
print(sum(result[3]) / len(result[3]))
# Evaluate on the last seventh of the series.
# NOTE(review): 7 presumably corresponds to n_splits + 1 - confirm.
st = X.shape[0] // 7
tl.MIMOTSWrapper.print_evaluation_info(*wrapper.predict(X[-st:], y[-st:]))