### Imports

In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import trainer_lib as tl
import torch_model_definitions as tmd

# Seed the torch, stdlib `random` and NumPy global RNGs with fixed values.
torch.manual_seed(310231551)
random.seed(3009231410)
np.random.seed(2909231846)
# Separate NumPy RandomState object with its own fixed seed.
np_random_state = np.random.RandomState(131002)

### Load data

In [2]:
# Load the country-wide dataset through the project helper; the bare `df`
# on the last line makes the notebook render the frame as the cell output.
df: pd.DataFrame = tl.load_country_wide_dataset('../data/country_data.csv')
df

Unnamed: 0_level_0,el_load,prec,grad,holiday,weekend,hour,weekday,dayofyear,month,year,el_load_lag24
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2015-01-01 00:00:00,4270.718,0.000000,0.0,1.0,0.0,0,3,1,1,2015,0.000
2015-01-01 01:00:00,4068.940,0.000000,0.0,1.0,0.0,1,3,1,1,2015,0.000
2015-01-01 02:00:00,3754.788,0.000000,0.0,1.0,0.0,2,3,1,1,2015,0.000
2015-01-01 03:00:00,3510.361,0.000000,0.0,1.0,0.0,3,3,1,1,2015,0.000
2015-01-01 04:00:00,3426.489,0.000000,0.0,1.0,0.0,4,3,1,1,2015,0.000
...,...,...,...,...,...,...,...,...,...,...,...
2023-08-31 19:00:00,5480.864,0.020588,0.0,0.0,0.0,19,3,243,8,2023,5403.837
2023-08-31 20:00:00,5139.191,0.037255,0.0,0.0,0.0,20,3,243,8,2023,5110.462
2023-08-31 21:00:00,4849.395,0.000980,0.0,0.0,0.0,21,3,243,8,2023,4840.584
2023-08-31 22:00:00,4652.292,0.000000,0.0,0.0,0.0,22,3,243,8,2023,4643.627


### Define models

In [3]:
class LSTMModel(nn.Module):
    """Stacked (optionally bidirectional) LSTM with a single-output linear head.

    The final hidden state of every layer and direction is flattened per
    sample and passed through ``tmd.GaussianNoise``, dropout and one linear
    layer, so ``forward`` returns a ``(batch, 1)`` tensor.

    Args:
        features: number of input features per time step (LSTM ``input_size``).
        hidden_size: hidden units per LSTM layer and direction.
        num_layers: number of stacked LSTM layers.
        dropout: probability for the head's ``nn.Dropout``; also applied
            between LSTM layers when ``num_layers > 1``.
        hid_noise: argument forwarded to ``tmd.GaussianNoise``
            (presumably the noise standard deviation; see that module).
        bidirectional: run the LSTM in both directions.
        **kwargs: accepted and ignored, so callers may pass extra keys.
    """

    def __init__(self, features=11, hidden_size=15, num_layers=2, dropout=0.0, hid_noise=0.0,
                 bidirectional=True, **kwargs):
        super().__init__()
        self.hidden_size = hidden_size
        self.h_n_dim = 2 if bidirectional else 1
        self.num_layers = num_layers
        # nn.LSTM only applies dropout between layers, so it is switched off
        # for a single-layer network.
        rec_drop = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size=features, hidden_size=self.hidden_size, num_layers=num_layers, batch_first=True,
                            bidirectional=bidirectional, dropout=rec_drop)
        # https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
        self.fc = nn.Sequential(
            nn.Flatten(),
            tmd.GaussianNoise(hid_noise),
            nn.Dropout(dropout),
            nn.Linear(self.hidden_size * self.h_n_dim * self.num_layers, 1)
        )

    def forward(self, x):
        """Return a ``(batch, 1)`` prediction for ``x`` of shape ``(batch, seq_len, features)``."""
        # nn.LSTM initialises (h_0, c_0) to zeros itself when no state is
        # passed, on the same device as the input. This replaces the manual
        # zero tensors that were moved to a global device on every call.
        _, (h_n, _) = self.lstm(x)
        # h_n is (layers * directions, batch, hidden); put batch first so that
        # nn.Flatten yields one row per sample.
        h_n = torch.permute(h_n, (1, 0, 2))
        return self.fc(h_n)


class GRUModel(nn.Module):
    """Stacked (optionally bidirectional) GRU with a single-output linear head.

    The final hidden state of every layer and direction is flattened per
    sample and passed through ``tmd.GaussianNoise``, dropout and one linear
    layer, so ``forward`` returns a ``(batch, 1)`` tensor.

    Args:
        features: number of input features per time step (GRU ``input_size``).
        hidden_size: hidden units per GRU layer and direction.
        num_layers: number of stacked GRU layers.
        dropout: probability for the head's ``nn.Dropout``; also applied
            between GRU layers when ``num_layers > 1``.
        hid_noise: argument forwarded to ``tmd.GaussianNoise``
            (presumably the noise standard deviation; see that module).
        bidirectional: run the GRU in both directions.
        **kwargs: accepted and ignored, so callers may pass extra keys.
    """

    def __init__(self, features=11, hidden_size=15, num_layers=2, dropout=0.0, hid_noise=0.0,
                 bidirectional=True, **kwargs):
        super().__init__()
        self.hidden_size = hidden_size
        self.h_n_dim = 2 if bidirectional else 1
        self.num_layers = num_layers
        # nn.GRU only applies dropout between layers, so it is switched off
        # for a single-layer network.
        rec_drop = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(input_size=features, hidden_size=self.hidden_size, num_layers=num_layers, batch_first=True,
                          bidirectional=bidirectional, dropout=rec_drop)
        # https://pytorch.org/docs/stable/generated/torch.nn.GRU.html
        self.fc = nn.Sequential(
            nn.Flatten(),
            tmd.GaussianNoise(hid_noise),
            nn.Dropout(dropout),
            nn.Linear(self.hidden_size * self.h_n_dim * self.num_layers, 1)
        )

    def forward(self, x):
        """Return a ``(batch, 1)`` prediction for ``x`` of shape ``(batch, seq_len, features)``."""
        # nn.GRU initialises h_0 to zeros itself when no state is passed, on
        # the same device as the input. This replaces the manual zero tensor
        # that was moved to a global device on every call.
        _, hidden = self.gru(x)
        # hidden is (layers * directions, batch, hidden); put batch first so
        # that nn.Flatten yields one row per sample.
        hidden = torch.permute(hidden, (1, 0, 2))
        return self.fc(hidden)
    

class ConvNet(nn.Module):
    """Two-stage 1-D CNN over a fixed 24-step, single-channel window.

    Each stage is left zero-padding (half the kernel size), ``Conv1d``,
    ReLU and max-pooling. The pooled feature map is passed through
    ``tmd.GaussianNoise``, flattened, dropped out and projected to one value.

    Args:
        channels: output channels of the first and second convolution.
        kernel_sizes: kernel size of the first and second convolution.
        noise_sigma: argument forwarded to ``tmd.GaussianNoise``.
        dropout: probability of the ``nn.Dropout`` before the linear layer.
        **kwargs: accepted and ignored.
    """

    def __init__(self, channels=(32, 64), kernel_sizes=(12, 6), noise_sigma=0.0, dropout=0.0, **kwargs):
        super().__init__()
        self.seq_len = 24

        ch_first, ch_second = channels[0], channels[1]
        k_first, k_second = kernel_sizes[0], kernel_sizes[1]

        feature_layers = [
            # stage 1
            nn.ZeroPad2d((k_first // 2, 0, 0, 0)),
            nn.Conv1d(1, ch_first, k_first),
            nn.ReLU(),
            nn.MaxPool1d(2, padding=1),
            # stage 2
            nn.ZeroPad2d((k_second // 2, 0, 0, 0)),
            nn.Conv1d(ch_first, ch_second, k_second),
            nn.ReLU(),
            nn.MaxPool1d(2, padding=0),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # Push one dummy window through the feature extractor to learn how
        # many time steps survive pooling; this sizes the linear layer.
        probe = torch.randn(1, 1, self.seq_len)
        pooled_steps = self.conv(probe).shape[-1]

        head_layers = [
            tmd.GaussianNoise(noise_sigma),
            nn.Flatten(1, -1),
            nn.Dropout(dropout),
            nn.Linear(ch_second * pooled_steps, 1),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Reshape ``x`` to ``(-1, 1, seq_len)`` windows and return one value per window."""
        windows = x.reshape(-1, 1, self.seq_len)
        return self.fc(self.conv(windows))
    
    
class TCN(nn.Module):
    """``tmd.TemporalConvNet`` over a 24-step, single-channel window plus a linear head.

    The network's output at the last time step is passed through
    ``tmd.GaussianNoise`` and projected to a single value.

    Args:
        num_channels: channel sizes handed to ``tmd.TemporalConvNet``; the
            last entry is the input width of the linear head.
        kernel_size: kernel size handed to ``tmd.TemporalConvNet``.
        dropout: dropout handed to ``tmd.TemporalConvNet``.
        hid_noise: argument forwarded to ``tmd.GaussianNoise``.
        **kwargs: accepted and ignored.
    """

    def __init__(self, num_channels=(24,) * 2, kernel_size=3, dropout=0.5, hid_noise=0.0, **kwargs):
        super().__init__()
        # Fixed window length and prediction horizon.
        self.seq_len, self.pred_len = 24, 1
        # Keep the constructor arguments available on the instance.
        self.num_channels = num_channels
        self.kernel_size = kernel_size
        self.dropout = dropout

        self.tcn = tmd.TemporalConvNet(1, num_channels, kernel_size=kernel_size, dropout=dropout)
        self.hid_noise = tmd.GaussianNoise(hid_noise)
        head_width = num_channels[-1]
        self.fc = nn.Linear(head_width, 1)

    def forward(self, x):
        """Reshape ``x`` to ``(-1, 1, seq_len)`` windows and return one value per window."""
        series = x.reshape(-1, 1, self.seq_len)
        hidden = self.hid_noise(self.tcn(series))
        last_step = hidden[:, :, -1]
        return self.fc(last_step)
    
    

# Individual testing

### precipitation

In [None]:
# Univariate setup: the 'prec' column as a float32 (n, 1) array is both the
# input series and (as a copy) the target.
X = df['prec'].to_numpy(dtype=np.float32).reshape(-1, 1)
y = X.copy()

# Candidate values per hyper-parameter for ConvNet (presumably tl.Grid
# enumerates every combination — confirm in trainer_lib).
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.0005],
    'model': [ConvNet],
    'kernel_sizes': [(12, 6), (10, 10), (6, 12)],
    'channels': [(8, 16), (16, 32)],
    'noise_sigma': [0.05],
    'dropout': [0.5],
    'batch_size': [2048],
})

# NOTE(review): the positional 24 and 3 are presumably seq_len and pred_len
# (cf. the keyword form used with tl.MIMOTSWrapper) — confirm in trainer_lib.
wrapper: tl.RECOneModelTSWrapper = tl.RECOneModelTSWrapper(ConvNet(), 24, 3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=2)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Univariate setup: the 'prec' column as a float32 (n, 1) array is both the
# input series and (as a copy) the target.
X = df['prec'].to_numpy(dtype=np.float32).reshape(-1, 1)
y = X.copy()

# Candidate values per hyper-parameter for TCN.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [TCN],
    'kernel_size': [3, 5],
    'num_channels': [(16, 32), (32, 32)],
    # TCN's noise parameter is `hid_noise`; the former `noise_sigma` key is a
    # ConvNet parameter name that TCN silently swallowed via **kwargs, so the
    # noise layer stayed at its 0.0 default.
    'hid_noise': [0.05],
    'dropout': [0.5],
    'batch_size': [2048],
})

# NOTE(review): the positional 24 and 3 are presumably seq_len and pred_len
# (cf. the keyword form used with tl.MIMOTSWrapper) — confirm in trainer_lib.
wrapper: tl.RECOneModelTSWrapper = tl.RECOneModelTSWrapper(TCN(), 24, 3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=2)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Univariate setup: the 'prec' column as a float32 (n, 1) array is both the
# input series and (as a copy) the target.
X = df['prec'].to_numpy(dtype=np.float32).reshape(-1, 1)
y = X.copy()

# Candidate values per hyper-parameter; both recurrent models are compared.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [LSTMModel, GRUModel],
    'features': [1],  # X has a single column
    'hidden_size': [15, 20],
    'num_layers': [2],
    'bidirectional': [True],
    'hid_noise': [0.05],
    'dropout': [0.5],
    'batch_size': [2048],
})

# NOTE(review): the positional 24 and 3 are presumably seq_len and pred_len
# (cf. the keyword form used with tl.MIMOTSWrapper) — confirm in trainer_lib.
wrapper: tl.RECOneModelTSWrapper = tl.RECOneModelTSWrapper(GRUModel(), 24, 3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=2)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

Let's try adding the external features. Here we only predict 1 hour ahead; the recursive multi-model (MMRec, `MultiModelRec`) will produce the remaining steps.

In [10]:
# Inputs: every column except the first one (el_load), i.e. 10 features.
# Target: the 'prec' column.
X = df.to_numpy(dtype=np.float32)[:, 1:] # remove el_load
y = df['prec'].to_numpy(dtype=np.float32)

# Candidate values per hyper-parameter; both recurrent models are compared.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [LSTMModel, GRUModel],
    'features': [10],  # number of columns left in X
    'hidden_size': [15, 20],
    'num_layers': [2],
    'bidirectional': [True],
    'hid_noise': [0.05],
    'dropout': [0.5],
    'batch_size': [2048],
})

# 24-step input window, single-step prediction.
wrapper: tl.MIMOTSWrapper = tl.MIMOTSWrapper(GRUModel(), seq_len=24, pred_len=1)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

[Grid search 001] BEGIN - params: {'epochs': 1000, 'lr': 0.001, 'model': <class '__main__.LSTMModel'>, 'features': 10, 'hidden_size': 15, 'num_layers': 2, 'bidirectional': True, 'hid_noise': 0.05, 'dropout': 0.5, 'batch_size': 2048}
[Fold 1] BEGIN - END - RMSE loss: 0.091 - Time: 0.8 min.
[Fold 2] BEGIN - END - RMSE loss: 0.095 - Time: 0.4 min.
[Fold 3] BEGIN - END - RMSE loss: 0.074 - Time: 0.3 min.
[Fold 4] BEGIN - END - RMSE loss: 0.073 - Time: 1.2 min.
[Fold 5] BEGIN - END - RMSE loss: 0.061 - Time: 0.8 min.
[Fold 6] BEGIN - END - RMSE loss: 0.083 - Time: 0.5 min.
[Grid search 001] END - Score: 0.07941002 * Without 1st split: 0.07718142086759339
[Grid search 002] BEGIN - params: {'epochs': 1000, 'lr': 0.001, 'model': <class '__main__.GRUModel'>, 'features': 10, 'hidden_size': 15, 'num_layers': 2, 'bidirectional': True, 'hid_noise': 0.05, 'dropout': 0.5, 'batch_size': 2048}
[Fold 1] BEGIN - END - RMSE loss: 0.080 - Time: 1.0 min.
[Fold 2] BEGIN - END - RMSE loss: 0.098 - Time: 0.3 m

### global radiation

In [None]:
# Univariate setup: the 'grad' column as a float32 (n, 1) array is both the
# input series and (as a copy) the target.
X = df['grad'].to_numpy(dtype=np.float32).reshape(-1, 1)
y = X.copy()

# Candidate values per hyper-parameter for ConvNet (presumably tl.Grid
# enumerates every combination — confirm in trainer_lib).
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.0005],
    'model': [ConvNet],
    'kernel_sizes': [(12, 6), (10, 10), (6, 12)],
    'channels': [(8, 16), (16, 32)],
    'noise_sigma': [0.05],
    'dropout': [0.5],
    'batch_size': [2048],
})

# NOTE(review): the positional 24 and 3 are presumably seq_len and pred_len
# (cf. the keyword form used with tl.MIMOTSWrapper) — confirm in trainer_lib.
wrapper: tl.RECOneModelTSWrapper = tl.RECOneModelTSWrapper(ConvNet(), 24, 3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=2)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Univariate setup: the 'grad' column as a float32 (n, 1) array is both the
# input series and (as a copy) the target.
X = df['grad'].to_numpy(dtype=np.float32).reshape(-1, 1)
y = X.copy()

# Candidate values per hyper-parameter for TCN.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [TCN],
    'kernel_size': [3, 5],
    'num_channels': [(16, 32), (32, 32)],
    # TCN's noise parameter is `hid_noise`; the former `noise_sigma` key is a
    # ConvNet parameter name that TCN silently swallowed via **kwargs, so the
    # noise layer stayed at its 0.0 default.
    'hid_noise': [0.05],
    'dropout': [0.5],
    'batch_size': [2048],
})

# NOTE(review): the positional 24 and 3 are presumably seq_len and pred_len
# (cf. the keyword form used with tl.MIMOTSWrapper) — confirm in trainer_lib.
wrapper: tl.RECOneModelTSWrapper = tl.RECOneModelTSWrapper(TCN(), 24, 3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=2)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Univariate setup: the 'grad' column as a float32 (n, 1) array is both the
# input series and (as a copy) the target.
X = df['grad'].to_numpy(dtype=np.float32).reshape(-1, 1)
y = X.copy()

# Candidate values per hyper-parameter; both recurrent models are compared.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [LSTMModel, GRUModel],
    'features': [1],  # X has a single column
    'hidden_size': [10, 15, 20],
    'num_layers': [2],
    'bidirectional': [True],
    'hid_noise': [0.05],
    'dropout': [0.5],
    'batch_size': [2048],
})

# NOTE(review): the positional 24 and 3 are presumably seq_len and pred_len
# (cf. the keyword form used with tl.MIMOTSWrapper) — confirm in trainer_lib.
wrapper: tl.RECOneModelTSWrapper = tl.RECOneModelTSWrapper(GRUModel(), 24, 3)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=2)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

Let's try adding the external features. Here we only predict 1 hour ahead; the recursive multi-model (MMRec, `MultiModelRec`) will produce the remaining steps.

In [11]:
# Inputs: every column except the first one (el_load), i.e. 10 features.
# Target: the 'grad' column.
X = df.to_numpy(dtype=np.float32)[:, 1:] # remove el_load
y = df['grad'].to_numpy(dtype=np.float32)

# Candidate values per hyper-parameter; both recurrent models are compared.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [LSTMModel, GRUModel],
    'features': [10],  # number of columns left in X
    'hidden_size': [15, 20],
    'num_layers': [2],
    'bidirectional': [True],
    'hid_noise': [0.05],
    'dropout': [0.5],
    'batch_size': [2048],
})

# 24-step input window, single-step prediction.
wrapper: tl.MIMOTSWrapper = tl.MIMOTSWrapper(GRUModel(), seq_len=24, pred_len=1)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

[Grid search 001] BEGIN - params: {'epochs': 1000, 'lr': 0.001, 'model': <class '__main__.LSTMModel'>, 'features': 10, 'hidden_size': 15, 'num_layers': 2, 'bidirectional': True, 'hid_noise': 0.05, 'dropout': 0.5, 'batch_size': 2048}
[Fold 1] BEGIN - END - RMSE loss: 28.758 - Time: 0.8 min.
[Fold 2] BEGIN - END - RMSE loss: 9.638 - Time: 1.4 min.
[Fold 3] BEGIN - END - RMSE loss: 8.097 - Time: 1.1 min.
[Fold 4] BEGIN - END - RMSE loss: 6.651 - Time: 2.4 min.
[Fold 5] BEGIN - END - RMSE loss: 7.300 - Time: 1.9 min.
[Fold 6] BEGIN - END - RMSE loss: 7.348 - Time: 1.9 min.
[Grid search 001] END - Score: 11.29871416 * Without 1st split: 7.806904682982908
[Grid search 002] BEGIN - params: {'epochs': 1000, 'lr': 0.001, 'model': <class '__main__.GRUModel'>, 'features': 10, 'hidden_size': 15, 'num_layers': 2, 'bidirectional': True, 'hid_noise': 0.05, 'dropout': 0.5, 'batch_size': 2048}
[Fold 1] BEGIN - END - RMSE loss: 10.326 - Time: 1.7 min.
[Fold 2] BEGIN - END - RMSE loss: 7.790 - Time: 1.7 

### el_load non-recursive

In [None]:
# Inputs: all DataFrame columns (including el_load itself). Target: el_load.
X = df.to_numpy(dtype=np.float32)
y = df['el_load'].to_numpy(dtype=np.float32)

# No 'features' entry here, so the models presumably fall back to their
# default of 11 input features — confirm how trainer_lib builds the model.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [LSTMModel, GRUModel],
    'hidden_size': [15, 25, 35],
    'num_layers': [2, 3],
    'bidirectional': [True],
    'batch_size': [2048],
    'dropout': [0.5],
    'hid_noise': [0.05],
})

# 24-step input window, single-step prediction.
wrapper: tl.MIMOTSWrapper = tl.MIMOTSWrapper(LSTMModel(), seq_len=24, pred_len=1)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

No major differences here; in the first run, the GRU with a hidden size of 25 and 2 layers was the best.

In [None]:
# Inputs: all DataFrame columns (including el_load itself). Target: el_load.
X = df.to_numpy(dtype=np.float32)
y = df['el_load'].to_numpy(dtype=np.float32)

# Same comparison as the previous grid, restricted to smaller hidden sizes
# and two layers.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [LSTMModel, GRUModel],
    'hidden_size': [10, 15],
    'num_layers': [2],
    'bidirectional': [True],
    'batch_size': [2048],
    'dropout': [0.5],
    'hid_noise': [0.05],
})

# 24-step input window, single-step prediction.
wrapper: tl.MIMOTSWrapper = tl.MIMOTSWrapper(LSTMModel(), seq_len=24, pred_len=1)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

Let's try a single layer.

In [None]:
# Inputs: all DataFrame columns (including el_load itself). Target: el_load.
X = df.to_numpy(dtype=np.float32)
y = df['el_load'].to_numpy(dtype=np.float32)

# Single-layer GRU with larger hidden sizes.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [GRUModel],
    'hidden_size': [25, 30, 35],
    'num_layers': [1],
    'bidirectional': [True],
    'batch_size': [2048],
    'dropout': [0.5],
    'hid_noise': [0.05],
    'es_p': [20],  # presumably the early-stopping patience — confirm in trainer_lib
})

# 24-step input window, single-step prediction.
wrapper: tl.MIMOTSWrapper = tl.MIMOTSWrapper(GRUModel(), seq_len=24, pred_len=1)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Inputs: all DataFrame columns (including el_load itself). Target: el_load.
X = df.to_numpy(dtype=np.float32)
y = df['el_load'].to_numpy(dtype=np.float32)

# Single-layer GRU again, extending the search to hidden sizes 40 and 45.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [GRUModel],
    'hidden_size': [40, 45],
    'num_layers': [1],
    'bidirectional': [True],
    'batch_size': [2048],
    'dropout': [0.5],
    'hid_noise': [0.05],
    'es_p': [20],  # presumably the early-stopping patience — confirm in trainer_lib
})

# 24-step input window, single-step prediction.
wrapper: tl.MIMOTSWrapper = tl.MIMOTSWrapper(GRUModel(), seq_len=24, pred_len=1)
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=3)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

### putting it all together

In [None]:
class MultiModelRec(nn.Module):
    """Recursive multi-step forecaster built from three single-step models.

    At every step the first three features of the input window are predicted
    separately: feature 0 by a ``GRUModel`` that sees all features, feature 1
    by a ``TCN`` and feature 2 by a ``ConvNet``, each of which sees only its
    own series. The window is then advanced by one step and the predictions
    (or, with teacher forcing, the ground truth) are written into the new
    last row. Only the feature-0 predictions are returned.

    Args:
        features: number of input features per time step (GRU input size).
        pred_len: number of recursive prediction steps.
        hidden_size, num_layers, dropout, hid_noise, bidirectional:
            forwarded to ``GRUModel``; ``dropout`` and ``hid_noise`` are also
            forwarded to ``TCN``.
        **kwargs: accepted and ignored.
    """

    def __init__(self, features=11, pred_len=3, hidden_size=15, num_layers=2, dropout=0.0,
                 hid_noise=0.0, bidirectional=True,  **kwargs):
        super().__init__()
        self.out_features = 3  # number of leading features predicted recursively
        self.pred_len = pred_len

        self.gru = GRUModel(features, hidden_size, num_layers, dropout, hid_noise, bidirectional)
        self.tcn = TCN((32,) * 2, kernel_size=5, dropout=dropout, hid_noise=hid_noise)
        # ConvNet's positional order is (channels, kernel_sizes, noise_sigma,
        # dropout). The former positional call `..., 0.5, 0.05` therefore set
        # noise_sigma=0.5 and dropout=0.05, the reverse of the 0.05 / 0.5 used
        # in the ConvNet grid searches. Keywords make the intent explicit.
        self.conv = ConvNet(channels=(16, 32), kernel_sizes=(6, 12), noise_sigma=0.05, dropout=0.5)

    def forward(self, x, y, teacher_forcing=0.0):
        """Predict feature 0 for ``pred_len`` steps.

        Args:
            x: input window, indexed as ``(batch, step, feature)``.
            y: future rows, ``(batch, pred_len, k)``. If ``k`` equals the GRU
                input size, ``y`` holds complete rows and teacher forcing is
                possible. Otherwise ``y`` is treated as the non-predicted
                features only: zero placeholders are prepended for the
                predicted ones and teacher forcing is disabled.
            teacher_forcing: probability, drawn per feature and step, of
                keeping the ground-truth value instead of the prediction.

        Returns:
            ``(batch, pred_len)`` tensor of feature-0 predictions.
        """
        batch_size = x.shape[0]

        if y.shape[2] != self.gru.gru.input_size:
            # Allocate next to `y` (same device/dtype) rather than on a global device.
            placeholder = y.new_zeros(batch_size, self.pred_len, self.out_features)
            pre_calc = torch.cat((placeholder, y), dim=2)
            teacher_forcing = 0.0
        else:
            pre_calc = y

        output = x.new_zeros(batch_size, self.pred_len)

        for i in range(self.pred_len):
            out = torch.cat((
                self.gru(x),
                self.tcn(x[:, :, 1]),
                self.conv(x[:, :, 2])
            ), dim=1)

            output[:, i] = out[:, 0]

            # Slide the window: drop the oldest step and append the next row.
            x = torch.cat((x[:, 1:], pre_calc[:, i].unsqueeze(1)), dim=1)
            for j in range(self.out_features):  # roll teacher forcing for each feature
                # `>=` rather than `>`: torch.rand can return exactly 0.0, which
                # with teacher_forcing == 0.0 would otherwise leave the zero
                # placeholder (or ground truth) in place of the prediction.
                if torch.rand(1) >= teacher_forcing:
                    x[:, -1, j] = out[:, j]

        return output


# All DataFrame columns are used as inputs; the target is a copy of the same
# full array.
X = df.to_numpy(dtype=np.float32)
y = X.copy()

# Two-layer GRU sizes for the MultiModelRec.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [MultiModelRec],
    'hidden_size': [25, 30, 35],
    'num_layers': [2],
    'bidirectional': [True],
    'batch_size': [1024],
    'dropout': [0.5],
    'hid_noise': [0.05],
    'es_p': [20],  # presumably the early-stopping patience — confirm in trainer_lib
})

# NOTE(review): the positional 24, 3, 3 are presumably seq_len, pred_len and
# the number of recursively predicted features — confirm in trainer_lib.
wrap: tl.RECMultiModelTSWrapper = tl.RECMultiModelTSWrapper(MultiModelRec(), 24, 3, 3, teacher_forcing_decay=0.01)
b_p, b_s = wrap.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# All DataFrame columns are used as inputs; the target is a copy of the same
# full array.
X = df.to_numpy(dtype=np.float32)
y = X.copy()

# Single-layer GRU with larger hidden sizes for the MultiModelRec.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [MultiModelRec],
    'hidden_size': [55, 50, 45],
    'num_layers': [1],
    'bidirectional': [True],
    'batch_size': [1024],
    'dropout': [0.5],
    'hid_noise': [0.05],
    'es_p': [25],  # presumably the early-stopping patience — confirm in trainer_lib
})

# NOTE(review): the positional 24, 3, 3 are presumably seq_len, pred_len and
# the number of recursively predicted features — confirm in trainer_lib.
wrap: tl.RECMultiModelTSWrapper = tl.RECMultiModelTSWrapper(MultiModelRec(), 24, 3, 3, teacher_forcing_decay=0.01)
b_p, b_s = wrap.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

### testing MMRec with full feature access

In [0]:
class FullMultiModelRec(nn.Module):
    """Recursive multi-step forecaster whose auxiliary models see all other features.

    At every step the first three features of the input window are predicted:
    feature 0 by a ``GRUModel`` over all features, features 1 and 2 by two
    further ``GRUModel`` instances (``ft1``, ``ft2``) that both read every
    feature except feature 0. The window is then advanced by one step and
    the predictions (or, with teacher forcing, the ground truth) are written
    into the new last row. Only the feature-0 predictions are returned.

    Args:
        features: number of input features per time step (main GRU input size).
        pred_len: number of recursive prediction steps.
        hidden_size, num_layers, bidirectional: forwarded to the main
            ``GRUModel`` only; ``ft1``/``ft2`` use a fixed hidden size of 20,
            2 layers, bidirectional.
        dropout, hid_noise: forwarded to all three ``GRUModel`` instances.
        **kwargs: accepted and ignored.
    """

    def __init__(self, features=11, pred_len=3, hidden_size=15, num_layers=2, dropout=0.0,
                 hid_noise=0.0, bidirectional=True,  **kwargs):
        super().__init__()
        self.out_features = 3  # number of leading features predicted recursively
        self.pred_len = pred_len

        self.gru = GRUModel(features, hidden_size, num_layers, dropout, hid_noise, bidirectional)
        self.ft1 = GRUModel(features-1, 20, 2, dropout, hid_noise, True)
        self.ft2 = GRUModel(features-1, 20, 2, dropout, hid_noise, True)

    def forward(self, x, y, teacher_forcing=0.0):
        """Predict feature 0 for ``pred_len`` steps.

        Args:
            x: input window, indexed as ``(batch, step, feature)``.
            y: future rows, ``(batch, pred_len, k)``. If ``k`` equals the main
                GRU input size, ``y`` holds complete rows and teacher forcing
                is possible. Otherwise ``y`` is treated as the non-predicted
                features only: zero placeholders are prepended for the
                predicted ones and teacher forcing is disabled.
            teacher_forcing: probability, drawn per feature and step, of
                keeping the ground-truth value instead of the prediction.

        Returns:
            ``(batch, pred_len)`` tensor of feature-0 predictions.
        """
        batch_size = x.shape[0]

        if y.shape[2] != self.gru.gru.input_size:
            # Allocate next to `y` (same device/dtype) rather than on a global device.
            placeholder = y.new_zeros(batch_size, self.pred_len, self.out_features)
            pre_calc = torch.cat((placeholder, y), dim=2)
            teacher_forcing = 0.0
        else:
            pre_calc = y

        output = x.new_zeros(batch_size, self.pred_len)

        for i in range(self.pred_len):
            out = torch.cat((
                self.gru(x),
                self.ft1(x[:, :, 1:]),
                self.ft2(x[:, :, 1:])
            ), dim=1)

            output[:, i] = out[:, 0]

            # Slide the window: drop the oldest step and append the next row.
            x = torch.cat((x[:, 1:], pre_calc[:, i].unsqueeze(1)), dim=1)
            for j in range(self.out_features):  # roll teacher forcing for each feature
                # `>=` rather than `>`: torch.rand can return exactly 0.0, which
                # with teacher_forcing == 0.0 would otherwise leave the zero
                # placeholder (or ground truth) in place of the prediction.
                if torch.rand(1) >= teacher_forcing:
                    x[:, -1, j] = out[:, j]

        return output

In [12]:
# All DataFrame columns are used as inputs; the target is a copy of the same
# full array.
X = df.to_numpy(dtype=np.float32)
y = X.copy()

# Two-layer main-GRU sizes for the FullMultiModelRec.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [FullMultiModelRec],
    'hidden_size': [25, 30, 35],
    'num_layers': [2],
    'bidirectional': [True],
    'batch_size': [1024],
    'dropout': [0.5],
    'hid_noise': [0.05],
    'es_p': [20],  # presumably the early-stopping patience — confirm in trainer_lib
})

# NOTE(review): the positional 24, 3, 3 are presumably seq_len, pred_len and
# the number of recursively predicted features — confirm in trainer_lib.
wrap: tl.RECMultiModelTSWrapper = tl.RECMultiModelTSWrapper(FullMultiModelRec(), 24, 3, 3, teacher_forcing_decay=0.01)
b_p, b_s = wrap.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

[Grid search 001] BEGIN - params: {'epochs': 1000, 'lr': 0.001, 'model': <class '__main__.FullMultiModelRec'>, 'hidden_size': 25, 'num_layers': 2, 'bidirectional': True, 'batch_size': 1024, 'dropout': 0.5, 'hid_noise': 0.05, 'es_p': 20}
[Fold 1] BEGIN
Early stopping... Epoch 120: train loss: 0.033296, val loss: 0.033607, test loss: 0.076005
[Fold 1] END - RMSE loss: 137.309 - Time: 1.6 min.
[Fold 2] BEGIN
Early stopping... Epoch 132: train loss: 0.028707, val loss: 0.008406, test loss: 0.013639
[Fold 2] END - RMSE loss: 82.663 - Time: 3.2 min.
[Fold 3] BEGIN
Early stopping... Epoch 181: train loss: 0.021208, val loss: 0.004782, test loss: 0.014034
[Fold 3] END - RMSE loss: 83.895 - Time: 6.3 min.
[Fold 4] BEGIN
Early stopping... Epoch 240: train loss: 0.019898, val loss: 0.020978, test loss: 0.018359
[Fold 4] END - RMSE loss: 88.140 - Time: 9.6 min.
[Fold 5] BEGIN
Early stopping... Epoch 166: train loss: 0.020744, val loss: 0.011809, test loss: 0.019529
[Fold 5] END - RMSE loss: 99.280

KeyboardInterrupt: 

In [14]:
# All DataFrame columns are used as inputs; the target is a copy of the same
# full array.
X = df.to_numpy(dtype=np.float32)
y = X.copy()

# Single-layer main GRU with larger hidden sizes for the FullMultiModelRec.
grid = tl.Grid({
    'epochs': [1000],  # we use early stopping, so this is just a high number
    'lr': [0.001],
    'model': [FullMultiModelRec],
    'hidden_size': [40, 45, 50],
    'num_layers': [1],
    'bidirectional': [True],
    'batch_size': [1024],
    'dropout': [0.5],
    'hid_noise': [0.05],
    'es_p': [25],  # presumably the early-stopping patience — confirm in trainer_lib
})

# NOTE(review): the positional 24, 3, 3 are presumably seq_len, pred_len and
# the number of recursively predicted features — confirm in trainer_lib.
wrap: tl.RECMultiModelTSWrapper = tl.RECMultiModelTSWrapper(FullMultiModelRec(), 24, 3, 3, teacher_forcing_decay=0.01)
b_p, b_s = wrap.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

[Grid search 001] BEGIN - params: {'epochs': 1000, 'lr': 0.001, 'model': <class '__main__.FullMultiModelRec'>, 'hidden_size': 40, 'num_layers': 1, 'bidirectional': True, 'batch_size': 1024, 'dropout': 0.5, 'hid_noise': 0.05, 'es_p': 25}
[Fold 1] BEGIN
Early stopping... Epoch 127: train loss: 0.032839, val loss: 0.025279, test loss: 0.049995
[Fold 1] END - RMSE loss: 135.489 - Time: 1.6 min.
[Fold 2] BEGIN
Early stopping... Epoch 179: train loss: 0.025955, val loss: 0.008902, test loss: 0.015419
[Fold 2] END - RMSE loss: 85.133 - Time: 3.5 min.
[Fold 3] BEGIN
Early stopping... Epoch 298: train loss: 0.020169, val loss: 0.004430, test loss: 0.015255
[Fold 3] END - RMSE loss: 84.560 - Time: 8.4 min.
[Fold 4] BEGIN
Early stopping... Epoch 168: train loss: 0.022933, val loss: 0.020945, test loss: 0.017838
[Fold 4] END - RMSE loss: 94.099 - Time: 6.4 min.
[Fold 5] BEGIN
Early stopping... Epoch 224: train loss: 0.021980, val loss: 0.011119, test loss: 0.019803
[Fold 5] END - RMSE loss: 98.655