### Imports

In [None]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import trainer_lib as tl
import torch_model_definitions as tmd

### Load data

In [None]:
# Load the country-wide dataset up to the end of 2019.
# The amount of data is limited to make the notebook run faster; models
# generally performed the best on this part of the dataset in my TDK.
df: pd.DataFrame = tl.load_country_wide_dataset(
    '../data/country_data.csv',
    until='2019-12-31 23:00:00',
)

# Inputs: every column of the frame as a float32 array.
X = df.to_numpy(dtype=np.float32)

# Target: the 'el_load' column as a float32 array (it is also part of X).
el_load = df['el_load']
y = el_load.to_numpy(dtype=np.float32)

### Grid search

In [None]:
# Seq2seq setup that performed the best in my TDK with seq_len 24. It seems to
# perform well, so it is not grid searched further: every entry below holds a
# single candidate value.
# NOTE(review): the earlier note mentioned pred_len 3, but 12 is used here — confirm.
seq2seq_space = {
    'epochs': [1000],  # just a high number, early stopping ends training sooner
    'lr': [0.001],
    'model': [tmd.Seq2seq],
    'embedding_size': [10],
    'num_layers': [1],
    'bidirectional': [True],
    'dropout': [0.5],
    'out_noise': [0.05],
    'batch_size': [2048],
    'pred_len': [12],
    'es_p': [20],
}
grid = tl.Grid(seq2seq_space)  # n_splits defaulted to 2, val_mod to 8

# The model is created inline so that the wrapper holds the only reference to it.
wrapper = tl.S2STSWRAPPER(
    tmd.Seq2seq(),
    seq_len=24,
    pred_len=12,
    teacher_forcing_decay=0.01,
)

# Run the search on the loaded data and report the winning configuration.
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")

In [None]:
# Search space for the attention-based Seq2seq model; only the embedding size
# has more than one candidate value.
attention_space = {
    'epochs': [1000],  # just a high number, early stopping ends training sooner
    'lr': [0.001],
    'model': [tmd.AttentionSeq2seq],
    'embedding_size': [6, 8, 10],
    'bidirectional': [True],
    'dropout': [0.0],  # kept at 0, the model wasn't showing signs of overfitting
    'out_noise': [0.00],  # kept at 0, the model wasn't showing signs of overfitting
    'batch_size': [1024],
    'pred_len': [12],
    'es_p': [20],
}
grid = tl.Grid(attention_space)  # n_splits defaulted to 2, val_mod to 8

# The model is created inline so that the wrapper holds the only reference to it.
wrapper = tl.S2STSWRAPPER(
    tmd.AttentionSeq2seq(),
    seq_len=24,
    pred_len=12,
    teacher_forcing_decay=0.01,
)

# Run the search on the loaded data and report the winning configuration.
b_p, b_s = wrapper.grid_search(X, y, grid, verbose=4)
print(f"\nBest params: {b_p}\nBest score: {b_s}")