## Colab Init

You don't need to run this cell if you cloned the repository.

In [None]:
# Remove the sample_data directory (presumably the folder Colab creates by default — not used here)
!rm -rf sample_data

# Download the dataset and the helper modules from the GitHub repository;
# -P stores each file in the given local directory (data/ or models/)
!wget https://raw.githubusercontent.com/MeepOwned13/dnn_el_load/main/data/country_data.csv -P data/
!wget https://raw.githubusercontent.com/MeepOwned13/dnn_el_load/main/models/trainer_lib.py -P models/
!wget https://raw.githubusercontent.com/MeepOwned13/dnn_el_load/main/models/torch_model_definitions.py -P models/
!wget https://raw.githubusercontent.com/MeepOwned13/dnn_el_load/main/models/params.py -P models/

# Copy the repository's final_eval_results subdirectory into a local final_eval_results folder
!npx degit github:MeepOwned13/dnn_el_load/final_eval_results final_eval_results

# Install the overrides package (presumably required by the downloaded modules — TODO confirm)
!python -m pip install overrides

## Imports and Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import torch
import models.torch_model_definitions as tmd
import models.trainer_lib as tl
from models.params import PARAMS
from copy import deepcopy
import random

# Evaluation results of the finished runs, used by the comparison sections below
results = pd.read_csv('final_eval_results/results.csv')

# Display names for the model identifiers that appear in checkpoint file names
MODEL_NAMES = {
    'reg_s2s': 'Regular Seq2Seq',
    'att_s2s': 'Attention Seq2Seq',
    'pos_att_s2s': 'Concatenated Positional Encoding & Attention',
    'add_pos_att_s2s': 'Additive Positional Encoding & Attention',
}

# Getting all available pretrained models (file names without the '.pt' suffix)
pts = [f[:-3] for f in os.listdir('final_eval_results') if f.endswith('.pt')]
# Checkpoint names look like "<sequence length>_<prediction length>_<model>", e.g. "24_6_reg_s2s"
regex = re.compile(r'(\d{2})_(\d{1,2})_(\w*)')

# Availability tables: one row per prediction length, one column per model,
# every cell starts out marked as "not available"
available_24 = pd.DataFrame(columns=results['Model'].unique(),
                            index=results['Prediction length'].unique(),
                            dtype=str).fillna("\u2a2f")
available_48 = available_24.copy()
available_by_seq_len = {'24': available_24, '48': available_48}

for pt_name in pts:
    parsed = regex.match(pt_name)
    if parsed is None:
        # A '.pt' file that does not follow the naming scheme: nothing to mark
        continue

    table = available_by_seq_len.get(parsed.group(1))
    if table is None:
        # Only 24 and 48 hour sequence lengths have a table
        continue

    pred_len, model = int(parsed.group(2)), parsed.group(3)
    if pred_len in table.index and model in table.columns:
        # One .loc assignment instead of chained indexing (table[model][pred_len] = ...),
        # which may write to a temporary copy and leave the table unchanged
        table.loc[pred_len, model] = "\u2714"

print(f"Pretrained models available for 24 hour lookback (Model - Prediction length):\n{available_24.to_string(justify='center')}")

print(f"\nPretrained models available for 48 hour lookback (Model - Prediction length):\n{available_48.to_string(justify='center')}")

## Choose a model

Keep in mind when loading (`TRAIN_MODEL = False`): this section always loads the model that performed best among the given runs.

In [None]:
# train the model right now?, make sure to change runtime type to GPU!
TRAIN_MODEL = False
CUSTOM_EPOCHS = 10 # set to None for default
# choice of model, sequence (lookback) length and prediction length in the following format:
    # "<sequence_length>_<prediction_length>_<model>" [e.g. "24_6_reg_s2s"]
CHOICE = "48_48_pos_att_s2s"

### Above are configurable parameters ###
match = regex.match(CHOICE)
if CHOICE not in pts or match is None:
    raise ValueError(f"Choice {CHOICE} not available, check 'Imports and Setup' cell output")

if TRAIN_MODEL and tl.TRAINER_LIB_DEVICE == torch.device("cpu"):
    print("WARNING: Using CPU for training might make it take a long time, use CUDA instead if possible")

# Setting up params which are not predefined
# (deepcopy so the shared PARAMS entry is not modified by the changes below)
params = deepcopy(PARAMS[match.group(3)])
params['seq_len'] = int(match.group(1))
params['pred_len'] = int(match.group(2))
params['model_params']['pred_len'] = int(match.group(2))
# Models were trained with a +2 embedding size on 48 hour sequence length
if params['seq_len'] > 24:
    params['model_params']['embedding_size'] += 2

# Print info
print(f"{'Training' if TRAIN_MODEL else 'Loading'} {MODEL_NAMES[match.group(3)]} model, "
      f"using {params['seq_len']} sequence length (lookback) and predicting {params['pred_len']} hour(s) ahead.")

# Load data
dataset = tl.load_country_wide_dataset('data/country_data.csv', until='2019-12-31 23:00:00')
X = dataset.to_numpy(dtype=np.float32)
y = dataset['el_load'].to_numpy(dtype=np.float32)

# Define splits (predetermined): the first 2/3 of the data is used for training and validation
# (validation being the last 1/8 of a third right before the test split), the final 1/3 for testing
# No cross validation performed to allow more models and configs for testing
split_len = len(X) // 3
train_val_sp: int = split_len * 2 - split_len // 8
val_test_sp: int = split_len * 2
x_train, x_val, x_test = X[:train_val_sp], X[train_val_sp:val_test_sp], X[val_test_sp:]
y_train, y_val, y_test = y[:train_val_sp], y[train_val_sp:val_test_sp], y[val_test_sp:]

# The wrapper is built the same way whether the model is trained or loaded
wrapper = tl.S2STSWrapper(params['model'](**params['model_params']).to(tl.TRAINER_LIB_DEVICE),
                          params['seq_len'], params['pred_len'])

if TRAIN_MODEL:
    # A falsy CUSTOM_EPOCHS (None) keeps the predefined number of epochs
    params['epochs'] = CUSTOM_EPOCHS or params['epochs']

    result = wrapper.train_strategy(x_train, y_train, x_val, y_val, x_test, y_test, **params)

    y_pred, y_true = wrapper.predict_for_comparison(x_test, y_test)
    tl.TSMWrapper.plot_losses([result[0]], [result[1]], [result[2]])
else:
    path = f"final_eval_results/{CHOICE}.pt"

    wrapper.load_state(path)
    y_pred, y_true = wrapper.predict_for_comparison(x_test, y_test)

tl.TSMWrapper.print_evaluation_info(y_pred, y_true, 0)

## Prediction graph

In [None]:
# Index of entry to plot, None means random
PLOT_START = None

### Above are configurable parameters ###

# Highest valid index along the first axis of the compared arrays
last_index = y_true.shape[0] - 1

if PLOT_START is None:
    # No index requested: pick any valid entry
    PLOT_START = random.randint(0, last_index)
elif PLOT_START > last_index:
    raise ValueError(f"Index {PLOT_START} doesn't exist, maximum is {last_index}")

# Draw the expected values first, then the model's output on top of them
for series, label, color in ((y_true, "True", "green"), (y_pred, "Prediction", "red")):
    plt.plot(series[PLOT_START], label=label, color=color)

plt.legend()

plt.show()

## Overall performance comparison

In [None]:
# Aggregate every metric per (sequence length, prediction length, model) combination
grouped = results.groupby(['Sequence length', 'Prediction length', 'Model'])
means, stds = grouped.mean(), grouped.std()

# Rescale the timing columns to the units announced by the message printed below
for stats in (means, stds):
    stats['Train Time'] = stats['Train Time'] / 60
    stats['Pred Time'] = stats['Pred Time'] * 1000

# Number of decimals kept per column
rounding = {'MAE': 2, 'MSE': 1, 'RMSE': 2, 'MAPE': 4, 'MPE': 4, 'Train Time': 2, 'Pred Time': 0}
means, stds = means.round(rounding), stds.round(rounding)

# One "mean±std" text cell per metric and group
mean_and_std = pd.DataFrame({
    col: means[col].astype(str) + "\u00b1" + stds[col].astype(str)
    for col in means.columns
})

print('Train Time is in minutes, Pred Time is in milliseconds (calculated for predicting the entire test set)')
mean_and_std

## Comparison graphs

In [None]:
def plot_per_model(data: pd.DataFrame, ylabel: str, axs=None, title=None):
    """Draw one dashed, marked line per model onto a single set of axes.

    Make sure to call plt.show() after figure is done.

    :param data: frame with a 'Model' column; after that column is dropped, the first
        remaining column is used as x (labelled 'Hour') and the second one as y
    :param ylabel: label of the y axis
    :param axs: matplotlib Axes to draw on, None means the current pyplot axes
    :param title: title of the plot
    """
    # Falling back to the current pyplot axes lets both cases share one code path
    ax = plt.gca() if axs is None else axs

    ax.set_title(title)
    ax.set_xlabel('Hour')
    ax.set_ylabel(ylabel)
    ax.set_xticks([6, 12, 24, 48])
    ax.grid()

    plotted = False
    for m in data['Model'].unique():
        per_model = data[data['Model'] == m].drop('Model', axis=1).to_numpy()
        x, y = per_model[:, 0], per_model[:, 1]
        # Unknown model identifiers are shown as they are
        ax.plot(x, y, marker='o', linewidth=1, linestyle='dashed', label=MODEL_NAMES.get(m, m))
        plotted = True

    # Build the legend once, and only if there is something to list
    if plotted:
        ax.legend()
    
def plot_for_len(data: pd.DataFrame, seq_len=24, to_plot: str = 'RMSE'):
    """Plot the per-model average and minimum of a metric against prediction length.

    Two plots are drawn side by side for the given sequence length: the mean of
    the metric on the left and its minimum on the right, then the figure is shown.

    :param data: results frame containing 'Model', 'Sequence length',
        'Prediction length' and the metric column
    :param seq_len: sequence length to filter the results on
    :param to_plot: name of the metric column to plot
    :raises ValueError: if to_plot is not a metric column of data
    """
    group_keys = ['Model', 'Sequence length', 'Prediction length']

    if to_plot not in data.drop(group_keys, axis=1, errors="ignore").columns:
        raise ValueError(f"Can't plot {to_plot}, it doesn't exist in the DataFrame")

    # Select only the metric: with as_index=False the group keys come back as
    # leading columns, giving the (Model, Sequence length, Prediction length, metric)
    # layout the plotting helper relies on. The aggregations take no positional
    # argument, their first parameter is numeric_only, not a column list.
    gp = data.groupby(group_keys, as_index=False)[to_plot]

    gmean = gp.mean()
    gmin = gp.min()

    gmean = gmean[gmean['Sequence length'] == seq_len].drop('Sequence length', axis=1)
    gmin = gmin[gmin['Sequence length'] == seq_len].drop('Sequence length', axis=1)

    fig, axs = plt.subplots(1, 2, figsize=(16, 8))

    plot_per_model(gmean, to_plot, axs[0], f'AVG for {seq_len} sequence')
    plot_per_model(gmin, to_plot, axs[1], f'MIN for {seq_len} sequence')
    plt.show()

# RMSE comparison for the 24 and the 48 hour sequence lengths
for lookback in (24, 48):
    plot_for_len(results, lookback, 'RMSE')