In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from itertools import product
import copy
import os
import sys

# Make the repository root (parent of the notebook's working directory)
# importable so the project-local modules imported below can be resolved.
repo_root = os.path.abspath("..")
# Guard against duplicates: re-running this cell must not keep growing sys.path.
if repo_root not in sys.path:
    sys.path.append(repo_root)

import train
from dataset import myDataset
from lstm import myLSTM
from utils import load_config

In [None]:
# Run on the GPU when torch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Train and test datasets are built on the selected device.
# NOTE(review): the meaning of the positional `5` and of the trailing `False`
# is defined by myDataset's signature, which is not visible here -- presumably
# a window length and a "train split" flag; confirm against dataset.py.
train_dataset = myDataset(device, 5)
test_dataset = myDataset(device, 5, False)

# Baseline configuration shared by every experiment in this notebook; each
# experiment deep-copies it before applying its own overrides.
config_path = os.path.join(repo_root, "config.yaml")
base_config = load_config(config_path)

In [None]:
# Hyperparameter sweep 1: train a fresh model for every combination of
# learning rate, batch size and hidden size listed below.
lr_list = [1e-3, 5e-4]
batch_sizes = [32, 64]
hidden_sizes = [64, 128, 256]
# Maps experiment name -> whatever train.fit_lstm returns for that run.
training_results = dict()

for lr, bs, hs in product(lr_list, batch_sizes, hidden_sizes):
    # Deep-copy so this run's overrides never leak into base_config
    # or into the configuration of another run.
    config = copy.deepcopy(base_config)

    config["training"]["lr"] = lr
    config["training"]["batch_size"] = bs
    config["model"]["hidden_size"] = hs

    # The experiment name is passed to the trainer and is also the key under
    # which this run's result is stored.
    config["experiment_name"] = f"lr{lr}_bs{bs}_hs{hs}"
    model = myLSTM(**config["model"]).to(device)
    training_results[config["experiment_name"]] = train.fit_lstm(
        model,
        config["experiment_name"],
        train_dataset,
        test_dataset,
        **config["training"],
    )
    print()
    # No early exit: the loop must visit every combination, otherwise only
    # the first (lr=1e-3, bs=32, hs=64) would ever be trained.
    

In [None]:
# Hyperparameter sweep 2: larger batch sizes at a fixed hidden size.
# The learning rate is deliberately NOT scaled up with the batch size:
# earlier experiments showed the model plateaus quite early on, which calls
# for a lower learning rate rather than a higher one.
lr_list = [5e-4, 2.5e-4]
batch_sizes = [128, 256]
hidden_sizes = [128]
# Maps experiment name -> whatever train.fit_lstm returns for that run.
training_results_2 = dict()

for lr, bs, hs in product(lr_list, batch_sizes, hidden_sizes):
    # Deep-copy so this run's overrides never leak into base_config
    # or into the configuration of another run.
    config2 = copy.deepcopy(base_config)

    config2["training"]["lr"] = lr
    config2["training"]["batch_size"] = bs
    config2["model"]["hidden_size"] = hs
    config2["training"]["num_epochs"] = 100
    # The experiment name is passed to the trainer and is also the key under
    # which this run's result is stored.
    config2["experiment_name"] = f"lr{lr}_bs{bs}_hs{hs}"
    model = myLSTM(**config2["model"]).to(device)
    training_results_2[config2["experiment_name"]] = train.fit_lstm(
        model,
        config2["experiment_name"],
        train_dataset,
        test_dataset,
        **config2["training"],
    )
    print()
    # No early exit: the loop must visit every combination, otherwise only
    # the first (lr=5e-4, bs=128, hs=128) would ever be trained.

In [None]:
# Final run: one hand-picked configuration, trained for 200 epochs.
final_results = dict()
final_config = copy.deepcopy(base_config)  # leave base_config untouched

# Overrides applied on top of the baseline configuration.
final_config["training"]["lr"] = 0.001
final_config["training"]["batch_size"] = 64
final_config["training"]["num_epochs"] = 200
final_config["model"]["hidden_size"] = 128

# NOTE(review): the name repeats the literals set above, so the two must be
# kept in sync by hand. It also equals the name produced for the same
# (lr, bs, hs) combination in the first sweep -- confirm that fit_lstm does
# not overwrite artefacts keyed by experiment name.
final_config["experiment_name"] = f"lr{0.001}_bs{64}_hs{128}"

final_short_window_lstm = myLSTM(**final_config["model"]).to(device)

final_results[final_config["experiment_name"]] = train.fit_lstm(
    final_short_window_lstm,
    final_config["experiment_name"],
    train_dataset,
    test_dataset,
    **final_config["training"],
)
print()

In [None]:
# Persist the final model's parameters (its state_dict) under
# <repo_root>/saved_model_weights/, creating the directory if needed.
save_dir = os.path.join(repo_root, "saved_model_weights")
save_path = os.path.join(save_dir, "short_window_LSTM_weights.pth")

os.makedirs(save_dir, exist_ok=True)  # no error if the directory already exists
torch.save(final_short_window_lstm.state_dict(), save_path)