In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the notebook
# can reach the project folder referenced by `path` further down.
# Colab opens a pop-up window in which access has to be granted.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import datetime
import numpy as np
import pandas as pd

import torch.nn as nn
import math
from torch import nn, Tensor

import matplotlib.pyplot as plt
from tqdm import tqdm
from datetime import timedelta

import random
import sys

# Root of the Timeseries_Transformer project folder on the mounted Drive.
# The notebook imports the project modules from here, reads the training data
# from `{path}/Data` and writes model files under `{path}/Models`.
# Adjust this to wherever the folder lives in your own Drive.
path = "/content/drive/My Drive/Timeseries_Transformer" ########### This line should be changed ###########

In [None]:

# The training cells move the models, losses and masks to the GPU with .cuda(),
# so fail fast with a readable message when no CUDA device is available.
assert torch.cuda.is_available(), "GPU is not available, please select another run-time type in Colab that does support GPU."

# Device handle passed to the data helpers. The assertion above guarantees that
# CUDA is present, so this is always the first GPU.
device = torch.device("cuda:0")

In [None]:

# Put the project folder first on the import search path so that the two
# project modules below (expected to live in that folder) can be imported.
sys.path.insert(0, path)

# Project-local modules: helper functions/losses and the transformer model.
import utils
import Transformer_Model as tsModel

In [None]:
# Shared hyperparameters for the model cells below.

# Tensor layout flag handed to utils.change_dim in the training loops.
batch_first = False

# Transformer architecture.
n_encoder_layers = n_decoder_layers = 4
n_heads = 8
dim_val = 512
in_features_encoder_linear_layer = in_features_decoder_linear_layer = 2048

# Step size, i.e. how many time steps the moving window moves at each step.
step_size = 1

In [None]:
# Load the training sequences from the project's Data folder.
# Judging by the displayed output, each row holds a ticker, a start/end date,
# a Std/Mean pair, a Target value and numbered columns 0..51.
# NOTE(review): the "Unnamed: 0" column looks like an index that was written to
# the CSV; confirm whether the helpers in `utils` expect it before dropping it.
data = pd.read_csv(f'{path}/Data/Train_data.csv')
# Show the frame as the cell output (pandas truncates the rendering).
data

Unnamed: 0,Ticker,Start,End,Std,Mean,Target,0,1,2,3,...,42,43,44,45,46,47,48,49,50,51
0,0848680D US Equity,2011-12-26,2012-12-24,0.070169,0.018523,-0.571562,-0.080081,-1.143180,0.431351,-0.639025,...,-0.289046,-0.748845,0.395460,-0.615254,-0.417208,-0.085343,-0.012613,-0.414256,-0.518012,-0.418591
1,ON US Equity,2011-12-26,2012-12-24,0.045480,-0.004158,2.034040,0.435876,1.908284,-0.641805,2.036218,...,0.164235,0.451889,1.172715,-0.811848,-0.887582,1.158916,1.444163,0.584085,0.763100,-0.483041
2,OMV AV Equity,2011-12-26,2012-12-24,0.035286,0.003772,0.209700,0.189606,-0.059739,-0.307844,1.355347,...,-0.379288,-0.846366,1.199650,-1.522898,-0.122562,-0.070323,0.049260,0.177152,-0.256289,-0.081080
3,OML LN Equity,2011-12-26,2012-12-24,0.026060,0.005241,2.246997,-0.144441,0.277326,1.226403,1.460132,...,1.055692,-1.223971,0.045677,-0.923465,0.386810,-0.766281,0.520807,0.419635,-0.157164,0.860373
4,OMI US Equity,2011-12-26,2012-12-24,0.022801,0.000982,-0.559379,-0.576399,-0.359849,0.541192,2.323201,...,-0.821738,0.401402,0.169911,0.032740,-0.149242,-0.119050,-2.563318,0.818454,0.943932,0.112350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144149,EXEL US Equity,2016-12-19,2017-12-18,0.064486,0.009458,0.105186,-1.007860,-0.658160,2.407820,0.652714,...,1.761518,-1.276974,-0.672783,0.607669,-0.918858,-0.386630,0.288501,0.537541,-0.807384,-0.353482
144150,EXP US Equity,2016-12-19,2017-12-18,0.024582,0.001305,2.050313,-0.049513,-0.423011,0.144621,0.679837,...,0.321916,-0.270173,-0.937015,-0.941007,0.189209,0.384867,-0.389854,3.224025,0.495431,-1.438569
144151,EXPD US Equity,2016-12-19,2017-12-18,0.019928,0.001883,4.982290,-0.753607,-0.790787,-0.018738,0.684680,...,-1.040213,0.177536,-0.622907,-1.028403,1.553974,-0.365500,0.881032,3.983572,-0.693140,0.527320
144152,EWT US Equity,2016-12-19,2017-12-18,0.016259,0.005000,-1.994484,-1.345934,0.133875,1.569252,0.719807,...,1.724402,-0.764204,0.230405,-0.161596,-1.024008,-0.062274,-0.421819,-1.445724,-0.993960,-0.055474


In [None]:

# Split the sequences into a training set and a 15 % validation set.

# DataFrame.sample() draws from NumPy's global RNG when no random_state is
# passed, so seeding NumPy here makes the split reproducible.
np.random.seed(43)

# Stratify on the end date: draw 15 % of the rows within every 'End' group.
validation_df = (
    data
    .groupby('End', group_keys=False)
    .apply(lambda group: group.sample(frac=0.15))
)

# Every row that was not drawn for validation is used for training.
train_df = data.loc[~data.index.isin(validation_df.index)]

print(f'Number of training sequences: {train_df.shape[0]}')
print(f'Number of validation sequences: {validation_df.shape[0]}')

Number of training sequences: 122536
Number of validation sequences: 21618


### Training the model

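This section trains each model variant (MSE, MSE_T2V, WMSE and AdjMSE) with its own training loop, one cell per model. Every cell saves the model parameters after each epoch, keeps a separate copy of the parameters with the lowest validation loss, and writes the per-epoch loss values to an Excel file.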


In [None]:
######### MSE Model #########
# Trains the transformer with the 'original' positional encoding on an MSE
# loss. After every epoch the parameters are saved, the validation loss and
# negative correlation are computed, and the best parameters (lowest
# validation loss) are kept in a separate file.

model_version = 'MSE_model'

# Loss that is optimised (MSE) and the negative-correlation metric that is only
# reported on the validation set.
criterion = nn.MSELoss().cuda()
corr = utils.NegCorr().cuda()

batch_size = 128
enc_seq_len = 48  # length of input given to encoder
output_sequence_length = 1  # target sequence length. The length we want to predict

# The masks only depend on the sequence lengths, so they are built once and
# reused for every batch.
# NOTE(review): the exact mask shape/semantics are defined by
# utils.generate_square_subsequent_mask, which is not visible from this notebook.
src_mask = utils.generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=enc_seq_len
    ).cuda()

tgt_mask = utils.generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=output_sequence_length
    ).cuda()

# Define the model. `batch_first` comes from the parameter cell so that the
# model and utils.change_dim always agree on the tensor layout.
model = tsModel.TimeSeriesTransformer(
    input_size=1,
    dec_seq_len=enc_seq_len,
    batch_first=batch_first,
    num_predicted_features=1,
    n_encoder_layers=n_encoder_layers,
    n_decoder_layers=n_decoder_layers,
    n_heads=n_heads,
    PE='original',
    batch_size=batch_size
    ).cuda()

# Chooses the optimizer and sets the initial learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

# If we want to train the model from a certain already trained epoch
#model.load_state_dict(torch.load(f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_20.pth'))

# Number of epochs
epochs = 100

# Step-wise learning-rate decay as (epoch threshold, learning rate) pairs in
# ascending order: a rate applies once the epoch index exceeds its threshold.
# NOTE(review): the original code had two `i > 150` branches (0.0000005, then
# 0.0000001); the second always overwrote the first, so only it is kept here.
# If the last step was meant to start later (e.g. at 175), add it explicitly.
# NOTE(review): with epochs = 100 none of these thresholds is ever reached.
lr_schedule = [(100, 0.000005), (125, 0.000001), (150, 0.0000001)]

# Per-epoch averages: training loss, validation loss, validation negative correlation
losses = []
v_losses = []
corr_loss = []

# Lowest validation loss seen so far. Starting from +inf guarantees that the
# first epoch always writes a "best" checkpoint, whatever the scale of the loss.
loss_min = float('inf')

# Training loop
for i in range(epochs):

    # Apply the learning-rate schedule in place. Updating the existing
    # optimizer (instead of constructing a new Adam) keeps Adam's running
    # moment estimates, which would otherwise be reset every epoch.
    for threshold, scheduled_lr in lr_schedule:
        if i > threshold:
            for param_group in optimizer.param_groups:
                param_group['lr'] = scheduled_lr

    # ---------------- Training pass ----------------
    model.train()

    # A fresh batch iterator over the training sequences for this epoch
    input_data = utils.dataset(train_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # Batch losses of this epoch
    t_loss = []

    for src, trg, trg_y, _mean, _std in tqdm(input_data):

        # Change the dimensions to the layout selected by `batch_first`
        src, trg, trg_y = utils.change_dim(src, trg, trg_y, batch_first)

        # Call the module itself (not .forward) so that registered hooks run
        y_pred = model(src=src, tgt=trg, src_mask=src_mask, tgt_mask=tgt_mask)

        # Measure the loss/error of the predicted value vs the train target (trg_y)
        loss = criterion(y_pred.squeeze(-1), trg_y)

        # The backwards propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Saves the training loss of this batch
        t_loss.append(loss.detach().cpu().numpy())

    # Average training loss over all batches of the epoch
    avg_tloss = np.asarray(t_loss).mean()
    losses.append(avg_tloss)

    # ---------------- Validation pass ----------------
    model.eval()

    validation_data = utils.dataset(validation_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # Batch losses / negative correlations of this epoch
    v_loss = []
    c_loss = []

    # No gradients are needed for validation; disabling autograd avoids
    # building (and holding in GPU memory) a graph for every validation batch.
    with torch.no_grad():
        for v_src, v_trg, v_trg_y, _mean, _std in validation_data:

            # Change the dimensions to the layout selected by `batch_first`
            v_src, v_trg, v_trg_y = utils.change_dim(v_src, v_trg, v_trg_y, batch_first)

            y_vpred = model(src=v_src, tgt=v_trg, src_mask=src_mask, tgt_mask=tgt_mask)

            # Validation loss of the predicted value vs the validation target
            vloss = criterion(y_vpred.squeeze(-1), v_trg_y)

            # Negative correlation between predictions and targets
            closs = corr.loss(y_vpred.squeeze(-1).squeeze(-1), v_trg_y.squeeze(-1))

            v_loss.append(vloss.detach().cpu().numpy())
            c_loss.append(closs.detach().cpu().numpy())

    # Averages over all validation batches of the epoch
    avg_vloss = np.asarray(v_loss).mean()
    avg_closs = np.asarray(c_loss).mean()

    v_losses.append(avg_vloss)
    corr_loss.append(avg_closs)

    # Saves the model parameters whenever the average validation loss is the lowest so far
    if avg_vloss < loss_min:
        loss_min = avg_vloss
        torch.save(model.state_dict(), f'{path}/Models/{model_version}/Model_parameters.pth')

    # Saves the parameters of every individual epoch
    torch.save(model.state_dict(), f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_{i}.pth')

    # Print the losses during the loop
    print(f'Epoch {i} with loss: {avg_tloss}, validation loss: {avg_vloss} and Negative Correlation: {avg_closs}')


# Plots the train and validation loss values
plt.plot(losses, label='Train loss')
plt.plot(v_losses, label='Validation loss')
plt.ylabel('MSE')
plt.xlabel('Epoch')
plt.legend()

# Stores the per-epoch loss values next to the model files
loss = pd.DataFrame([losses, v_losses, corr_loss], index= ['Train loss', 'Validation loss', 'Negative correlation']).T
loss.to_excel(f'{path}/Models/{model_version}/Training loss values.xlsx')

In [None]:
######### MSE_T2V Model #########
# Trains the transformer with the 'T2V' positional encoding on an MSE loss.
# After every epoch the parameters are saved, the validation loss and negative
# correlation are computed, and the best parameters (lowest validation loss)
# are kept in a separate file.

model_version = 'MSE_T2V'

# Loss that is optimised (MSE) and the negative-correlation metric that is only
# reported on the validation set.
criterion = nn.MSELoss().cuda()
corr = utils.NegCorr().cuda()

batch_size = 1024
enc_seq_len = 48  # length of input given to encoder
output_sequence_length = 1  # target sequence length. The length we want to predict

# The masks only depend on the sequence lengths, so they are built once and
# reused for every batch.
# NOTE(review): the exact mask shape/semantics are defined by
# utils.generate_square_subsequent_mask, which is not visible from this notebook.
src_mask = utils.generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=enc_seq_len
    ).cuda()

tgt_mask = utils.generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=output_sequence_length
    ).cuda()

# Define the model. `batch_first` comes from the parameter cell so that the
# model and utils.change_dim always agree on the tensor layout.
model = tsModel.TimeSeriesTransformer(
    input_size=1,
    dec_seq_len=enc_seq_len,
    batch_first=batch_first,
    num_predicted_features=1,
    n_encoder_layers=n_encoder_layers,
    n_decoder_layers=n_decoder_layers,
    n_heads=n_heads,
    PE='T2V',
    batch_size=batch_size
    ).cuda()

# Chooses the optimizer and sets the initial learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# If we want to train the model from a certain already trained epoch
#model.load_state_dict(torch.load(f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_20.pth'))

# Number of epochs
epochs = 75

# Step-wise learning-rate decay as (epoch threshold, learning rate) pairs in
# ascending order: a rate applies once the epoch index exceeds its threshold.
# NOTE(review): with epochs = 75 the thresholds 75 and 125 are never reached;
# they only matter if `epochs` is increased.
lr_schedule = [(25, 0.00005), (50, 0.00001), (75, 0.000005), (125, 0.000001)]

# Per-epoch averages: training loss, validation loss, validation negative correlation
losses = []
v_losses = []
corr_loss = []

# Lowest validation loss seen so far. Starting from +inf guarantees that the
# first epoch always writes a "best" checkpoint, whatever the scale of the loss.
loss_min = float('inf')

# Training loop
for i in range(epochs):

    # Apply the learning-rate schedule in place. Updating the existing
    # optimizer (instead of constructing a new Adam) keeps Adam's running
    # moment estimates, which would otherwise be reset every epoch after
    # the first threshold.
    for threshold, scheduled_lr in lr_schedule:
        if i > threshold:
            for param_group in optimizer.param_groups:
                param_group['lr'] = scheduled_lr

    # ---------------- Training pass ----------------
    model.train()

    # A fresh batch iterator over the training sequences for this epoch
    input_data = utils.dataset(train_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # Batch losses of this epoch
    t_loss = []

    for src, trg, trg_y, _mean, _std in tqdm(input_data):

        # Change the dimensions to the layout selected by `batch_first`
        src, trg, trg_y = utils.change_dim(src, trg, trg_y, batch_first)

        # Call the module itself (not .forward) so that registered hooks run
        y_pred = model(src=src, tgt=trg, src_mask=src_mask, tgt_mask=tgt_mask)

        # Measure the loss/error of the predicted value vs the train target (trg_y)
        loss = criterion(y_pred.squeeze(-1), trg_y)

        # The backwards propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Saves the training loss of this batch
        t_loss.append(loss.detach().cpu().numpy())

    # Average training loss over all batches of the epoch
    avg_tloss = np.asarray(t_loss).mean()
    losses.append(avg_tloss)

    # ---------------- Validation pass ----------------
    model.eval()

    validation_data = utils.dataset(validation_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # Batch losses / negative correlations of this epoch
    v_loss = []
    c_loss = []

    # No gradients are needed for validation; disabling autograd avoids
    # building (and holding in GPU memory) a graph for every validation batch.
    with torch.no_grad():
        for v_src, v_trg, v_trg_y, _mean, _std in validation_data:

            # Change the dimensions to the layout selected by `batch_first`
            v_src, v_trg, v_trg_y = utils.change_dim(v_src, v_trg, v_trg_y, batch_first)

            y_vpred = model(src=v_src, tgt=v_trg, src_mask=src_mask, tgt_mask=tgt_mask)

            # Validation loss of the predicted value vs the validation target
            vloss = criterion(y_vpred.squeeze(-1), v_trg_y)

            # Negative correlation between predictions and targets.
            # NOTE(review): the MSE_model cell squeezes both arguments once more
            # before calling corr.loss; confirm which shape NegCorr.loss expects.
            closs = corr.loss(y_vpred.squeeze(-1), v_trg_y)

            v_loss.append(vloss.detach().cpu().numpy())
            c_loss.append(closs.detach().cpu().numpy())

    # Averages over all validation batches of the epoch
    avg_vloss = np.asarray(v_loss).mean()
    avg_closs = np.asarray(c_loss).mean()

    v_losses.append(avg_vloss)
    corr_loss.append(avg_closs)

    # Saves the model parameters whenever the average validation loss is the lowest so far
    if avg_vloss < loss_min:
        loss_min = avg_vloss
        torch.save(model.state_dict(), f'{path}/Models/{model_version}/Model_parameters.pth')

    # Saves the parameters of every individual epoch
    torch.save(model.state_dict(), f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_{i}.pth')

    # Print the losses during the loop
    print(f'Epoch {i} with loss: {avg_tloss}, validation loss: {avg_vloss} and Negative Correlation: {avg_closs}')


# Plots the train and validation loss values
plt.plot(losses, label='Train loss')
plt.plot(v_losses, label='Validation loss')
plt.ylabel('MSE')
plt.xlabel('Epoch')
plt.legend()

# Stores the per-epoch loss values next to the model files
loss = pd.DataFrame([losses, v_losses, corr_loss], index= ['Train loss', 'Validation loss', 'Negative correlation']).T
loss.to_excel(f'{path}/Models/{model_version}/Training loss values.xlsx')

In [None]:
######### WMSE Model #########
# Trains the transformer with the 'original' positional encoding on the
# project's WMSE loss. After every epoch the parameters are saved, the
# validation loss and negative correlation are computed, and the best
# parameters (lowest validation loss) are kept in a separate file.

model_version = 'WMSE_model'

# Loss that is optimised (utils.WMSE, called through its .loss method) and the
# negative-correlation metric that is only reported on the validation set.
criterion = utils.WMSE().cuda()
corr = utils.NegCorr().cuda()

batch_size = 128
enc_seq_len = 48  # length of input given to encoder
output_sequence_length = 1  # target sequence length. The length we want to predict

# The masks only depend on the sequence lengths, so they are built once and
# reused for every batch.
# NOTE(review): the exact mask shape/semantics are defined by
# utils.generate_square_subsequent_mask, which is not visible from this notebook.
src_mask = utils.generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=enc_seq_len
    ).cuda()

tgt_mask = utils.generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=output_sequence_length
    ).cuda()

# Define the model. `batch_first` comes from the parameter cell so that the
# model and utils.change_dim always agree on the tensor layout.
model = tsModel.TimeSeriesTransformer(
    input_size=1,
    dec_seq_len=enc_seq_len,
    batch_first=batch_first,
    num_predicted_features=1,
    n_encoder_layers=n_encoder_layers,
    n_decoder_layers=n_decoder_layers,
    n_heads=n_heads,
    PE='original',
    batch_size=batch_size
    ).cuda()

# Chooses the optimizer and sets the initial learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# If we want to train the model from a certain already trained epoch
#model.load_state_dict(torch.load(f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_20.pth'))

# Number of epochs
epochs = 100

# Step-wise learning-rate decay as (epoch threshold, learning rate) pairs in
# ascending order: a rate applies once the epoch index exceeds its threshold.
# NOTE(review): with epochs = 100 the threshold 125 is never reached; it only
# matters if `epochs` is increased.
lr_schedule = [(25, 0.00005), (50, 0.00001), (75, 0.000005), (125, 0.000001)]

# Per-epoch averages: training loss, validation loss, validation negative correlation
losses = []
v_losses = []
corr_loss = []

# Lowest validation loss seen so far. Starting from +inf guarantees that the
# first epoch always writes a "best" checkpoint, whatever the scale of the loss.
loss_min = float('inf')

# Training loop
for i in range(epochs):

    # Apply the learning-rate schedule in place. Updating the existing
    # optimizer (instead of constructing a new Adam) keeps Adam's running
    # moment estimates, which would otherwise be reset every epoch after
    # the first threshold.
    for threshold, scheduled_lr in lr_schedule:
        if i > threshold:
            for param_group in optimizer.param_groups:
                param_group['lr'] = scheduled_lr

    # ---------------- Training pass ----------------
    model.train()

    # A fresh batch iterator over the training sequences for this epoch
    input_data = utils.dataset(train_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # Batch losses of this epoch
    t_loss = []

    for src, trg, trg_y, _mean, _std in tqdm(input_data):

        # Change the dimensions to the layout selected by `batch_first`
        src, trg, trg_y = utils.change_dim(src, trg, trg_y, batch_first)

        # Call the module itself (not .forward) so that registered hooks run
        y_pred = model(src=src, tgt=trg, src_mask=src_mask, tgt_mask=tgt_mask)

        # Measure the loss/error of the predicted value vs the train target (trg_y)
        loss = criterion.loss(y_pred.squeeze(-1), trg_y)

        # The backwards propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Saves the training loss of this batch
        t_loss.append(loss.detach().cpu().numpy())

    # Average training loss over all batches of the epoch
    avg_tloss = np.asarray(t_loss).mean()
    losses.append(avg_tloss)

    # ---------------- Validation pass ----------------
    model.eval()

    validation_data = utils.dataset(validation_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # Batch losses / negative correlations of this epoch
    v_loss = []
    c_loss = []

    # No gradients are needed for validation; disabling autograd avoids
    # building (and holding in GPU memory) a graph for every validation batch.
    with torch.no_grad():
        for v_src, v_trg, v_trg_y, _mean, _std in validation_data:

            # Change the dimensions to the layout selected by `batch_first`
            v_src, v_trg, v_trg_y = utils.change_dim(v_src, v_trg, v_trg_y, batch_first)

            y_vpred = model(src=v_src, tgt=v_trg, src_mask=src_mask, tgt_mask=tgt_mask)

            # Validation loss of the predicted value vs the validation target
            vloss = criterion.loss(y_vpred.squeeze(-1), v_trg_y)

            # Negative correlation between predictions and targets.
            # NOTE(review): the MSE_model cell squeezes both arguments once more
            # before calling corr.loss; confirm which shape NegCorr.loss expects.
            closs = corr.loss(y_vpred.squeeze(-1), v_trg_y)

            v_loss.append(vloss.detach().cpu().numpy())
            c_loss.append(closs.detach().cpu().numpy())

    # Averages over all validation batches of the epoch
    avg_vloss = np.asarray(v_loss).mean()
    avg_closs = np.asarray(c_loss).mean()

    v_losses.append(avg_vloss)
    corr_loss.append(avg_closs)

    # Saves the model parameters whenever the average validation loss is the lowest so far
    if avg_vloss < loss_min:
        loss_min = avg_vloss
        torch.save(model.state_dict(), f'{path}/Models/{model_version}/Model_parameters.pth')

    # Saves the parameters of every individual epoch
    torch.save(model.state_dict(), f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_{i}.pth')

    # Print the losses during the loop
    print(f'Epoch {i} with loss: {avg_tloss}, validation loss: {avg_vloss} and Negative Correlation: {avg_closs}')


# Plots the train and validation loss values. The curves are WMSE values
# (the criterion of this cell), so the axis is labelled accordingly.
plt.plot(losses, label='Train loss')
plt.plot(v_losses, label='Validation loss')
plt.ylabel('WMSE')
plt.xlabel('Epoch')
plt.legend()

# Stores the per-epoch loss values next to the model files
loss = pd.DataFrame([losses, v_losses, corr_loss], index= ['Train loss', 'Validation loss', 'Negative correlation']).T
loss.to_excel(f'{path}/Models/{model_version}/Training loss values.xlsx')

In [None]:
######### AdjMSE Model #########

# Name of the sub-folder of `{path}/Models` that receives this run's files
model_version = 'AdjMSE_model'

# Training objective, and the negative-correlation metric reported on validation data
criterion = utils.AdjMSELoss2().cuda()
corr = utils.NegCorr().cuda()

batch_size = 128
enc_seq_len = 48  # number of time steps given to the encoder
output_sequence_length = 1  # number of time steps we want to predict

## The masks are identical for every iteration, so they are built once up front
# src mask, built with dim1=output_sequence_length and dim2=enc_seq_len
src_mask = utils.generate_square_subsequent_mask(dim1=output_sequence_length, dim2=enc_seq_len).cuda()

# tgt mask, built with dim1=dim2=output_sequence_length
tgt_mask = utils.generate_square_subsequent_mask(dim1=output_sequence_length, dim2=output_sequence_length).cuda()

# Define the model
model = tsModel.TimeSeriesTransformer(
    input_size=1,
    dec_seq_len=enc_seq_len,
    batch_first=False,
    num_predicted_features=1,
    n_encoder_layers=n_encoder_layers,
    n_decoder_layers=n_decoder_layers,
    n_heads=n_heads,
    PE='original',
    batch_size=batch_size,
).cuda()


## Training set-up

# Adam optimizer with the initial learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# If we want to continue training from the parameters of an already trained epoch
#model.load_state_dict(torch.load(f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_20.pth'))

# Number of epochs
epochs = 100

# Per-epoch history: train loss, validation loss, validation correlation metric
losses, v_losses, corr_loss = [], [], []

# Lowest average validation loss seen so far
loss_min = 100

# Learning-rate schedule as (epoch threshold, learning rate) pairs: a rate applies
# once the epoch index is strictly greater than its threshold, and the last
# matching pair wins. Thresholds at or above `epochs` are never reached.
lr_schedule = [(25, 0.00005), (50, 0.00001), (75, 0.000005), (125, 0.000001)]

# Training loop
for i in range(epochs):

    # Lower the learning rate of the existing optimizer in place. Constructing a
    # new Adam optimizer here would discard its running moment estimates every epoch.
    for threshold, new_lr in lr_schedule:
        if i > threshold:
            for param_group in optimizer.param_groups:
                param_group['lr'] = new_lr

    # Put the model in training mode
    model.train(True)

    # Create data
    input_data = utils.dataset(train_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # List to collect train losses for the average loss of the training set
    t_loss = []

    # Loop over each batch
    for src, trg, trg_y, mean, std in tqdm(input_data):

        # Change the dimensions
        src, trg, trg_y = utils.change_dim(src, trg, trg_y, batch_first)

        # Send data through the model and get the prediction results
        y_pred = model(src=src, tgt=trg, src_mask=src_mask, tgt_mask=tgt_mask)

        # Measure the loss/error of the predicted value vs the train target (trg_y)
        loss = criterion.loss(y_pred.squeeze(-1), trg_y)

        # Saves the train loss of this batch
        t_loss.append(loss.cpu().detach().numpy())

        # The backwards propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Averaging over the t_loss of every batch
    avg_tloss = np.asarray(t_loss).mean()

    # Saves the loss
    losses.append(avg_tloss)

    # Turns off the training mode, since we want to calculate the validation loss
    model.train(False)

    # Create validation data
    validation_data = utils.dataset(validation_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # Lists to collect the validation loss and the correlation metric per batch
    v_loss = []
    c_loss = []

    # No gradients are needed for validation, so autograd bookkeeping is switched off
    with torch.no_grad():

        # Loop over each batch
        for src, trg, trg_y, mean, std in validation_data:

            # Change the dimensions
            v_src, v_trg, v_trg_y = utils.change_dim(src, trg, trg_y, batch_first)

            # Send data through the model and get the prediction results
            y_vpred = model(src=v_src, tgt=v_trg, src_mask=src_mask, tgt_mask=tgt_mask)

            # Measure the loss/error of the predicted value vs the validation target (v_trg_y)
            vloss = criterion.loss(y_vpred.squeeze(-1), v_trg_y)

            # Secondary metric: negative correlation between prediction and target
            closs = corr.loss(y_vpred.squeeze(-1), v_trg_y)

            # Saves the validation loss and the correlation metric of this batch
            v_loss.append(vloss.cpu().detach().numpy())
            c_loss.append(closs.cpu().detach().numpy())

    # Averaging over the batches
    avg_vloss = np.asarray(v_loss).mean()
    avg_closs = np.asarray(c_loss).mean()

    # Saves the averages for the epoch
    v_losses.append(avg_vloss)
    corr_loss.append(avg_closs)

    # Saves the model parameters whenever the average validation loss is the lowest seen so far
    if avg_vloss < loss_min:
        loss_min = avg_vloss
        torch.save(model.state_dict(), f'{path}/Models/{model_version}/Model_parameters.pth')

    # Saves the parameters of every individual epoch
    torch.save(model.state_dict(), f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_{i}.pth')

    # Print the losses during the loop
    print(f'Epoch {i} with loss: {avg_tloss}, validation loss: {avg_vloss} and Negative Correlation: {avg_closs}')


# Plots the train and validation loss values (labelled, so the curves can be told apart)
plt.plot(losses, label='Train loss')
plt.plot(v_losses, label='Validation loss')
# The criterion of this cell is utils.AdjMSELoss2, not a plain MSE
plt.ylabel('AdjMSE loss')
plt.xlabel('Epoch')
plt.legend()

# Transpose so that each loss series becomes a column (one row per epoch), then
# store the table in the folder of this model version
loss = pd.DataFrame([losses, v_losses, corr_loss], index= ['Train loss', 'Validation loss', 'Negative correlation']).T
loss.to_excel(f'{path}/Models/{model_version}/Training loss values.xlsx')

In [None]:
######### NegCorr Model #########

# Name of the sub-folder of `{path}/Models` that receives this run's files
model_version = 'NegCorr_model'

# Training objective is the negative correlation; `corr` holds the MSE that is
# reported as the secondary validation metric in this cell
criterion = utils.NegCorr().cuda()
corr = nn.MSELoss().cuda()

batch_size = 128
enc_seq_len = 48  # number of time steps given to the encoder
output_sequence_length = 1  # number of time steps we want to predict

## The masks are identical for every iteration, so they are built once up front
# src mask, built with dim1=output_sequence_length and dim2=enc_seq_len
src_mask = utils.generate_square_subsequent_mask(dim1=output_sequence_length, dim2=enc_seq_len).cuda()

# tgt mask, built with dim1=dim2=output_sequence_length
tgt_mask = utils.generate_square_subsequent_mask(dim1=output_sequence_length, dim2=output_sequence_length).cuda()

# Define the model
model = tsModel.TimeSeriesTransformer(
    input_size=1,
    dec_seq_len=enc_seq_len,
    batch_first=False,
    num_predicted_features=1,
    n_encoder_layers=n_encoder_layers,
    n_decoder_layers=n_decoder_layers,
    n_heads=n_heads,
    PE='original',
    batch_size=batch_size,
).cuda()


## Training set-up

# Adam optimizer with the initial learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# If we want to continue training from the parameters of an already trained epoch
#model.load_state_dict(torch.load(f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_20.pth'))

# Number of epochs
epochs = 150

# Per-epoch history: train loss, validation loss, validation MSE metric
losses, v_losses, corr_loss = [], [], []

# Lowest average validation loss seen so far
loss_min = 100

# Learning-rate schedule as (epoch threshold, learning rate) pairs: a rate applies
# once the epoch index is strictly greater than its threshold, and the last
# matching pair wins. Thresholds at or above `epochs` are never reached.
lr_schedule = [(50, 0.000005), (100, 0.000001), (150, 0.0000005), (200, 0.0000001)]

# Training loop
for i in range(epochs):

    # Lower the learning rate of the existing optimizer in place. Constructing a
    # new Adam optimizer here would discard its running moment estimates every epoch.
    for threshold, new_lr in lr_schedule:
        if i > threshold:
            for param_group in optimizer.param_groups:
                param_group['lr'] = new_lr

    # Put the model in training mode
    model.train(True)

    # List to collect train losses for the average loss of the training set
    t_loss = []

    # Creates a list of all unique dates in the training data
    train_dates = train_df['End'].unique()
    # Shuffles the list for training
    random.shuffle(train_dates)

    # The data is fed one date at a time, so every batch only holds rows
    # that share the same 'End' date
    for d in tqdm(train_dates):

        # Create data for this date only
        input_data = utils.dataset(train_df[train_df['End'] == d], enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

        # Loop over each batch
        for src, trg, trg_y, mean, std in input_data:

            # Change the dimensions
            src, trg, trg_y = utils.change_dim(src, trg, trg_y, batch_first)

            # Send data through the model and get the prediction results
            y_pred = model(src=src, tgt=trg, src_mask=src_mask, tgt_mask=tgt_mask)

            # Measure the loss/error of the predicted value vs the train target (trg_y)
            loss = criterion.loss(y_pred.squeeze(-1), trg_y)

            # Saves the train loss of this batch
            t_loss.append(loss.cpu().detach().numpy())

            # The backwards propagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Averaging over the t_loss of every batch
    avg_tloss = np.asarray(t_loss).mean()

    # Saves the loss
    losses.append(avg_tloss)

    # Turns off the training mode, since we want to calculate the validation loss
    model.train(False)

    # Lists to collect the validation loss and the MSE metric per batch
    v_loss = []
    c_loss = []

    # Creates a list of all unique dates in the validation data
    validation_dates = validation_df['End'].unique()

    # No gradients are needed for validation, so autograd bookkeeping is switched off
    with torch.no_grad():

        for d in validation_dates:

            # All validation rows of this date are requested as one batch
            validation_rows = validation_df[validation_df['End'] == d]
            validation_data = utils.dataset(validation_rows, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=len(validation_rows), device=device)

            # Loop over each batch
            for src, trg, trg_y, mean, std in validation_data:

                # Change the dimensions
                v_src, v_trg, v_trg_y = utils.change_dim(src, trg, trg_y, batch_first)

                # Send data through the model and get the prediction results
                y_vpred = model(src=v_src, tgt=v_trg, src_mask=src_mask, tgt_mask=tgt_mask)

                # Measure the loss/error of the predicted value vs the validation target (v_trg_y)
                vloss = criterion.loss(y_vpred.squeeze(-1), v_trg_y)

                # Standardize both validation predictions and target before calculating MSE
                flat_vpred = y_vpred.squeeze(-1)
                y_vpred = (flat_vpred - torch.mean(flat_vpred)) / torch.std(flat_vpred)
                v_trg_y = (v_trg_y - torch.mean(v_trg_y)) / torch.std(v_trg_y)

                # Secondary metric: MSE between the standardized prediction and target
                closs = corr(y_vpred, v_trg_y)

                # Saves the validation loss and the MSE metric of this batch
                v_loss.append(vloss.cpu().detach().numpy())
                c_loss.append(closs.cpu().detach().numpy())

    # Averaging over the batches
    avg_vloss = np.asarray(v_loss).mean()
    avg_closs = np.asarray(c_loss).mean()

    # Saves the averages for the epoch
    v_losses.append(avg_vloss)
    corr_loss.append(avg_closs)

    # Saves the model parameters whenever the average validation loss is the lowest seen so far
    if avg_vloss < loss_min:
        loss_min = avg_vloss
        torch.save(model.state_dict(), f'{path}/Models/{model_version}/Model_parameters.pth')

    # Saves the parameters of every individual epoch
    torch.save(model.state_dict(), f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_{i}.pth')

    # Print the losses during the loop
    print(f'Epoch {i} with loss: {avg_tloss}, validation loss: {avg_vloss} and MSE: {avg_closs}')


# Plots the train and validation loss values (labelled, so the curves can be told apart)
plt.plot(losses, label='Train loss')
plt.plot(v_losses, label='Validation loss')
# The criterion of this cell is utils.NegCorr, so the plotted loss is not an MSE
plt.ylabel('Negative correlation')
plt.xlabel('Epoch')
plt.legend()

# Transpose so that each series becomes a column (one row per epoch), then
# store the table in the folder of this model version
loss = pd.DataFrame([losses, v_losses, corr_loss], index= ['Train loss', 'Validation loss', 'MSE']).T
loss.to_excel(f'{path}/Models/{model_version}/Training loss values.xlsx')

In [None]:
######### LSTM Model #########

# Name of the sub-folder of `{path}/Models` that receives this run's files
model_version = 'LSTM_model'

# Data and network dimensions
batch_size = 128
input_size = 1     # features per time step
seq_len = 48       # number of input time steps
output_size = 1    # number of values predicted per sample
hidden_size = 100  # size of the LSTM hidden state

# Training objective, and the negative-correlation metric reported on validation data
criterion = nn.MSELoss().cuda()
corr = utils.NegCorr().cuda()

# Define the LSTM model
class LSTM(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        output_size: number of values produced by the linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # batch_first=True: the input is laid out as (batch, time, feature)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run `x` through the LSTM and project the output of its last time step."""
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Build the network and move it to the GPU
model = LSTM(input_size=input_size, hidden_size=hidden_size, output_size=output_size).cuda()


## Training set-up

# Adam optimizer with the initial learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# If we want to continue training from the parameters of an already trained epoch
#model.load_state_dict(torch.load(f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_20.pth'))

# Number of epochs
epochs = 100

# Per-epoch history: train loss, validation loss, validation correlation metric
losses, v_losses, corr_loss = [], [], []

# Lowest average validation loss seen so far
loss_min = 100

# Learning-rate schedule as (epoch threshold, learning rate) pairs: a rate applies
# once the epoch index is strictly greater than its threshold, and the last
# matching pair wins. Thresholds at or above `epochs` are never reached.
lr_schedule = [(50, 0.00005), (75, 0.00001), (100, 0.000005), (125, 0.000001)]

# Training loop
for i in range(epochs):

    # Lower the learning rate of the existing optimizer in place. Constructing a
    # new Adam optimizer here would discard its running moment estimates every epoch.
    for threshold, new_lr in lr_schedule:
        if i > threshold:
            for param_group in optimizer.param_groups:
                param_group['lr'] = new_lr

    # Put the model in training mode
    model.train(True)

    # Create data
    input_data = utils.dataset(train_df, enc_seq_len=seq_len, trg_seq_len=output_size, batch_size=batch_size, device=device)

    # List to collect train losses for the average loss of the training set
    t_loss = []

    # Loop over each batch
    for src, trg, trg_y, mean, std in tqdm(input_data):

        # Change the dimensions
        src, trg, trg_y = utils.change_dim(src, trg, trg_y, batch_first)

        # Transpose the data to match the LSTM model (built with batch_first=True)
        trg_y = torch.transpose(trg_y, 0, 1)
        src = torch.transpose(src, 0, 1)

        # Send data through the model and get the prediction results
        y_pred = model(src)

        # Measure the loss/error of the predicted value vs the train target (trg_y)
        loss = criterion(y_pred.squeeze(-1), trg_y.squeeze(-1))

        # Saves the train loss of this batch
        t_loss.append(loss.cpu().detach().numpy())

        # The backwards propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Averaging over the t_loss of every batch
    avg_tloss = np.asarray(t_loss).mean()

    # Saves the loss
    losses.append(avg_tloss)

    # Turns off the training mode, since we want to calculate the validation loss
    model.train(False)

    # Create validation data
    validation_data = utils.dataset(validation_df, enc_seq_len=seq_len, trg_seq_len=output_size, batch_size=batch_size, device=device)

    # Lists to collect the validation loss and the correlation metric per batch
    v_loss = []
    c_loss = []

    # No gradients are needed for validation, so autograd bookkeeping is switched off
    with torch.no_grad():

        # Loop over each batch
        for src, trg, trg_y, mean, std in validation_data:

            # Change the dimensions
            v_src, v_trg, v_trg_y = utils.change_dim(src, trg, trg_y, batch_first)

            # Transpose the data to match the LSTM model (built with batch_first=True)
            v_trg_y = torch.transpose(v_trg_y, 0, 1)
            v_src = torch.transpose(v_src, 0, 1)

            # Feed the prepared validation input (v_src), not the raw batch `src`
            y_vpred = model(v_src)

            # Measure the loss/error of the predicted value vs the validation target (v_trg_y)
            vloss = criterion(y_vpred.squeeze(-1), v_trg_y.squeeze(-1))

            # Secondary metric: negative correlation between prediction and target
            closs = corr.loss(y_vpred.squeeze(-1), v_trg_y.squeeze(-1))

            # Saves the validation loss and the correlation metric of this batch
            v_loss.append(vloss.cpu().detach().numpy())
            c_loss.append(closs.cpu().detach().numpy())

    # Averaging over the batches
    avg_vloss = np.asarray(v_loss).mean()
    avg_closs = np.asarray(c_loss).mean()

    # Saves the averages for the epoch
    v_losses.append(avg_vloss)
    corr_loss.append(avg_closs)

    # Saves the model parameters whenever the average validation loss is the lowest seen so far
    if avg_vloss < loss_min:
        loss_min = avg_vloss
        torch.save(model.state_dict(), f'{path}/Models/{model_version}/Model_parameters.pth')

    # Saves the parameters of every individual epoch
    torch.save(model.state_dict(), f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_{i}.pth')

    # Print the losses during the loop
    print(f'Epoch {i} with loss: {avg_tloss}, validation loss: {avg_vloss} and Negative Correlation: {avg_closs}')


# Plots the train and validation loss values (labelled, so the curves can be told apart)
plt.plot(losses, label='Train loss')
plt.plot(v_losses, label='Validation loss')
plt.ylabel('MSE')
plt.xlabel('Epoch')
plt.legend()

# Transpose so that each loss series becomes a column (one row per epoch), then
# store the table in the folder of this model version
loss = pd.DataFrame([losses, v_losses, corr_loss], index= ['Train loss', 'Validation loss', 'Negative correlation']).T
loss.to_excel(f'{path}/Models/{model_version}/Training loss values.xlsx')

In [None]:
######### MSE Model #########

# Name of the sub-folder of `{path}/Models` that receives this run's files
model_version = 'MSE_single'

# Training objective, and the negative-correlation metric reported on validation data
criterion = nn.MSELoss().cuda()
corr = utils.NegCorr().cuda()

batch_size = 128
enc_seq_len = 48  # number of time steps given to the encoder
output_sequence_length = 4  # number of time steps we want to predict

## The masks are identical for every iteration, so they are built once up front
# src mask, built with dim1=output_sequence_length and dim2=enc_seq_len
src_mask = utils.generate_square_subsequent_mask(dim1=output_sequence_length, dim2=enc_seq_len).cuda()

# tgt mask, built with dim1=dim2=output_sequence_length
tgt_mask = utils.generate_square_subsequent_mask(dim1=output_sequence_length, dim2=output_sequence_length).cuda()

# Define the model
model = tsModel.TimeSeriesTransformer(
    input_size=1,
    dec_seq_len=enc_seq_len,
    batch_first=False,
    num_predicted_features=1,
    n_encoder_layers=n_encoder_layers,
    n_decoder_layers=n_decoder_layers,
    n_heads=n_heads,
    PE='original',
    batch_size=batch_size,
).cuda()

## Training set-up

# Adam optimizer with the initial learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# If we want to continue training from the parameters of an already trained epoch
#model.load_state_dict(torch.load(f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_20.pth'))

# Number of epochs
epochs = 100

# Per-epoch history: train loss, validation loss, validation correlation metric
losses, v_losses, corr_loss = [], [], []

# Lowest average validation loss seen so far
loss_min = 100

# Learning-rate schedule as (epoch threshold, learning rate) pairs: a rate applies
# once the epoch index is strictly greater than its threshold, and the last
# matching pair wins. Thresholds at or above `epochs` are never reached.
lr_schedule = [(25, 0.00005), (50, 0.00001), (75, 0.000005), (125, 0.000001)]

# Training loop
for i in range(epochs):

    # Lower the learning rate of the existing optimizer in place. Constructing a
    # new Adam optimizer here would discard its running moment estimates every epoch.
    for threshold, new_lr in lr_schedule:
        if i > threshold:
            for param_group in optimizer.param_groups:
                param_group['lr'] = new_lr

    # Put the model in training mode
    model.train(True)

    # Create data
    input_data = utils.dataset(train_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # List to collect train losses for the average loss of the training set
    t_loss = []

    # Loop over each batch
    for src, trg, trg_y, mean, std in tqdm(input_data):

        # Change the dimensions
        src, trg, trg_y = utils.change_dim(src, trg, trg_y, batch_first)

        # Send data through the model and get the prediction results
        y_pred = model(src=src, tgt=trg, src_mask=src_mask, tgt_mask=tgt_mask)

        # Measure the loss/error of the predicted value vs the train target (trg_y)
        loss = criterion(y_pred.squeeze(-1), trg_y)

        # Saves the train loss of this batch
        t_loss.append(loss.cpu().detach().numpy())

        # The backwards propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Averaging over the t_loss of every batch
    avg_tloss = np.asarray(t_loss).mean()

    # Saves the loss
    losses.append(avg_tloss)

    # Turns off the training mode, since we want to calculate the validation loss
    model.train(False)

    # Create validation data
    validation_data = utils.dataset(validation_df, enc_seq_len=enc_seq_len, trg_seq_len=output_sequence_length, batch_size=batch_size, device=device)

    # Lists to collect the validation loss and the correlation metric per batch
    v_loss = []
    c_loss = []

    # No gradients are needed for validation, so autograd bookkeeping is switched off
    with torch.no_grad():

        # Loop over each batch
        for src, trg, trg_y, mean, std in validation_data:

            # Change the dimensions
            v_src, v_trg, v_trg_y = utils.change_dim(src, trg, trg_y, batch_first)

            # Send data through the model and get the prediction results
            y_vpred = model(src=v_src, tgt=v_trg, src_mask=src_mask, tgt_mask=tgt_mask)

            # Measure the loss/error of the predicted value vs the validation target (v_trg_y)
            vloss = criterion(y_vpred.squeeze(-1), v_trg_y)

            # Stack mean and std once per predicted step so they can be applied
            # to every step of the prediction and the target
            mean = torch.vstack([mean] * output_sequence_length).to(device)
            std = torch.vstack([std] * output_sequence_length).to(device)

            # Revert the scaling so the values can be summed across the predicted
            # steps before the correlation is calculated
            y_vpred_corr = (y_vpred.squeeze(-1) * std) + mean
            v_trg_y_corr = (v_trg_y * std) + mean

            # Calculate the correlation metric on the de-scaled sums of BOTH the
            # prediction and the target
            closs = corr.loss(y_vpred_corr.sum(axis=0), v_trg_y_corr.sum(axis=0))

            # Saves the validation loss and the correlation metric of this batch
            v_loss.append(vloss.cpu().detach().numpy())
            c_loss.append(closs.cpu().detach().numpy())

    # Averaging over the batches
    avg_vloss = np.asarray(v_loss).mean()
    avg_closs = np.asarray(c_loss).mean()

    # Saves the averages for the epoch
    v_losses.append(avg_vloss)
    corr_loss.append(avg_closs)

    # Saves the model parameters whenever the average validation loss is the lowest seen so far
    if avg_vloss < loss_min:
        loss_min = avg_vloss
        torch.save(model.state_dict(), f'{path}/Models/{model_version}/Model_parameters.pth')

    # Saves the parameters of every individual epoch
    torch.save(model.state_dict(), f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_{i}.pth')

    # Print the losses during the loop
    print(f'Epoch {i} with loss: {avg_tloss}, validation loss: {avg_vloss} and Negative Correlation: {avg_closs}')


# Plots the train and validation loss values (labelled, so the curves can be told apart)
plt.plot(losses, label='Train loss')
plt.plot(v_losses, label='Validation loss')
plt.ylabel('MSE')
plt.xlabel('Epoch')
plt.legend()

# Transpose so that each loss series becomes a column (one row per epoch), then
# store the table in the folder of this model version
loss = pd.DataFrame([losses, v_losses, corr_loss], index= ['Train loss', 'Validation loss', 'Negative correlation']).T
loss.to_excel(f'{path}/Models/{model_version}/Training loss values.xlsx')

In [None]:
######### LSTM Model #########

# Name of the sub-folder of `{path}/Models` that receives this run's files
model_version = 'LSTM_single'

# Data and network dimensions
batch_size = 128
input_size = 1     # features per time step
seq_len = 48       # number of input time steps
output_size = 4    # number of values predicted per sample
hidden_size = 100  # size of the LSTM hidden state

# Training objective, and a negative-correlation criterion from utils
criterion = nn.MSELoss().cuda()
corr = utils.NegCorr().cuda()

# Define the LSTM model
class LSTM(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        output_size: number of values produced by the linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # batch_first=True: the input is laid out as (batch, time, feature)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run `x` through the LSTM and project the output of its last time step."""
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Build the network and move it to the GPU
model = LSTM(input_size=input_size, hidden_size=hidden_size, output_size=output_size).cuda()


## Training set-up

# Adam optimizer with the initial learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# If we want to continue training from the parameters of an already trained epoch
#model.load_state_dict(torch.load(f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_20.pth'))

# Number of epochs
epochs = 150

# Per-epoch history lists for the train loss, validation loss and correlation metric
losses, v_losses, corr_loss = [], [], []

# Starting value for the lowest validation loss
loss_min = 100

# Training loop

# Learning-rate schedule as (first epoch the rate applies to, learning rate).
# Before the first milestone the optimizer keeps the rate it was created with
lr_schedule = [(51, 0.00005), (76, 0.00001), (101, 0.000005), (126, 0.000001)]

for i in range(epochs):

    # Creates a decreasing learning rate based on the current epoch. The rate is
    # changed in place on the existing optimizer, so Adam keeps its running
    # moment estimates instead of being re-created (and reset) every epoch.
    # Later milestones overwrite earlier ones, so the last one reached wins
    for start_epoch, scheduled_lr in lr_schedule:
        if i >= start_epoch:
            for param_group in optimizer.param_groups:
                param_group['lr'] = scheduled_lr

    # Makes sure we train on the data
    model.train(True)

    # Create data
    input_data = utils.dataset(train_df, enc_seq_len=seq_len, trg_seq_len= output_size, batch_size= batch_size, device= device)

    # List to collect train losses for the average loss of the training set
    t_loss = []

    # Loop over each batch
    for src, trg, trg_y, mean, std in tqdm(input_data):

        # Change the dimensions
        src, trg, trg_y = utils.change_dim(src, trg, trg_y, batch_first)

        # Transpose the data to match the LSTM model
        trg_y = torch.transpose(trg_y, 0, 1)
        src = torch.transpose(src, 0, 1)

        # Send data through the model and get the prediction results.
        # Calling the module (instead of .forward) also runs registered hooks
        y_pred = model(src)

        # Measure the loss/error of the predicted value vs the train_target (trg_y)
        loss = criterion(y_pred.squeeze(-1), trg_y.squeeze(-1))

        # Saves the train loss of the batch
        t_loss.append(loss.cpu().detach().numpy())

        # The backwards propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Averaging over the t_loss of every batch
    avg_tloss = np.asarray(t_loss).mean()

    # Saves the loss
    losses.append(avg_tloss)

    # Turns off the training, since we want to calculate the validation loss
    model.train(False)

    # Create validation data
    validation_data = utils.dataset(validation_df, enc_seq_len=seq_len, trg_seq_len= output_size, batch_size= batch_size, device= device)

    # Lists to collect the validation loss and the negative correlation of every batch
    v_loss = []
    c_loss = []

    # No gradients are needed for validation, so no autograd graph is built
    with torch.no_grad():

        # Loop over each batch
        for src, trg, trg_y, mean, std in validation_data:

            # Change the dimensions
            v_src, v_trg, v_trg_y = utils.change_dim(src, trg, trg_y, batch_first)

            # Transpose the data to match the LSTM model
            v_trg_y = torch.transpose(v_trg_y, 0, 1)
            v_src = torch.transpose(v_src, 0, 1)

            # Send the transformed validation input (v_src, not the raw src)
            # through the model, exactly as is done for the training batches
            y_vpred = model(v_src)

            # Measure the loss/error of the predicted value vs the validation target (v_trg_y)
            vloss = criterion(y_vpred.squeeze(-1), v_trg_y.squeeze(-1))

            # Negative correlation between prediction and validation target
            closs = corr.loss(y_vpred.squeeze(-1), v_trg_y.squeeze(-1))

            # Saves the vloss and the closs of the batch
            v_loss.append(vloss.cpu().detach().numpy())
            c_loss.append(closs.cpu().detach().numpy())

    # Averaging over the v_loss and c_loss of every batch
    avg_vloss = np.asarray(v_loss).mean()
    avg_closs = np.asarray(c_loss).mean()

    # Saves the averages for the epoch
    v_losses.append(avg_vloss)
    corr_loss.append(avg_closs)

    # Saves the model parameters whenever the average validation loss is the lowest occured
    if avg_vloss < loss_min:
        loss_min = avg_vloss
        torch.save(model.state_dict(), f'{path}/Models/{model_version}/Model_parameters.pth')

    # Saves all the individual parameters for every epoch
    torch.save(model.state_dict(), f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_{i}.pth')

    # Print the losses during the loop
    print(f'Epoch {i} with loss: {avg_tloss}, validation loss: {avg_vloss} and Negative Correlation: {avg_closs}')


# Plots the train and validation loss per epoch. The labels and the legend make
# the two curves distinguishable in the rendered figure
plt.plot(losses, label='Train loss')
plt.plot(v_losses, label='Validation loss')
plt.ylabel('MSE')
plt.xlabel('Epoch')
plt.legend()

# Stores the per-epoch values (one column per metric) in the folder of this model version
loss = pd.DataFrame([losses, v_losses, corr_loss], index= ['Train loss', 'Validation loss', 'Negative correlation']).T
loss.to_excel(f'{path}/Models/{model_version}/Training loss values.xlsx')

In [None]:
######### MSE Model #########

# Sub-folder of {path}/Models/ that the checkpoints and the loss log of this run are written to
model_version = 'MSE_test'

# Window lengths and batch size
enc_seq_len = 48             # length of the input given to the encoder
output_sequence_length = 1   # target sequence length, i.e. the length we want to predict
batch_size = 128

# MSE is the training objective; the negative correlation is only tracked on the validation set
criterion = nn.MSELoss().cuda()
corr = utils.NegCorr().cuda()

## Creates the masks (the same for every iteration in the training, so they are built only once)

# Mask passed to the model as src_mask.
# NOTE(review): the original note gives the intended size as
# [batch_size*n_heads, output_sequence_length, enc_seq_len] - confirm against utils
src_mask = utils.generate_square_subsequent_mask(dim1=output_sequence_length, dim2=enc_seq_len).cuda()

# Mask passed to the model as tgt_mask.
# NOTE(review): the original note gives the intended size as
# [batch_size*n_heads, output_sequence_length, output_sequence_length] - confirm against utils
tgt_mask = utils.generate_square_subsequent_mask(dim1=output_sequence_length, dim2=output_sequence_length).cuda()

# Builds the time-series transformer from the shared hyper-parameters
# NOTE(review): dec_seq_len is set to the encoder length (enc_seq_len) - confirm this is intended
model = tsModel.TimeSeriesTransformer(
    input_size=1,
    dec_seq_len=enc_seq_len,
    batch_first=False,
    num_predicted_features=1,
    n_encoder_layers=n_encoder_layers,
    n_decoder_layers=n_decoder_layers,
    n_heads=n_heads,
    PE='original',
    batch_size=batch_size,
)

# Moves the model to the GPU
model = model.cuda()


## Creating the train

# Chooses the optimizer and sets the initial learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

# If we want to train the model from a certain already trained epoch
#model.load_state_dict(torch.load(f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_20.pth'))

# Number of epochs
epochs = 150

# Lists for the per-epoch train loss, validation loss and negative correlation
losses = []
v_losses = []
corr_loss = []

# Lowest average validation loss seen so far. Starting at infinity guarantees
# that the first epoch always writes a best-model checkpoint, whatever the
# scale of the loss is
loss_min = float('inf')

# Training loop

# Learning-rate schedule as (first epoch the rate applies to, learning rate).
# Before the first milestone the optimizer keeps the rate it was created with.
# The last milestone (epoch 151) is never reached while epochs = 150; it only
# becomes active when the number of epochs is increased
lr_schedule = [(51, 0.000005), (101, 0.000001), (126, 0.0000005), (151, 0.0000001)]

for i in range(epochs):

    # Creates a decreasing learning rate based on the current epoch. The rate is
    # changed in place on the existing optimizer, so Adam keeps its running
    # moment estimates instead of being re-created (and reset) every epoch.
    # Later milestones overwrite earlier ones, so the last one reached wins
    for start_epoch, scheduled_lr in lr_schedule:
        if i >= start_epoch:
            for param_group in optimizer.param_groups:
                param_group['lr'] = scheduled_lr

    # Makes sure we train on the data
    model.train(True)

    # Create data
    input_data = utils.dataset(train_df, enc_seq_len=enc_seq_len, trg_seq_len= output_sequence_length, batch_size= batch_size, device= device)

    # List to collect train losses for the average loss of the training set
    t_loss = []

    # Loop over each batch
    for src, trg, trg_y, mean, std in tqdm(input_data):

        # Change the dimensions
        src, trg, trg_y = utils.change_dim(src, trg, trg_y, batch_first)

        # Send data through the model and get the prediction results.
        # Calling the module (instead of .forward) also runs registered hooks
        y_pred = model(
            src=src,
            tgt=trg,
            src_mask=src_mask,
            tgt_mask=tgt_mask
        )

        # Measure the loss/error of the predicted value vs the train_target (trg_y)
        loss = criterion(y_pred.squeeze(-1), trg_y)

        # Saves the train loss of the batch
        t_loss.append(loss.cpu().detach().numpy())

        # The backwards propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Averaging over the t_loss of every batch
    avg_tloss = np.asarray(t_loss).mean()

    # Saves the loss
    losses.append(avg_tloss)

    # Turns off the training, since we want to calculate the validation loss
    model.train(False)

    # Create validation data
    validation_data = utils.dataset(validation_df, enc_seq_len=enc_seq_len, trg_seq_len= output_sequence_length, batch_size= batch_size, device= device)

    # Lists to collect the validation loss and the negative correlation of every batch
    v_loss = []
    c_loss = []

    # No gradients are needed for validation, so no autograd graph is built
    with torch.no_grad():

        # Loop over each batch
        for v_src, v_trg, v_trg_y, mean, std in validation_data:

            # Change the dimensions
            v_src, v_trg, v_trg_y = utils.change_dim(v_src, v_trg, v_trg_y, batch_first)

            # Send data through the model and get the prediction results
            y_vpred = model(
                src=v_src,
                tgt=v_trg,
                src_mask=src_mask,
                tgt_mask=tgt_mask
            )

            # Measure the loss/error of the predicted value vs the validation target (v_trg_y)
            vloss = criterion(y_vpred.squeeze(-1), v_trg_y)

            # Negative correlation between prediction and validation target
            closs = corr.loss(y_vpred.squeeze(-1).squeeze(-1), v_trg_y.squeeze(-1))

            # Saves the vloss and the closs of the batch
            v_loss.append(vloss.cpu().detach().numpy())
            c_loss.append(closs.cpu().detach().numpy())

    # Averaging over the v_loss and c_loss of every batch
    avg_vloss = np.asarray(v_loss).mean()
    avg_closs = np.asarray(c_loss).mean()

    # Saves the averages for the epoch
    v_losses.append(avg_vloss)
    corr_loss.append(avg_closs)

    # Saves the model parameters whenever the average validation loss is the lowest occured
    if avg_vloss < loss_min:
        loss_min = avg_vloss
        torch.save(model.state_dict(), f'{path}/Models/{model_version}/Model_parameters.pth')

    # Saves all the individual parameters for every epoch
    torch.save(model.state_dict(), f'{path}/Models/{model_version}/Training parameters/Parameters_epoch_{i}.pth')

    # Print the losses during the loop
    print(f'Epoch {i} with loss: {avg_tloss}, validation loss: {avg_vloss} and Negative Correlation: {avg_closs}')


# Plots the train and validation loss per epoch. The labels and the legend make
# the two curves distinguishable in the rendered figure
plt.plot(losses, label='Train loss')
plt.plot(v_losses, label='Validation loss')
plt.ylabel('MSE')
plt.xlabel('Epoch')
plt.legend()

# Stores the per-epoch values (one column per metric) in the folder of this model version
loss = pd.DataFrame([losses, v_losses, corr_loss], index= ['Train loss', 'Validation loss', 'Negative correlation']).T
loss.to_excel(f'{path}/Models/{model_version}/Training loss values.xlsx')