## Modelling and Training Notebook

### Imports

In [48]:
import pickle
import os
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm as tq
import torch
import torch.nn.functional as F
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.utils import check_array

import config as cfg

In [4]:
def load_ds(ds_filename):
    """Read a pickled dataset object from disk and return it.

    Args:
        ds_filename: Path of the pickle file to open (opened in binary mode).

    Returns:
        Whatever object was stored in the pickle file.

    Note:
        Unpickling can execute arbitrary code, so only use this on files
        that come from a trusted source.
    """
    with open(ds_filename, mode="rb") as handle:
        dataset = pickle.load(handle)
    return dataset

Custom metric functions for MAPE (mean absolute percentage error) and Theil's U

In [8]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), expressed in percent.

    MAPE = 100 * mean(|(y_true - y_pred) / y_true|)

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values with the same number of
            elements as ``y_true``.

    Returns:
        The MAPE as a float. If ``y_true`` contains zeros the result is
        ``inf`` or ``nan``, since the metric is undefined there.

    Raises:
        ValueError: If the two inputs do not hold the same number of elements.
    """
    # Flatten both inputs so that e.g. an (N, 1) prediction and an (N,) target
    # are compared element-wise instead of being broadcast to (N, N).
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.size != y_pred.size:
        raise ValueError(
            "y_true and y_pred must have the same number of elements "
            "({} != {})".format(y_true.size, y_pred.size)
        )

    # The absolute value covers the whole ratio so negative targets still
    # produce a positive percentage error.
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def theil_u(y_true, y_pred):
    """Return Theil's U (U1) inequality coefficient.

    U1 = sqrt(mean((y_true - y_pred)^2))
         / (sqrt(mean(y_true^2)) + sqrt(mean(y_pred^2)))

    The value lies in [0, 1]; 0 means a perfect forecast.

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values with the same number of
            elements as ``y_true``.

    Returns:
        The coefficient as a float. Returns 0.0 when both series are
        identically zero (numerator and denominator are both zero).

    Raises:
        ValueError: If the two inputs do not hold the same number of elements.
    """
    # Flatten so (N, 1) and (N,) inputs are compared element-wise.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.size != y_pred.size:
        raise ValueError(
            "y_true and y_pred must have the same number of elements "
            "({} != {})".format(y_true.size, y_pred.size)
        )

    # The numerator is the *root* mean squared error; using the plain MSE
    # would make the statistic scale-dependent and unbounded.
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    den = np.sqrt(np.mean(y_true ** 2)) + np.sqrt(np.mean(y_pred ** 2))

    if den == 0:
        # Both series are all zeros, so the forecast is exact.
        return 0.0

    return rmse / den

Transform the pandas DataFrames (train / validation / test splits) into PyTorch DataLoaders for training

In [37]:
def df_to_dataloader(ds, targetcol_name="Target", batch_size=1):
    """Build train/val/test DataLoaders from a dict of DataFrames.

    For each split the target column and the "Date" column are removed from
    the features; the remaining columns become the feature tensor and the
    target column becomes a 1-D target tensor. The input frames are not
    modified.

    Args:
        ds: Mapping with the keys 'train', 'val' and 'test', each holding a
            DataFrame that contains ``targetcol_name`` and a "Date" column.
        targetcol_name: Name of the column holding the regression target.
        batch_size: Batch size used for all three loaders (defaults to 1,
            the value that was previously hard-coded).

    Returns:
        A ``(trainloader, valloader, testloader)`` tuple. Only the training
        loader shuffles its samples.

    Raises:
        KeyError: If a split, the target column or the "Date" column is
            missing.
    """
    def _split_to_loader(split_name, shuffle):
        # Convert one split into a TensorDataset-backed DataLoader.
        frame = ds[split_name]
        target = frame[targetcol_name]
        # drop() returns a new frame, so the caller's data stays untouched.
        features = frame.drop(columns=[targetcol_name, "Date"])
        dataset = torch.utils.data.TensorDataset(
            torch.Tensor(np.array(features)),
            torch.Tensor(np.array(target)),
        )
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle
        )

    trainloader = _split_to_loader("train", shuffle=True)
    valloader = _split_to_loader("val", shuffle=False)
    testloader = _split_to_loader("test", shuffle=False)
    return trainloader, valloader, testloader

In [38]:
# Load the pickled dataset of each ETF (SPY, DIA, QQQ) from the data folder.
SPY_ds, DIA_ds, QQQ_ds = map(
    load_ds,
    ("data/SPY_ds.pkl", "data/DIA_ds.pkl", "data/QQQ_ds.pkl"),
)

In [39]:
# Build the train / validation / test loaders from the "MLP" entry of the SPY dataset.
trainloader_SPY_MLP, valloader_SPY_MLP, testloader_SPY_MLP = df_to_dataloader(SPY_ds["MLP"])

In [42]:
class MLP(torch.nn.Module):
    """Single-hidden-layer perceptron with a sigmoid activation.

    Architecture: Linear(input_size -> hidden_size) -> sigmoid ->
    Linear(hidden_size -> output_size). The output layer is linear (no
    activation).
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two linear layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output values per sample.
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.hidden = torch.nn.Linear(self.input_size, self.hidden_size)
        self.out = torch.nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Tensor whose last dimension has ``input_size`` elements.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``output_size``.
        """
        # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid.
        x = torch.sigmoid(self.hidden(x))
        x = self.out(x)

        return x

In [68]:
# Hyper-parameters of the SPY MLP, read once from the project config module.
spy_mlp_cfg = cfg.train_parameters["SPY"]["MLP"]

# NOTE(review): the network is built with one more input than the configured
# input_size; presumably the feature frame carries one extra column. Confirm
# against the dataset before changing.
SPY_MLP = MLP(spy_mlp_cfg["input_size"] + 1,
              spy_mlp_cfg["hidden_size"],
              spy_mlp_cfg["output_size"]
             )

if spy_mlp_cfg["optim"] == "SGD":
    SPY_MLP_optimizer = torch.optim.SGD(SPY_MLP.parameters(),
                                        lr=spy_mlp_cfg["lr"],
                                        momentum=spy_mlp_cfg["momentum"]
                                       )
else:
    # Adam does not accept a `momentum` keyword (its moment decay rates are
    # set through `betas`); passing it raises a TypeError.
    SPY_MLP_optimizer = torch.optim.Adam(SPY_MLP.parameters(),
                                         lr=spy_mlp_cfg["lr"]
                                        )

# Mean squared error between the network output and the target.
loss_fn = torch.nn.MSELoss()

In [69]:
# Per-epoch mean losses, kept for later inspection / plotting.
train_losses, val_losses = [], []

n_epochs = cfg.train_parameters["SPY"]["MLP"]["epochs"]
LOG_EVERY = 1000  # print a progress line every LOG_EVERY epochs

for epoch in range(n_epochs):
    # ---- training pass ----
    SPY_MLP.train()

    train_loss, val_loss = [], []

    for features, target in trainloader_SPY_MLP:
        SPY_MLP_optimizer.zero_grad()

        outputs = SPY_MLP(features)
        # Drop a trailing singleton dimension so a (batch, 1) prediction lines
        # up with the (batch,) target instead of relying on broadcasting
        # (which makes MSELoss emit a size-mismatch warning).
        loss = loss_fn(outputs.squeeze(-1), target)
        loss.backward()
        SPY_MLP_optimizer.step()

        train_loss.append(loss.item())

    # ---- validation pass (no gradient tracking) ----
    SPY_MLP.eval()
    with torch.no_grad():
        for features, target in valloader_SPY_MLP:
            outputs = SPY_MLP(features)
            loss = loss_fn(outputs.squeeze(-1), target)

            val_loss.append(loss.item())

    epoch_train_loss = np.mean(train_loss)
    epoch_val_loss = np.mean(val_loss)
    train_losses.append(epoch_train_loss)
    val_losses.append(epoch_val_loss)

    # Report the first epoch and then every LOG_EVERY-th epoch, using the
    # 1-based epoch number so the last line reads n_epochs/n_epochs.
    if epoch == 0 or (epoch + 1) % LOG_EVERY == 0:
        print("Epoch {}/{} - Train Loss : {:.4f} / Val Loss : {:.4f}".format(epoch + 1,
                                                                             n_epochs,
                                                                             epoch_train_loss,
                                                                             epoch_val_loss))

Epoch 0/30000 - Train Loss : 817.7941 / Val Loss : 1091.9121
Epoch 999/30000 - Train Loss : 62.9436 / Val Loss : 1182.0085


KeyboardInterrupt: 