In [None]:
import os
import NN
import utils
from torch import save as save_model
from torch import load as load_model
import config as cfg
import pandas as pd
import seaborn as sns
import numpy as np
import torch
import matplotlib.pyplot as plt
import warnings
from tqdm import tqdm_notebook as tqdm
warnings.filterwarnings('ignore')
sns.set()

In [None]:
# Experiment selection: which ETF and which network family to work with.
ETF = 'SPY' # 'SPY', 'DIA', 'QQQ'
NNtype = 'MLP' # 'MLP', 'RNN', 'PSN'

# Training hyper-parameters for the selected (ETF, network) pair.
params = cfg.train_parameters[ETF][NNtype]

# Lags of the return series used as model inputs.  The table is looked up
# from the ETF name (cfg.SPYfeatures, cfg.DIAfeatures, cfg.QQQfeatures) so
# that changing ETF above cannot silently keep using the SPY lags.
inputs_lag = getattr(cfg, ETF + "features")[NNtype]

In [None]:
# Load the three pre-split datasets stored under data/<ETF>/<NNtype>/.
traindf, validdf, testdf = (
    utils.load_file(os.path.join("data", ETF, NNtype, split_file))
    for split_file in ("Train.pkl", "Valid.pkl", "Test.pkl")
)
# Show the first rows of the training split as the cell output.
traindf.head()


In [None]:
# One input column per configured lag, named "Return_<lag>".
features_col = ["Return_" + str(lag) for lag in inputs_lag]
target_col = 'Target'

# Mean / standard deviation of the `Return` column, computed per split.
# NOTE(review): the validation and test loaders are normalised with their own
# statistics rather than the training ones -- confirm this is intended.
mu_train, sigma_train = traindf.Return.mean(), traindf.Return.std()
mu_val, sigma_val = validdf.Return.mean(), validdf.Return.std()
mu_test, sigma_test = testdf.Return.mean(), testdf.Return.std()

# Batch size 1 everywhere; only the training loader is shuffled.
trainloader = utils.DataFrame2DataLoader(
    traindf, features_col, target_col,
    batch_size=1, normalize=True, mu=mu_train, sigma=sigma_train, shuffle=True,
)
validloader = utils.DataFrame2DataLoader(
    validdf, features_col, target_col,
    batch_size=1, normalize=True, mu=mu_val, sigma=sigma_val,
)
testloader = utils.DataFrame2DataLoader(
    testdf, features_col, target_col,
    batch_size=1, normalize=True, mu=mu_test, sigma=sigma_test,
)

In [None]:
import torch.nn as nn 
from sklearn.metrics import mean_absolute_error, mean_squared_error
from utils import mean_absolute_percentage_error, theilU, PT_test
from torch.optim.lr_scheduler import StepLR




class LSTM(nn.Module):
    """Stacked LSTM with a linear read-out of the last time step.

    The input sequence is processed as a single batch element (batch size 1).
    The recurrent state is kept on the instance in ``hidden_cell`` and is fed
    back in on the next call, so callers that want independent sequences must
    reset it (see ``init_hidden``) before each forward pass.

    Args:
        input_size: number of features per time step.
        hidden_layer_size: width of every LSTM layer.
        output_size: number of values predicted per time step.
        num_layers: number of stacked LSTM layers (was hard-coded to 2).
        dropout: dropout probability between LSTM layers (was hard-coded
            to 0.80).
    """

    def __init__(self, input_size=1, hidden_layer_size=70, output_size=1,
                 num_layers=2, dropout=0.80):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_layer_size,
                            num_layers=num_layers, dropout=dropout)
        self.linear = nn.Linear(hidden_layer_size, output_size)

        # (h_0, c_0), each of shape (num_layers, batch=1, hidden_layer_size).
        self.hidden_cell = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero ``(h_0, c_0)`` pair for batch size 1."""
        shape = (self.num_layers, 1, self.hidden_layer_size)
        return (torch.zeros(*shape), torch.zeros(*shape))

    def forward(self, input_seq):
        """Run one sequence through the network.

        Args:
            input_seq: tensor whose first dimension is the sequence length;
                it is viewed as ``(seq_len, 1, -1)`` before entering the LSTM.

        Returns:
            The linear read-out for the last time step, shape
            ``(output_size,)``.
        """
        seq_len = len(input_seq)
        lstm_out, next_state = self.lstm(input_seq.view(seq_len, 1, -1),
                                         self.hidden_cell)
        # Store the state without its autograd history.  Keeping the graph
        # attached makes a later backward() walk into the already-freed graph
        # of the previous call ("backward through the graph a second time")
        # whenever the caller does not reset the state manually.  The value
        # of the state is unchanged, and the current output's gradients flow
        # through ``lstm_out``, so they are unaffected.
        self.hidden_cell = tuple(state.detach() for state in next_state)
        predictions = self.linear(lstm_out.view(seq_len, -1))
        return predictions[-1]
    
class Model():
    """Training / evaluation wrapper around the ``LSTM`` network.

    Args:
        params: dict of hyper-parameters.  Required keys: ``"epochs"``,
            ``"lr"``, ``"validation"`` (bool) and ``"validation_freq"``
            (validate every N epochs).  Optional keys, defaulting to the
            previously hard-coded values: ``"weight_decay"`` (0.80),
            ``"lr_step_size"`` (2) and ``"lr_gamma"`` (0.01).
    """

    def __init__(self, params):
        self.model = LSTM()
        self.epochs = params["epochs"]
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=params["lr"],
            weight_decay=params.get("weight_decay", 0.80),
        )
        self.validation = params["validation"]
        self.validation_freq = params["validation_freq"]
        self.loss_function = nn.MSELoss()
        # Stepped once per epoch in train_model.
        self.scheduler = StepLR(
            self.optimizer,
            step_size=params.get("lr_step_size", 2),
            gamma=params.get("lr_gamma", 0.01),
        )

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _denormalize(values, mu, sigma):
        """Map values back to the original scale (single place for the formula)."""
        # NOTE(review): this is the formula the notebook has always used.  It
        # is the inverse of ``x / sigma - mu``; if utils.DataFrame2DataLoader
        # normalises with ``(x - mu) / sigma`` the inverse should instead be
        # ``values * sigma + mu`` -- confirm against utils.
        return (values + mu) * sigma

    def _reset_hidden(self):
        """Zero the LSTM state so that every sequence starts from scratch."""
        shape = (self.model.lstm.num_layers, 1, self.model.hidden_layer_size)
        self.model.hidden_cell = (torch.zeros(*shape), torch.zeros(*shape))

    def _forward(self, seq):
        """Reset the recurrent state and run one (batch-size-1) sample."""
        self._reset_hidden()
        return self.model(seq.squeeze())

    def _collect(self, dataloader):
        """Return ``(predictions, targets)`` as 1-D float arrays, in loader order."""
        preds, targets = [], []
        self.model.eval()
        with torch.no_grad():
            for seq, labels in dataloader:
                preds.append(self._forward(seq).reshape(-1))
                targets.append(torch.as_tensor(labels).reshape(-1))
        if not preds:
            return np.empty(0), np.empty(0)
        as_array = lambda chunks: torch.cat(chunks).detach().cpu().numpy().astype(float)
        return as_array(preds), as_array(targets)

    def _summed_loss(self, dataloader, mu, sigma):
        """Sum of the per-sample losses on de-normalised values, as a float."""
        total = 0.0
        for seq, labels in dataloader:
            y_pred = self._forward(seq)
            total += self.loss_function(
                self._denormalize(y_pred, mu, sigma),
                self._denormalize(labels, mu, sigma),
            ).item()
        return total

    # --------------------------------------------------------------- public API

    def evaluate_model(self, dataloader, mu, sigma):
        """Print error metrics for ``dataloader`` and plot predictions vs targets.

        Metrics are computed on de-normalised values (see ``_denormalize``).
        The recurrent state is reset before every sample, exactly as during
        training.

        Args:
            dataloader: iterable of ``(sequence, label)`` pairs.
            mu, sigma: statistics used to de-normalise predictions and labels.

        Returns:
            dict with keys ``"MAE"``, ``"MAPE"``, ``"RMSE"`` and ``"TheilU"``.
        """
        valid_preds, valid_targets = self._collect(dataloader)
        y_true = self._denormalize(valid_targets, mu, sigma)
        y_hat = self._denormalize(valid_preds, mu, sigma)

        validMAE = mean_absolute_error(y_true, y_hat)
        validMAPE = mean_absolute_percentage_error(y_true, y_hat)
        # mean_squared_error returns the MSE; take the root so the value
        # matches the "RMSE" label it is printed under.
        validRMSE = np.sqrt(mean_squared_error(y_true, y_hat))
        validTheilU = theilU(y_true, y_hat)

        print("MAE : {:.4f} |  MAPE  : {:.4f} |  RMSE : {:.5f} | Theil-U {:.4f}".format(validMAE, validMAPE, validRMSE, validTheilU))

        # Dedicated figure: repeated calls used to pile every curve onto the
        # same implicit axes.
        fig, ax = plt.subplots()
        ax.plot(valid_preds, label="prediction")
        ax.plot(valid_targets, label="target")
        ax.set(title="Predictions vs targets", xlabel="sample",
               ylabel="value as yielded by the dataloader")
        ax.legend()

        return {"MAE": validMAE, "MAPE": validMAPE, "RMSE": validRMSE, "TheilU": validTheilU}

    def train_model(self, trainloader, validloader, mu_train, sigma_train, mu_val, sigma_val):
        """Train for ``self.epochs`` epochs, validating every ``validation_freq`` epochs.

        Args:
            trainloader, validloader: iterables of ``(sequence, label)`` pairs.
            mu_train, sigma_train, mu_val, sigma_val: statistics used to
                de-normalise values when reporting the losses.

        Returns:
            dict with the validated epoch indices (``"epoch"``) and the summed
            de-normalised losses (``"train_loss"``, ``"val_loss"``).
        """
        train_loss, val_loss, checked_epochs = [], [], []

        for i in tqdm(range(self.epochs)):
            self.model.train()
            for seq, labels in trainloader:
                self.optimizer.zero_grad()
                y_pred = self._forward(seq)
                single_loss = self.loss_function(y_pred, labels)
                single_loss.backward()
                self.optimizer.step()

            # StepLR counts epochs.  Stepping it after every batch (as was
            # done before) multiplied the learning rate by gamma every
            # `step_size` samples and drove it to ~0 almost immediately.
            self.scheduler.step()

            if self.validation and i % self.validation_freq == 0:
                self.model.eval()
                with torch.no_grad():
                    loss_valid = self._summed_loss(validloader, mu_val, sigma_val)
                    loss_train = self._summed_loss(trainloader, mu_train, sigma_train)
                checked_epochs.append(i)
                val_loss.append(loss_valid)
                train_loss.append(loss_train)
                print(f'epoch: {i:3} train_loss: {loss_train:10.10f} val_loss: {loss_valid:10.10f} ')

        # Plot against the epochs that were actually validated, so x and y
        # always have the same length; skip the plot when nothing was recorded.
        if checked_epochs:
            fig, ax = plt.subplots()
            sns.lineplot(x=checked_epochs, y=train_loss, ax=ax, label="train")
            sns.lineplot(x=checked_epochs, y=val_loss, ax=ax, label="validation")
            ax.set(title="Loss history", xlabel="epoch", ylabel="summed loss")

        return {"epoch": checked_epochs, "train_loss": train_loss, "val_loss": val_loss}

    def predict(self, testloader, mu, sigma):
        """Return de-normalised predictions for every sample of ``testloader``.

        Args:
            testloader: iterable of ``(sequence, label)`` pairs; labels are
                not used for the prediction.
            mu, sigma: statistics used to de-normalise the predictions.

        Returns:
            1-D ``numpy`` float array, one value per sample, in loader order
            (empty when the loader yields nothing).
        """
        predicted_labels, _ = self._collect(testloader)
        return self._denormalize(predicted_labels, mu, sigma)

      



In [None]:
# Hyper-parameters for this run (these replace the values read from cfg above).
params = {"lr": 0.1, "epochs": 15, "validation":True, "validation_freq":5}

# Seed the global torch / numpy random generators before the model is built
# so that a Restart & Run All gives repeatable results.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

print("Testing the model with random initialization on the test set.. ")
model = Model(params)
model.evaluate_model(testloader, mu_test, sigma_test)
print("Model is training ..")
model.train_model(trainloader, validloader, mu_train, sigma_train, mu_val, sigma_val)
print("Testing the model after training.. ")
model.evaluate_model(testloader, mu_test, sigma_test)

predictions = model.predict(testloader, mu_test, sigma_test)
# The result was originally bound to a misspelled name; keep it as an alias
# in case other cells refer to it.
preditions = predictions

