# Define the level 1 models

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import yaml
from os.path import join

import data_prep

# Read in config file
hyperparams = yaml.safe_load(open('hyperparams.yaml'))
print(hyperparams)

# Hyperparameters
num_features = hyperparams['num_features']  # Number of features
use_time_horizon = hyperparams['use_time_horizon']  # Use time horizon
HORIZON = hyperparams['horizon']  # Number of days into the future to predict
DAYS_FORWARD = hyperparams['days_forward']  # Number of days into the future to predict
END_SPLIT = hyperparams['end_split']  # End of the split
DATA_PATH = join('data', 'original_dataset', 'Finalised_datasets', 'amzn_all_sources_WITH_TH_2017-2020.csv') #'Finalised_datasets',
models = hyperparams['models']  # Models to train

print(models)
lstm_params = models[0]

# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(num_features, lstm_params['hidden_size'], batch_first=True)
        self.dropout1 = nn.Dropout(lstm_params['dropout'])
        self.lstm2 = nn.LSTM(lstm_params['hidden_size'], lstm_params['hidden_size'], batch_first=True)
        self.dropout2 = nn.Dropout(lstm_params['dropout'])
        self.lstm3 = nn.LSTM(lstm_params['hidden_size'],lstm_params['hidden_size'], batch_first=True)
        self.dropout3 = nn.Dropout(lstm_params['dropout'])
        self.lstm4 = nn.LSTM(lstm_params['hidden_size'],lstm_params['hidden_size'], batch_first=True)  
        self.dropout4 = nn.Dropout(lstm_params['dropout'])
        self.fc = nn.Linear(lstm_params['hidden_size'], 1) 
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        if use_time_horizon:
            outputs = []
            # For each sequence in the batch
            for i in range(x.shape[0]):
                x_ele = x[i]
                # remove padding
                x_ele = x_ele[x_ele[:, 0] != -1]
                # print("before", x_ele.shape)
                x_ele = x_ele.unsqueeze(0) # Add a batch dimension
                # print("after", x_ele.shape)

                out, _ = self.lstm1(x_ele)  # process single sequence
                out = self.dropout1(out)
                out, _ = self.lstm2(out)
                out = self.dropout2(out)
                out, _ = self.lstm3(out)
                out = self.dropout3(out)
                out, _ = self.lstm4(out)
                out = out[:, -1, :]  # take the last output from the last LSTM layer
                out = self.dropout4(out)
                out = self.fc(out)
                out = self.sigmoid(out)
                outputs.append(out)

            outputs = torch.cat(outputs, dim=0)  # recombine into a single batch tensor
            return outputs
        else:
            # print("x shape", x.shape)
            x, _ = self.lstm1(x)
            x = self.dropout1(x)
            x, _ = self.lstm2(x)
            x = self.dropout2(x)
            x, _ = self.lstm3(x)
            x = self.dropout3(x)
            x, _ = self.lstm4(x)
            x = x[:, -1, :]  # Take the last output from the last LSTM layer
            x = self.dropout4(x)
            x = self.fc(x)   # Linear layer to map to 1 output
            x = self.sigmoid(x)
            return x

gru_params = models[1]
# GRU Model
class GRUModel(nn.Module):
    def __init__(self):
        super(GRUModel, self).__init__()
        self.gru1 = nn.GRU(num_features, gru_params['hidden_size'], batch_first=True)
        self.dropout1 = nn.Dropout(gru_params['dropout'])
        self.gru2 = nn.GRU(gru_params['hidden_size'], gru_params['hidden_size'], batch_first=True)
        self.dropout2 = nn.Dropout(gru_params['dropout'])
        self.gru3 = nn.GRU(gru_params['hidden_size'], gru_params['hidden_size'], batch_first=True)
        self.dropout3 = nn.Dropout(gru_params['dropout'])
        self.gru4 = nn.GRU(gru_params['hidden_size'], gru_params['hidden_size'], batch_first=True)
        self.dropout4 = nn.Dropout(gru_params['dropout'])
        self.fc = nn.Linear(gru_params['hidden_size'], 1)  # Ensures the output is of size [batch_size, 1]
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        if use_time_horizon:

            outputs = []
            # For each sequence in the batch
            for i in range(x.shape[0]):
                x_ele = x[i]
                # remove padding
                x_ele = x_ele[x_ele[:, 0] != -1]
                x_ele = x_ele.unsqueeze(0)
                # print(x_ele.shape)

                # Pass through the GRU layers
                x_ele, _ = self.gru1(x_ele)
                x_ele = self.dropout1(x_ele)
                x_ele, _ = self.gru2(x_ele)
                x_ele = self.dropout2(x_ele)
                x_ele, _ = self.gru3(x_ele)
                x_ele = self.dropout3(x_ele)
                x_ele, _ = self.gru4(x_ele)
                x_ele = x_ele[:, -1, :]  # Take the last output
                x_ele = self.dropout4(x_ele)
                x_ele = self.fc(x_ele)   # Linear layer to map to 1 output
                x_ele = self.sigmoid(x_ele)
                outputs.append(x_ele)

            outputs = torch.cat(outputs, dim=0)  # recombine into a single batch tensor
            return outputs
        else:
            x, _ = self.gru1(x)
            x = self.dropout1(x)
            x, _ = self.gru2(x)
            x = self.dropout2(x)
            x, _ = self.gru3(x)
            x = self.dropout3(x)
            x, _ = self.gru4(x)
            x = x[:, -1, :]  # Take the last output
            x = self.dropout4(x)
            x = self.fc(x)   # Linear layer to map to 1 output
            x = self.sigmoid(x)
            return x

{'num_features': 9, 'use_time_horizon': False, 'horizon': 10, 'days_forward': 1, 'end_split': 30, 'return_lowest_val_loss': False, 'models': [{'model': 'lstm', 'hidden_size': 50, 'dropout': 0.4, 'learning_rate': 0.0016, 'batch_size': 16, 'num_epochs': 100, 'shuffle': True}, {'model': 'gru', 'hidden_size': 50, 'dropout': 0.4, 'learning_rate': 0.0008, 'batch_size': 16, 'num_epochs': 100, 'shuffle': True}, {'model': 'mlp', 'hidden_size': 4, 'learning_rate': 0.001, 'batch_size': 8, 'num_epochs': 100, 'shuffle': True}]}
[{'model': 'lstm', 'hidden_size': 50, 'dropout': 0.4, 'learning_rate': 0.0016, 'batch_size': 16, 'num_epochs': 100, 'shuffle': True}, {'model': 'gru', 'hidden_size': 50, 'dropout': 0.4, 'learning_rate': 0.0008, 'batch_size': 16, 'num_epochs': 100, 'shuffle': True}, {'model': 'mlp', 'hidden_size': 4, 'learning_rate': 0.001, 'batch_size': 8, 'num_epochs': 100, 'shuffle': True}]


# Define the meta learner

In [7]:
# it's a fully-connect neuralnetwork with three layers; the activation function for this model is the Rectified Linear Unit (ReLu).
# NOTE: The paper doesn't specify the number of neurons in the hidden layers, so I'm basing on the stanford paper
meta_params = models[2]

class MetaLearner(nn.Module):
    def __init__(self):
        super(MetaLearner, self).__init__()
        self.fc1 = nn.Linear(2, meta_params['hidden_size'], bias=True)
        self.fc12 = nn.Linear(meta_params['hidden_size'], 1, bias=False)
        # self.fc2 = nn.Linear(30, 25)
        # self.fc3 = nn.Linear(25, 20)
        # self.fc4 = nn.Linear(20, 1, bias=False)
        self.sigmoid = nn.Sigmoid() 
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc12(x)
        # x = self.relu(self.fc2(x))
        # x = self.relu(self.fc3(x))
        # x = self.fc4(x)
        x = self.sigmoid(x) 
        return x

# Load in data

In [8]:
import pandas as pd
import numpy as np

from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader, TensorDataset

def get_data_stanford():

    # To drop columns, change the data_prep.py file

    # split_y: tuple
    #     (train_y, validate_y, test_y)
    # split_X: tuple
    #     (train_X, validate_X, test_X)
    split_y, split_X = data_prep.data_prep(DATA_PATH, HORIZON, DAYS_FORWARD, END_SPLIT, use_time_horizon)


    return split_X, split_y



(x_train, x_val, x_test), (y_train, y_val, y_test) = get_data_stanford()

print('x_train.shape', x_train.shape)
print('y_train.shape', y_train.shape)
print('x_val.shape', x_val.shape)
print('y_val.shape', y_val.shape)
print('x_test.shape', x_test.shape)
print('y_test.shape', y_test.shape)




          date  mean_compound_reuters  mean_compound_guardian  \
29  2018/01/29                    0.0                  0.0000   
30  2018/01/30                    0.0                  0.4215   
31  2018/01/31                    0.0                  0.0000   
32  2018/02/01                    0.0                  0.2074   
33  2018/02/02                    0.0                  0.0000   

    mean_compound_cnbc  mean_compound_other   mean TH      Close       Volume  \
29              0.0000             0.526700  3.500000  70.884003  114038000.0   
30              0.3854             0.117916  4.153333  71.890999  117438000.0   
31              0.0000             0.088438  4.125000  72.544502  128494000.0   
32              0.1779             0.177900  3.666667  69.500000  182276000.0   
33              0.1779             0.580220  3.600000  71.497498  222514000.0   

         Open       High  ...  Adj Close     SMA_15     SMA_30  SMA_Indicator  \
29  70.459000  71.569504  ...  70.884003 

# Instatiate the models

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Instantiate models
lstm_model = LSTMModel()
gru_model = GRUModel()
meta_model = MetaLearner()

# Define loss and optimizer
criterion = nn.BCELoss()
lstm_optimizer = optim.RMSprop(lstm_model.parameters(), lr=lstm_params['learning_rate'], weight_decay=1e-5) # 16 batch size, 150 epochs
gru_optimizer = optim.RMSprop(gru_model.parameters(), lr=gru_params['learning_rate'], weight_decay=1e-5) # 16 batch size, 200 epochs
meta_criterion = nn.BCELoss()
meta_optimizer = optim.Adam(meta_model.parameters(), eps=1e-7, lr=meta_params['learning_rate'])
base_models_batch_size = lstm_params['batch_size'] # same batch size for both models
meta_learner_batch_size = meta_params['batch_size']

# Train the base models

In [10]:
import copy

# Convert data to PyTorch tensors and create DataLoader
X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

X_val_tensor = torch.tensor(x_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

print(X_train_tensor.shape)
print(y_train_tensor.shape)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=base_models_batch_size, shuffle=lstm_params['shuffle']) #Stanford had shuffle true

val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=base_models_batch_size) 

import torch

def train_model(model, optimizer, criterion, train_loader, n_epochs=150, val_loader=None, return_lowest_val_loss=False):
    model.train()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_loss = float('inf')
    best_val_accuracy = 0

    for epoch in range(n_epochs):
        epoch_loss = 0
        correct_train = 0
        total_train = 0
        
        # Training phase
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch.view(-1, 1))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

            # Calculate training accuracy
            predicted_train = output.round()
            total_train += y_batch.size(0)
            correct_train += (predicted_train == y_batch.view(-1, 1)).sum().item()

        # Output training loss and accuracy
        train_loss_avg = epoch_loss / len(train_loader)
        train_accuracy = correct_train / total_train
        print(f'Epoch {epoch+1}/{n_epochs}, Training Loss: {train_loss_avg:.4f}, Training Accuracy: {train_accuracy:.2f}', end='')

        # Validation phase (if val_loader is provided)
        if val_loader:
            model.eval()
            val_loss = 0
            correct_val = 0
            total_val = 0
            with torch.no_grad():
                for X_val, y_val in val_loader:
                    output_val = model(X_val)
                    val_loss += criterion(output_val, y_val.view(-1, 1)).item()

                    predicted_val = output_val.round()
                    total_val += y_val.size(0)
                    correct_val += (predicted_val == y_val.view(-1, 1)).sum().item()

            val_loss_avg = val_loss / len(val_loader)
            val_accuracy = correct_val / total_val
            print(f', Validation Loss: {val_loss_avg:.4f}, Validation Accuracy: {val_accuracy:.2f}')
            
            # if val_accuracy > best_val_accuracy - 1e-2:
            #     best_val_loss = val_loss_avg
            #     best_val_accuracy = val_accuracy
            #     best_model_wts = copy.deepcopy(model.state_dict())
            # # Check if this is the best model so far
            if val_loss_avg < best_val_loss + 1e-2 : # Add 1e-2 as a "buffer" to favor the latest model
                best_val_loss = val_loss_avg
                best_val_accuracy = val_accuracy
                best_model_wts = copy.deepcopy(model.state_dict())

            model.train()

        else:
            print()  # Just move to the next line

    # Load the best model weights
    if return_lowest_val_loss and val_loader != None:
        print(f'Lowest validation loss: {best_val_loss:.4f}')
        print(f'Best validation accuracy: {best_val_accuracy:.2f}')
        model.load_state_dict(best_model_wts)
    return model



# Train the LSTM model
print("Training LSTM Model")
train_model(lstm_model, lstm_optimizer, criterion, train_loader, lstm_params['num_epochs'], val_loader, hyperparams['return_lowest_val_loss'])

# Train the GRU model
print("Training GRU Model")
train_model(gru_model, gru_optimizer, criterion, train_loader, gru_params['num_epochs'], val_loader, hyperparams['return_lowest_val_loss']) # 200

torch.Size([415, 10, 9])
torch.Size([415])
Training LSTM Model
Epoch 1/100, Training Loss: 0.6945, Training Accuracy: 0.56, Validation Loss: 0.7033, Validation Accuracy: 0.47
Epoch 2/100, Training Loss: 0.6895, Training Accuracy: 0.56, Validation Loss: 0.7070, Validation Accuracy: 0.47
Epoch 3/100, Training Loss: 0.6885, Training Accuracy: 0.56, Validation Loss: 0.7048, Validation Accuracy: 0.47
Epoch 4/100, Training Loss: 0.6865, Training Accuracy: 0.56, Validation Loss: 0.7081, Validation Accuracy: 0.47
Epoch 5/100, Training Loss: 0.6862, Training Accuracy: 0.56, Validation Loss: 0.6978, Validation Accuracy: 0.47
Epoch 6/100, Training Loss: 0.6861, Training Accuracy: 0.56, Validation Loss: 0.7016, Validation Accuracy: 0.47
Epoch 7/100, Training Loss: 0.6860, Training Accuracy: 0.55, Validation Loss: 0.6976, Validation Accuracy: 0.47
Epoch 8/100, Training Loss: 0.6853, Training Accuracy: 0.56, Validation Loss: 0.6944, Validation Accuracy: 0.47
Epoch 9/100, Training Loss: 0.6864, Train

GRUModel(
  (gru1): GRU(9, 50, batch_first=True)
  (dropout1): Dropout(p=0.4, inplace=False)
  (gru2): GRU(50, 50, batch_first=True)
  (dropout2): Dropout(p=0.4, inplace=False)
  (gru3): GRU(50, 50, batch_first=True)
  (dropout3): Dropout(p=0.4, inplace=False)
  (gru4): GRU(50, 50, batch_first=True)
  (dropout4): Dropout(p=0.4, inplace=False)
  (fc): Linear(in_features=50, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

# Use base models to predict the validation data, this will be used as input to the Meta Learner

In [11]:
lstm_val_predictions = lstm_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy().reshape(-1,1)
gru_val_predictions = gru_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy().reshape(-1,1)

# lstm_pred = lstm_model.predict(X).reshape(-1, 1)
# gru_pred = gru_model.predict(X).reshape(-1, 1)

# Form and return new data set
# new_X = np.hstack((lstm_pred, gru_pred))


# Combine predictions to form new training data for the meta-learner
meta_X_train = np.concatenate((lstm_val_predictions, gru_val_predictions), axis=1)#meta_X_train = np.hstack((lstm_val_predictions, gru_val_predictions))#

print(meta_X_train.shape)



(30, 2)


# Train meta learner

In [12]:
meta_X_train_tensor = torch.tensor(meta_X_train, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

meta_train_dataset = TensorDataset(meta_X_train_tensor, y_val_tensor)
meta_train_loader = DataLoader(meta_train_dataset, batch_size=meta_learner_batch_size, shuffle=meta_params['shuffle'])



train_model(meta_model, meta_optimizer, meta_criterion, meta_train_loader, meta_params['num_epochs'], return_lowest_val_loss=False)

Epoch 1/100, Training Loss: 0.7085, Training Accuracy: 0.47
Epoch 2/100, Training Loss: 0.7007, Training Accuracy: 0.47
Epoch 3/100, Training Loss: 0.7060, Training Accuracy: 0.47
Epoch 4/100, Training Loss: 0.7038, Training Accuracy: 0.47
Epoch 5/100, Training Loss: 0.7074, Training Accuracy: 0.47
Epoch 6/100, Training Loss: 0.7032, Training Accuracy: 0.47
Epoch 7/100, Training Loss: 0.7048, Training Accuracy: 0.47
Epoch 8/100, Training Loss: 0.7047, Training Accuracy: 0.47
Epoch 9/100, Training Loss: 0.6992, Training Accuracy: 0.47
Epoch 10/100, Training Loss: 0.7007, Training Accuracy: 0.47
Epoch 11/100, Training Loss: 0.7023, Training Accuracy: 0.47
Epoch 12/100, Training Loss: 0.7038, Training Accuracy: 0.47
Epoch 13/100, Training Loss: 0.7001, Training Accuracy: 0.47
Epoch 14/100, Training Loss: 0.6985, Training Accuracy: 0.47
Epoch 15/100, Training Loss: 0.7016, Training Accuracy: 0.47
Epoch 16/100, Training Loss: 0.7014, Training Accuracy: 0.47
Epoch 17/100, Training Loss: 0.70

MetaLearner(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc12): Linear(in_features=4, out_features=1, bias=False)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)

In [16]:
from sklearn.metrics import precision_recall_fscore_support
#  the test dataset will be input into the sub-models again to produce intermediate test data for the meta-learner. Afterward, the meta-learner will use the intermediate test predictions from the sub-models to make the final predictions.
lstm_test_predictions = lstm_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()
gru_test_predictions = gru_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()

meta_X_test = np.concatenate((lstm_test_predictions, gru_test_predictions), axis=1)
meta_X_test_tensor = torch.tensor(meta_X_test, dtype=torch.float32)

meta_test_predictions = meta_model(meta_X_test_tensor).detach().numpy()
print("meta predictions", meta_test_predictions)
# Evaluation metrics
meta_test_predictions = np.round(meta_test_predictions+0.00001)
accuracy = np.mean(meta_test_predictions == y_test)
print(f'Accuracy: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, meta_test_predictions, average='binary')
print(f'Precision: {precision}, Recall: {recall}, F1 Score: {f1}')



print("lstm predictions", lstm_test_predictions)
lstm_test_predictions = np.round(lstm_test_predictions)
accuracy = np.mean(lstm_test_predictions == y_test)
print(f'Accuracy for lstm: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, lstm_test_predictions, average='binary')
print(f'Precision for lstm: {precision}, Recall for lstm: {recall}, F1 Score for lstm: {f1}')

print("gru predictions", gru_test_predictions)
gru_test_predictions = np.round(gru_test_predictions)
accuracy = np.mean(gru_test_predictions == y_test)
print(f'Accuracy for gru: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, gru_test_predictions, average='binary')
print(f'Precision for gru: {precision}, Recall for gru: {recall}, F1 Score for gru: {f1}')


meta predictions [[0.51113045]
 [0.5090996 ]
 [0.51020056]
 [0.5080761 ]
 [0.507032  ]
 [0.50691724]
 [0.501704  ]
 [0.5027114 ]
 [0.50720006]
 [0.5094635 ]
 [0.5111479 ]
 [0.5096528 ]
 [0.5117406 ]
 [0.5082308 ]
 [0.510304  ]
 [0.5057819 ]
 [0.5089369 ]
 [0.50841886]
 [0.506857  ]
 [0.5087054 ]
 [0.5039586 ]
 [0.50737154]
 [0.5096193 ]
 [0.5073389 ]
 [0.5136494 ]
 [0.50902003]
 [0.5114679 ]
 [0.5080706 ]
 [0.5077259 ]
 [0.5116215 ]]
Accuracy: 0.5666666666666667
Precision: 0.5666666666666667, Recall: 1.0, F1 Score: 0.723404255319149
lstm predictions [[0.452587  ]
 [0.4895481 ]
 [0.49102682]
 [0.5404781 ]
 [0.57125294]
 [0.58678144]
 [0.62515205]
 [0.6117266 ]
 [0.5391221 ]
 [0.5009467 ]
 [0.46134025]
 [0.47794503]
 [0.4467628 ]
 [0.5036222 ]
 [0.47654337]
 [0.55356175]
 [0.5463313 ]
 [0.565579  ]
 [0.57224315]
 [0.6020307 ]
 [0.5968666 ]
 [0.5292081 ]
 [0.47653633]
 [0.509502  ]
 [0.41286322]
 [0.5022406 ]
 [0.49657902]
 [0.53235257]
 [0.5223618 ]
 [0.45456   ]]
Accuracy for lstm: 0.51

In [14]:
#Save the models
save_path = join('models/')
# torch.save(lstm_model.state_dict(), save_path + 'lstm_model_amzn3.pth')
# torch.save(gru_model.state_dict(), save_path + 'gru_model_amzn3.pth')
# torch.save(meta_model.state_dict(), save_path + 'meta_model_amzn3.pth')

# load the models
# lstm_model = LSTMModel()
# lstm_model.load_state_dict(torch.load(save_path + 'lstm_model_amzn2.pth'))
# lstm_model.eval()

# gru_model = GRUModel()
# gru_model.load_state_dict(torch.load(save_path + 'gru_model_amzn2.pth'))
# gru_model.eval()

# meta_model = MetaLearner()
# meta_model.load_state_dict(torch.load(save_path + 'meta_model_amzn2.pth'))
# meta_model.eval()

