# Define the level 1 models

In [319]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import data_prep

# Hyperparameters
timesteps = 10  # Number of time steps
num_features = 6  # Number of features
n_epochs = 100  # Number of epochs
learning_rate = 0.001

# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(num_features, 60, batch_first=True)
        self.dropout1 = nn.Dropout(0.4)
        self.lstm2 = nn.LSTM(60, 55, batch_first=True)
        self.dropout2 = nn.Dropout(0.4)
        self.lstm3 = nn.LSTM(55, 50, batch_first=True)
        self.dropout3 = nn.Dropout(0.4)
        self.lstm4 = nn.LSTM(50, 45, batch_first=True)  
        self.dropout4 = nn.Dropout(0.4)
        self.fc = nn.Linear(45, 1) 
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x, _ = self.lstm1(x)
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.dropout2(x)
        x, _ = self.lstm3(x)
        x = self.dropout3(x)
        x, _ = self.lstm4(x)
        x = x[:, -1, :]  # Take the last output from the last LSTM layer
        x = self.dropout4(x)
        x = self.fc(x)   # Linear layer to map to 1 output
        x = self.sigmoid(x)
        return x

# GRU Model
class GRUModel(nn.Module):
    def __init__(self):
        super(GRUModel, self).__init__()
        self.gru1 = nn.GRU(num_features, 60, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.gru2 = nn.GRU(60, 55, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)
        self.gru3 = nn.GRU(55, 50, batch_first=True)
        self.dropout3 = nn.Dropout(0.2)
        self.gru4 = nn.GRU(50, 45, batch_first=True)
        self.dropout4 = nn.Dropout(0.2)
        self.fc = nn.Linear(45, 1)  # Ensures the output is of size [batch_size, 1]
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x, _ = self.gru1(x)
        x = self.dropout1(x)
        x, _ = self.gru2(x)
        x = self.dropout2(x)
        x, _ = self.gru3(x)
        x = self.dropout3(x)
        x, _ = self.gru4(x)
        x = x[:, -1, :]  # Take the last output
        x = self.dropout4(x)
        x = self.fc(x)   # Linear layer to map to 1 output
        x = self.sigmoid(x)
        return x

# Define the meta learner

In [320]:
# it's a fully-connect neuralnetwork with three layers; the activation function for this model is the Rectified Linear Unit (ReLu).
# NOTE: The paper doesn't specify the number of neurons in the hidden layers, so I'm basing on the stanford paper
class MetaLearner(nn.Module):
    def __init__(self):
        super(MetaLearner, self).__init__()
        self.fc1 = nn.Linear(2, 30)
        self.fc2 = nn.Linear(30, 25)
        self.fc3 = nn.Linear(25, 20)
        self.fc4 = nn.Linear(20, 1)
        self.sigmoid = nn.Sigmoid() 
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        x = self.sigmoid(x) 
        return x

# Load in data

In [321]:
import pandas as pd
import numpy as np
from os.path import join

def get_data_stanford():
    DATA_PATH = join('data', 'original_dataset', 'amzn_source_price_2017-2020.csv')

    HORIZON = 10
    DAYS_FORWARD = 1
    END_SPLIT = 40


    # split_y: tuple
    #     (train_y, validate_y, test_y)
    # split_X: tuple
    #     (train_X, validate_X, test_X)
    split_y, split_X = data_prep.data_prep(DATA_PATH, HORIZON, DAYS_FORWARD, END_SPLIT)
    return split_X, split_y


(x_train, x_val, x_test), (y_train, y_val, y_test) = get_data_stanford()


print('x_train.shape', x_train.shape)
print('y_train.shape', y_train.shape)
print('x_val.shape', x_val.shape)
print('y_val.shape', y_val.shape)
print('x_test.shape', x_test.shape)
print('y_test.shape', y_test.shape)



x_train.shape (417, 10, 6)
y_train.shape (417,)
x_val.shape (40, 10, 6)
y_val.shape (40,)
x_test.shape (40, 10, 6)
y_test.shape (40,)


# Instatiate the models

In [322]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Instantiate models
lstm_model = LSTMModel()
gru_model = GRUModel()
meta_model = MetaLearner()

# Define loss and optimizer
criterion = nn.BCELoss()
lstm_optimizer = optim.RMSprop(lstm_model.parameters(), lr=0.0008, alpha=0.9) # 16 batch size, 150 epochs
gru_optimizer = optim.RMSprop(gru_model.parameters(), lr=0.0008, alpha=0.9) # 16 batch size, 200 epochs
base_models_batch_size = 16
# meta_optimizer = optim.Adam(meta_model.parameters(), lr=0.001) # 100 epochs, 8 batch size
meta_learner_batch_size = 8

# Train the base models

In [323]:
import copy

# Convert data to PyTorch tensors and create DataLoader
X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

X_val_tensor = torch.tensor(x_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

print(X_train_tensor.shape)
print(y_train_tensor.shape)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=base_models_batch_size, shuffle=True) #Stanford had shuffle true

val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=base_models_batch_size) 

import torch

def train_model(model, optimizer, criterion, train_loader, n_epochs=150, val_loader=None, return_lowest_val_loss=False):
    model.train()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_loss = float('inf')

    for epoch in range(n_epochs):
        epoch_loss = 0
        correct_train = 0
        total_train = 0
        
        # Training phase
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch.view(-1, 1))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

            # Calculate training accuracy
            predicted_train = output.round()
            total_train += y_batch.size(0)
            correct_train += (predicted_train == y_batch.view(-1, 1)).sum().item()

        # Output training loss and accuracy
        train_loss_avg = epoch_loss / len(train_loader)
        train_accuracy = correct_train / total_train
        print(f'Epoch {epoch+1}/{n_epochs}, Training Loss: {train_loss_avg:.4f}, Training Accuracy: {train_accuracy:.2f}', end='')

        # Validation phase (if val_loader is provided)
        if val_loader:
            model.eval()
            val_loss = 0
            correct_val = 0
            total_val = 0
            with torch.no_grad():
                for X_val, y_val in val_loader:
                    output_val = model(X_val)
                    val_loss += criterion(output_val, y_val.view(-1, 1)).item()

                    predicted_val = output_val.round()
                    total_val += y_val.size(0)
                    correct_val += (predicted_val == y_val.view(-1, 1)).sum().item()

            val_loss_avg = val_loss / len(val_loader)
            val_accuracy = correct_val / total_val
            print(f', Validation Loss: {val_loss_avg:.4f}, Validation Accuracy: {val_accuracy:.2f}')
            
            # Check if this is the best model so far and update accordingly
            if val_loss_avg < best_val_loss + 1e-2 : # Add 1e-2 as a "buffer" to prevent early stopping
                best_val_loss = val_loss_avg
                best_model_wts = copy.deepcopy(model.state_dict())

            model.train()

        else:
            print()  # Just move to the next line

    # Load the best model weights
    if return_lowest_val_loss and val_loader != None:
        print(f'Lowest validation loss: {best_val_loss:.4f}')
        model.load_state_dict(best_model_wts)
    return model



# Train the LSTM model
print("Training LSTM Model")
train_model(lstm_model, lstm_optimizer, criterion, train_loader, 100, val_loader, False)

# Train the GRU model
print("Training GRU Model")
train_model(gru_model, gru_optimizer, criterion, train_loader, 100, val_loader, False)

torch.Size([417, 10, 6])
torch.Size([417])
Training LSTM Model
Epoch 1/100, Training Loss: 0.6670, Training Accuracy: 0.66, Validation Loss: 0.6411, Validation Accuracy: 0.70
Epoch 2/100, Training Loss: 0.6610, Training Accuracy: 0.66, Validation Loss: 0.6402, Validation Accuracy: 0.70


Epoch 3/100, Training Loss: 0.6592, Training Accuracy: 0.66, Validation Loss: 0.6439, Validation Accuracy: 0.70
Epoch 4/100, Training Loss: 0.6385, Training Accuracy: 0.66, Validation Loss: 0.6364, Validation Accuracy: 0.70
Epoch 5/100, Training Loss: 0.6348, Training Accuracy: 0.66, Validation Loss: 0.6434, Validation Accuracy: 0.70
Epoch 6/100, Training Loss: 0.6634, Training Accuracy: 0.66, Validation Loss: 0.6402, Validation Accuracy: 0.70
Epoch 7/100, Training Loss: 0.6293, Training Accuracy: 0.66, Validation Loss: 0.6455, Validation Accuracy: 0.70
Epoch 8/100, Training Loss: 0.6389, Training Accuracy: 0.66, Validation Loss: 0.6367, Validation Accuracy: 0.70
Epoch 9/100, Training Loss: 0.6281, Training Accuracy: 0.66, Validation Loss: 0.6363, Validation Accuracy: 0.70
Epoch 10/100, Training Loss: 0.6308, Training Accuracy: 0.66, Validation Loss: 0.6364, Validation Accuracy: 0.70
Epoch 11/100, Training Loss: 0.6145, Training Accuracy: 0.66, Validation Loss: 0.7748, Validation Accur

GRUModel(
  (gru1): GRU(6, 60, batch_first=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (gru2): GRU(60, 55, batch_first=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (gru3): GRU(55, 50, batch_first=True)
  (dropout3): Dropout(p=0.2, inplace=False)
  (gru4): GRU(50, 45, batch_first=True)
  (dropout4): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=45, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

# Use base models to predict the validation data, this will be used as input to the Meta Learner

In [324]:
lstm_val_predictions = lstm_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy().reshape(-1,1)
gru_val_predictions = gru_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy().reshape(-1,1)

# lstm_pred = lstm_model.predict(X).reshape(-1, 1)
# gru_pred = gru_model.predict(X).reshape(-1, 1)

# Form and return new data set
# new_X = np.hstack((lstm_pred, gru_pred))


# Combine predictions to form new training data for the meta-learner
meta_X_train = np.concatenate((lstm_val_predictions, gru_val_predictions), axis=1)#meta_X_train = np.hstack((lstm_val_predictions, gru_val_predictions))#

print(meta_X_train.shape)

print(meta_X_train)

print(y_val)

print(x_val)

(40, 2)
[[0.5672675  0.9887067 ]
 [0.59372544 0.91259164]
 [0.5681934  0.74513763]
 [0.5764383  0.8163223 ]
 [0.57982284 0.99815685]
 [0.5800945  0.09205566]
 [0.5663939  0.82022727]
 [0.5592656  0.1547592 ]
 [0.56213325 0.82089937]
 [0.5873597  0.9643301 ]
 [0.57190365 0.8906842 ]
 [0.57110786 0.950954  ]
 [0.5720682  0.8066745 ]
 [0.5766675  0.30384988]
 [0.57389975 0.89237195]
 [0.5765007  0.89774853]
 [0.57937384 0.94644415]
 [0.5732909  0.25282514]
 [0.5783882  0.94709915]
 [0.62227696 0.9589569 ]
 [0.55543286 0.98343074]
 [0.568101   0.3958734 ]
 [0.5775493  0.9332793 ]
 [0.60559773 0.9663629 ]
 [0.59249026 0.7869477 ]
 [0.58477813 0.9015384 ]
 [0.59818697 0.812958  ]
 [0.5601618  0.9770672 ]
 [0.5756962  0.4369235 ]
 [0.5797802  0.96791774]
 [0.587913   0.9604295 ]
 [0.5720901  0.8582591 ]
 [0.5818079  0.81897414]
 [0.59070575 0.31604743]
 [0.5909531  0.25838304]
 [0.5622085  0.04299216]
 [0.5850208  0.5245086 ]
 [0.56190735 0.9162881 ]
 [0.5631355  0.5627613 ]
 [0.56241727 0.86

# Train meta learner

In [325]:
meta_model = MetaLearner()
meta_criterion = nn.BCELoss()
meta_optimizer = optim.Adam(meta_model.parameters(), eps=1e-7) #default keras LR

meta_X_train_tensor = torch.tensor(meta_X_train, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

meta_train_dataset = TensorDataset(meta_X_train_tensor, y_val_tensor)
meta_train_loader = DataLoader(meta_train_dataset, batch_size=8, shuffle=True)

train_model(meta_model, meta_optimizer, meta_criterion, meta_train_loader, 100, return_lowest_val_loss=False)

Epoch 1/100, Training Loss: 0.6842, Training Accuracy: 0.70
Epoch 2/100, Training Loss: 0.6761, Training Accuracy: 0.70
Epoch 3/100, Training Loss: 0.6695, Training Accuracy: 0.70
Epoch 4/100, Training Loss: 0.6625, Training Accuracy: 0.70
Epoch 5/100, Training Loss: 0.6565, Training Accuracy: 0.70
Epoch 6/100, Training Loss: 0.6505, Training Accuracy: 0.70
Epoch 7/100, Training Loss: 0.6438, Training Accuracy: 0.70
Epoch 8/100, Training Loss: 0.6378, Training Accuracy: 0.70
Epoch 9/100, Training Loss: 0.6304, Training Accuracy: 0.70
Epoch 10/100, Training Loss: 0.6224, Training Accuracy: 0.70
Epoch 11/100, Training Loss: 0.6147, Training Accuracy: 0.70
Epoch 12/100, Training Loss: 0.6041, Training Accuracy: 0.70
Epoch 13/100, Training Loss: 0.5945, Training Accuracy: 0.70
Epoch 14/100, Training Loss: 0.5825, Training Accuracy: 0.70
Epoch 15/100, Training Loss: 0.5692, Training Accuracy: 0.70
Epoch 16/100, Training Loss: 0.5570, Training Accuracy: 0.72


Epoch 17/100, Training Loss: 0.5413, Training Accuracy: 0.75
Epoch 18/100, Training Loss: 0.5252, Training Accuracy: 0.78
Epoch 19/100, Training Loss: 0.5074, Training Accuracy: 0.75
Epoch 20/100, Training Loss: 0.4907, Training Accuracy: 0.78
Epoch 21/100, Training Loss: 0.4711, Training Accuracy: 0.82
Epoch 22/100, Training Loss: 0.4535, Training Accuracy: 0.82
Epoch 23/100, Training Loss: 0.4379, Training Accuracy: 0.82
Epoch 24/100, Training Loss: 0.4168, Training Accuracy: 0.85
Epoch 25/100, Training Loss: 0.4023, Training Accuracy: 0.85
Epoch 26/100, Training Loss: 0.3862, Training Accuracy: 0.88
Epoch 27/100, Training Loss: 0.3706, Training Accuracy: 0.88
Epoch 28/100, Training Loss: 0.3581, Training Accuracy: 0.88
Epoch 29/100, Training Loss: 0.3453, Training Accuracy: 0.88
Epoch 30/100, Training Loss: 0.3344, Training Accuracy: 0.88
Epoch 31/100, Training Loss: 0.3237, Training Accuracy: 0.88
Epoch 32/100, Training Loss: 0.3169, Training Accuracy: 0.90
Epoch 33/100, Training L

MetaLearner(
  (fc1): Linear(in_features=2, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=25, bias=True)
  (fc3): Linear(in_features=25, out_features=20, bias=True)
  (fc4): Linear(in_features=20, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)

In [340]:
from sklearn.metrics import precision_recall_fscore_support
#  the test dataset will be input into the sub-models again to produce intermediate test data for the meta-learner. Afterward, the meta-learner will use the intermediate test predictions from the sub-models to make the final predictions.
lstm_test_predictions = lstm_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()
gru_test_predictions = gru_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()

meta_X_test = np.concatenate((lstm_test_predictions, gru_test_predictions), axis=1)
meta_X_test_tensor = torch.tensor(meta_X_test, dtype=torch.float32)

meta_test_predictions = meta_model(meta_X_test_tensor).detach().numpy()
print("meta predictions", meta_test_predictions)
# Evaluation metrics
meta_test_predictions = np.round(meta_test_predictions)
accuracy = np.mean(meta_test_predictions == y_test)
print(f'Accuracy: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, meta_test_predictions, average='binary')
print(f'Precision: {precision}, Recall: {recall}, F1 Score: {f1}')



print("lstm predictions", lstm_test_predictions)
lstm_test_predictions = np.round(lstm_test_predictions)
accuracy = np.mean(lstm_test_predictions == y_test)
print(f'Accuracy for lstm: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, lstm_test_predictions, average='binary')
print(f'Precision for lstm: {precision}, Recall for lstm: {recall}, F1 Score for lstm: {f1}')

print("gru predictions", gru_test_predictions)
gru_test_predictions = np.round(gru_test_predictions)
accuracy = np.mean(gru_test_predictions == y_test)
print(f'Accuracy for gru: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, gru_test_predictions, average='binary')
print(f'Precision for gru: {precision}, Recall for gru: {recall}, F1 Score for gru: {f1}')


meta predictions [[0.9131388 ]
 [0.9680065 ]
 [0.02281385]
 [0.92675203]
 [0.9663625 ]
 [0.21873815]
 [0.04055015]
 [0.94133914]
 [0.9486576 ]
 [0.9515744 ]
 [0.0264353 ]
 [0.05965887]
 [0.02369667]
 [0.5067859 ]
 [0.94166625]
 [0.9733138 ]
 [0.972649  ]
 [0.9435256 ]
 [0.02110143]
 [0.0223498 ]
 [0.02352152]
 [0.93987817]
 [0.02381654]
 [0.96990156]
 [0.9753897 ]
 [0.9756007 ]
 [0.02370224]
 [0.20316926]
 [0.9309415 ]
 [0.9506403 ]
 [0.9289933 ]
 [0.97504413]
 [0.02279282]
 [0.0471906 ]
 [0.34854087]
 [0.89718896]
 [0.9663998 ]
 [0.02265418]
 [0.9562785 ]
 [0.9742818 ]]
Accuracy: 0.515
Precision: 0.75, Recall: 0.782608695652174, F1 Score: 0.7659574468085107
lstm predictions [[0.56499434]
 [0.5735917 ]
 [0.57468987]
 [0.5523623 ]
 [0.5577896 ]
 [0.5747415 ]
 [0.559032  ]
 [0.5675016 ]
 [0.60748863]
 [0.55791265]
 [0.5882119 ]
 [0.60005313]
 [0.5873236 ]
 [0.57022893]
 [0.582237  ]
 [0.5559286 ]
 [0.55778563]
 [0.5567146 ]
 [0.5936205 ]
 [0.5660339 ]
 [0.57691   ]
 [0.5728008 ]
 [0.5791