# Define the level 1 models

In [237]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import data_prep

# Hyperparameters
timesteps = 10  # Number of time steps
num_features = 6  # Number of features
learning_rate = 0.001

# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(num_features, 50, batch_first=True)
        self.dropout1 = nn.Dropout(0.4)
        self.lstm2 = nn.LSTM(50, 50, batch_first=True)
        self.dropout2 = nn.Dropout(0.4)
        self.lstm3 = nn.LSTM(50, 50, batch_first=True)
        self.dropout3 = nn.Dropout(0.4)
        self.lstm4 = nn.LSTM(50, 50, batch_first=True)  
        self.dropout4 = nn.Dropout(0.4)
        self.fc = nn.Linear(50, 1) 
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x, _ = self.lstm1(x)
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.dropout2(x)
        x, _ = self.lstm3(x)
        x = self.dropout3(x)
        x, _ = self.lstm4(x)
        x = x[:, -1, :]  # Take the last output from the last LSTM layer
        x = self.dropout4(x)
        x = self.fc(x)   # Linear layer to map to 1 output
        x = self.sigmoid(x)
        return x

# GRU Model
class GRUModel(nn.Module):
    def __init__(self):
        super(GRUModel, self).__init__()
        self.gru1 = nn.GRU(num_features, 50, batch_first=True)
        self.dropout1 = nn.Dropout(0.4)
        self.gru2 = nn.GRU(50, 50, batch_first=True)
        self.dropout2 = nn.Dropout(0.4)
        self.gru3 = nn.GRU(50, 50, batch_first=True)
        self.dropout3 = nn.Dropout(0.4)
        self.gru4 = nn.GRU(50, 50, batch_first=True)
        self.dropout4 = nn.Dropout(0.4)
        self.fc = nn.Linear(50, 1)  # Ensures the output is of size [batch_size, 1]
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x, _ = self.gru1(x)
        x = self.dropout1(x)
        x, _ = self.gru2(x)
        x = self.dropout2(x)
        x, _ = self.gru3(x)
        x = self.dropout3(x)
        x, _ = self.gru4(x)
        x = x[:, -1, :]  # Take the last output
        x = self.dropout4(x)
        x = self.fc(x)   # Linear layer to map to 1 output
        x = self.sigmoid(x)
        return x

# Define the meta learner

In [238]:
# it's a fully-connect neuralnetwork with three layers; the activation function for this model is the Rectified Linear Unit (ReLu).
# NOTE: The paper doesn't specify the number of neurons in the hidden layers, so I'm basing on the stanford paper
class MetaLearner(nn.Module):
    def __init__(self):
        super(MetaLearner, self).__init__()
        self.fc1 = nn.Linear(2, 30)
        self.fc2 = nn.Linear(30, 25)
        self.fc3 = nn.Linear(25, 20)
        self.fc4 = nn.Linear(20, 1)
        self.sigmoid = nn.Sigmoid() 
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        x = self.sigmoid(x) 
        return x

# Load in data

In [239]:
import pandas as pd
import numpy as np
from os.path import join

def get_data_stanford():
    DATA_PATH = join('data', 'original_dataset', 'amzn_source_price_2017-2020.csv')

    HORIZON = 10
    DAYS_FORWARD = 1
    END_SPLIT = 50


    # split_y: tuple
    #     (train_y, validate_y, test_y)
    # split_X: tuple
    #     (train_X, validate_X, test_X)
    split_y, split_X = data_prep.data_prep(DATA_PATH, HORIZON, DAYS_FORWARD, END_SPLIT)

    
    def drop_column(arr, idx):
        return np.delete(arr, idx, axis=2)

    #Drop the twitter column, test
    # split_X = (
    #     drop_column(split_X[0], 4),
    #     drop_column(split_X[1], 4),
    #     drop_column(split_X[2], 4)
    # )
    return split_X, split_y


(x_train, x_val, x_test), (y_train, y_val, y_test) = get_data_stanford()

# drop the 5th column, second to last column


print('x_train[0]\n', x_train[0])



print('x_train.shape', x_train.shape)
print('y_train.shape', y_train.shape)
print('x_val.shape', x_val.shape)
print('y_val.shape', y_val.shape)
print('x_test.shape', x_test.shape)
print('y_test.shape', y_test.shape)



x_train[0]
 [[0.42741352 0.46966103 0.47956216 0.38826329 0.80621952 0.00837898]
 [0.42741352 0.46966103 0.47956216 0.38826329 0.8605262  0.01470041]
 [0.42741352 0.46966103 0.47956216 0.44420338 0.49902886 0.01911414]
 [0.42741352 0.46966103 0.47956216 0.38826329 0.44848639 0.        ]
 [0.42741352 0.46966103 0.47956216 0.38826329 0.48359764 0.02245879]
 [0.42741352 0.46966103 0.47956216 0.36779803 0.43217558 0.03991771]
 [0.42741352 0.46966103 0.47956216 0.65719396 0.59288281 0.04611278]
 [0.42741352 0.577932   0.47956216 0.52627927 0.49700636 0.06858313]
 [0.42741352 0.46966103 0.47956216 0.52811354 0.63369226 0.08896136]
 [0.42741352 0.46966103 0.47956216 0.52811354 0.49611008 0.09566224]]
x_train.shape (397, 10, 6)
y_train.shape (397,)
x_val.shape (50, 10, 6)
y_val.shape (50,)
x_test.shape (50, 10, 6)
y_test.shape (50,)


# Instatiate the models

In [240]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Instantiate models
lstm_model = LSTMModel()
gru_model = GRUModel()
meta_model = MetaLearner()

# Define loss and optimizer
criterion = nn.BCELoss()
lstm_optimizer = optim.RMSprop(lstm_model.parameters(), lr=0.0008) # 16 batch size, 150 epochs
gru_optimizer = optim.RMSprop(gru_model.parameters(), lr=0.0008) # 16 batch size, 200 epochs
base_models_batch_size = 16
# meta_optimizer = optim.Adam(meta_model.parameters(), lr=0.001) # 100 epochs, 8 batch size
meta_learner_batch_size = 8

# Train the base models

In [241]:
import copy

# Convert data to PyTorch tensors and create DataLoader
X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

X_val_tensor = torch.tensor(x_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

print(X_train_tensor.shape)
print(y_train_tensor.shape)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=base_models_batch_size, shuffle=True) #Stanford had shuffle true

val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=base_models_batch_size) 

import torch

def train_model(model, optimizer, criterion, train_loader, n_epochs=150, val_loader=None, return_lowest_val_loss=False):
    model.train()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_loss = float('inf')

    for epoch in range(n_epochs):
        epoch_loss = 0
        correct_train = 0
        total_train = 0
        
        # Training phase
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch.view(-1, 1))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

            # Calculate training accuracy
            predicted_train = output.round()
            total_train += y_batch.size(0)
            correct_train += (predicted_train == y_batch.view(-1, 1)).sum().item()

        # Output training loss and accuracy
        train_loss_avg = epoch_loss / len(train_loader)
        train_accuracy = correct_train / total_train
        print(f'Epoch {epoch+1}/{n_epochs}, Training Loss: {train_loss_avg:.4f}, Training Accuracy: {train_accuracy:.2f}', end='')

        # Validation phase (if val_loader is provided)
        if val_loader:
            model.eval()
            val_loss = 0
            correct_val = 0
            total_val = 0
            with torch.no_grad():
                for X_val, y_val in val_loader:
                    output_val = model(X_val)
                    val_loss += criterion(output_val, y_val.view(-1, 1)).item()

                    predicted_val = output_val.round()
                    total_val += y_val.size(0)
                    correct_val += (predicted_val == y_val.view(-1, 1)).sum().item()

            val_loss_avg = val_loss / len(val_loader)
            val_accuracy = correct_val / total_val
            print(f', Validation Loss: {val_loss_avg:.4f}, Validation Accuracy: {val_accuracy:.2f}')
            
            # Check if this is the best model so far
            if val_loss_avg < best_val_loss + 1e-2 : # Add 1e-2 as a "buffer" to favor the latest model
                best_val_loss = val_loss_avg
                best_model_wts = copy.deepcopy(model.state_dict())

            model.train()

        else:
            print()  # Just move to the next line

    # Load the best model weights
    if return_lowest_val_loss and val_loader != None:
        print(f'Lowest validation loss: {best_val_loss:.4f}')
        model.load_state_dict(best_model_wts)
    return model



# Train the LSTM model
print("Training LSTM Model")
train_model(lstm_model, lstm_optimizer, criterion, train_loader, 100, val_loader, False)

# Train the GRU model
print("Training GRU Model")
train_model(gru_model, gru_optimizer, criterion, train_loader, 100, val_loader, False)

torch.Size([397, 10, 6])
torch.Size([397])
Training LSTM Model
Epoch 1/100, Training Loss: 0.6515, Training Accuracy: 0.66, Validation Loss: 0.6541, Validation Accuracy: 0.68
Epoch 2/100, Training Loss: 0.6471, Training Accuracy: 0.66, Validation Loss: 0.6547, Validation Accuracy: 0.68
Epoch 3/100, Training Loss: 0.6399, Training Accuracy: 0.66, Validation Loss: 0.6534, Validation Accuracy: 0.68
Epoch 4/100, Training Loss: 0.6332, Training Accuracy: 0.66, Validation Loss: 0.6553, Validation Accuracy: 0.68
Epoch 5/100, Training Loss: 0.6326, Training Accuracy: 0.66, Validation Loss: 0.6570, Validation Accuracy: 0.68
Epoch 6/100, Training Loss: 0.6244, Training Accuracy: 0.66, Validation Loss: 0.6556, Validation Accuracy: 0.68
Epoch 7/100, Training Loss: 0.6194, Training Accuracy: 0.66, Validation Loss: 0.6626, Validation Accuracy: 0.68
Epoch 8/100, Training Loss: 0.6251, Training Accuracy: 0.66, Validation Loss: 0.6547, Validation Accuracy: 0.68
Epoch 9/100, Training Loss: 0.6098, Train

KeyboardInterrupt: 

# Use base models to predict the validation data, this will be used as input to the Meta Learner

In [None]:
lstm_val_predictions = lstm_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy().reshape(-1,1)
gru_val_predictions = gru_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy().reshape(-1,1)

# lstm_pred = lstm_model.predict(X).reshape(-1, 1)
# gru_pred = gru_model.predict(X).reshape(-1, 1)

# Form and return new data set
# new_X = np.hstack((lstm_pred, gru_pred))


# Combine predictions to form new training data for the meta-learner
meta_X_train = np.concatenate((lstm_val_predictions, gru_val_predictions), axis=1)#meta_X_train = np.hstack((lstm_val_predictions, gru_val_predictions))#

print(meta_X_train.shape)



(50, 2)


# Train meta learner

In [None]:
meta_model = MetaLearner()
meta_criterion = nn.BCELoss()
meta_optimizer = optim.Adam(meta_model.parameters(), eps=1e-7) #default keras LR

meta_X_train_tensor = torch.tensor(meta_X_train, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

meta_train_dataset = TensorDataset(meta_X_train_tensor, y_val_tensor)
meta_train_loader = DataLoader(meta_train_dataset, batch_size=8, shuffle=True)

train_model(meta_model, meta_optimizer, meta_criterion, meta_train_loader, 100, return_lowest_val_loss=False)

Epoch 1/100, Training Loss: 0.6546, Training Accuracy: 0.68
Epoch 2/100, Training Loss: 0.6609, Training Accuracy: 0.68
Epoch 3/100, Training Loss: 0.6577, Training Accuracy: 0.68
Epoch 4/100, Training Loss: 0.6375, Training Accuracy: 0.68
Epoch 5/100, Training Loss: 0.6673, Training Accuracy: 0.68
Epoch 6/100, Training Loss: 0.6264, Training Accuracy: 0.68
Epoch 7/100, Training Loss: 0.6184, Training Accuracy: 0.68
Epoch 8/100, Training Loss: 0.6376, Training Accuracy: 0.68
Epoch 9/100, Training Loss: 0.6057, Training Accuracy: 0.68
Epoch 10/100, Training Loss: 0.6275, Training Accuracy: 0.68
Epoch 11/100, Training Loss: 0.6170, Training Accuracy: 0.68
Epoch 12/100, Training Loss: 0.5798, Training Accuracy: 0.68
Epoch 13/100, Training Loss: 0.5723, Training Accuracy: 0.68
Epoch 14/100, Training Loss: 0.6078, Training Accuracy: 0.68
Epoch 15/100, Training Loss: 0.6033, Training Accuracy: 0.68
Epoch 16/100, Training Loss: 0.6005, Training Accuracy: 0.68
Epoch 17/100, Training Loss: 0.58

MetaLearner(
  (fc1): Linear(in_features=2, out_features=30, bias=True)
  (fc2): Linear(in_features=30, out_features=25, bias=True)
  (fc3): Linear(in_features=25, out_features=20, bias=True)
  (fc4): Linear(in_features=20, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)

In [None]:
from sklearn.metrics import precision_recall_fscore_support
#  the test dataset will be input into the sub-models again to produce intermediate test data for the meta-learner. Afterward, the meta-learner will use the intermediate test predictions from the sub-models to make the final predictions.
lstm_test_predictions = lstm_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()
gru_test_predictions = gru_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()

meta_X_test = np.concatenate((lstm_test_predictions, gru_test_predictions), axis=1)
meta_X_test_tensor = torch.tensor(meta_X_test, dtype=torch.float32)

meta_test_predictions = meta_model(meta_X_test_tensor).detach().numpy()
print("meta predictions", meta_test_predictions)
# Evaluation metrics
meta_test_predictions = np.round(meta_test_predictions)
accuracy = np.mean(meta_test_predictions == y_test)
print(f'Accuracy: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, meta_test_predictions, average='binary')
print(f'Precision: {precision}, Recall: {recall}, F1 Score: {f1}')



print("lstm predictions", lstm_test_predictions)
lstm_test_predictions = np.round(lstm_test_predictions)
accuracy = np.mean(lstm_test_predictions == y_test)
print(f'Accuracy for lstm: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, lstm_test_predictions, average='binary')
print(f'Precision for lstm: {precision}, Recall for lstm: {recall}, F1 Score for lstm: {f1}')

print("gru predictions", gru_test_predictions)
gru_test_predictions = np.round(gru_test_predictions)
accuracy = np.mean(gru_test_predictions == y_test)
print(f'Accuracy for gru: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, gru_test_predictions, average='binary')
print(f'Precision for gru: {precision}, Recall for gru: {recall}, F1 Score for gru: {f1}')


meta predictions [[0.4596751 ]
 [0.9445052 ]
 [0.891317  ]
 [0.20655198]
 [0.21807212]
 [0.19733097]
 [0.6168381 ]
 [0.9738109 ]
 [0.4924567 ]
 [0.95471716]
 [0.92396295]
 [0.9399569 ]
 [0.19092536]
 [0.8330645 ]
 [0.87798727]
 [0.22552815]
 [0.2283198 ]
 [0.9232679 ]
 [0.9632175 ]
 [0.9495494 ]
 [0.22471346]
 [0.24222519]
 [0.2109117 ]
 [0.6559626 ]
 [0.9448133 ]
 [0.27330583]
 [0.9460121 ]
 [0.93008554]
 [0.18466394]
 [0.18972455]
 [0.29562715]
 [0.9526687 ]
 [0.208376  ]
 [0.8526266 ]
 [0.9860663 ]
 [0.9844521 ]
 [0.18427022]
 [0.5105765 ]
 [0.9652675 ]
 [0.7126261 ]
 [0.81434083]
 [0.98421466]
 [0.17649756]
 [0.30720747]
 [0.691403  ]
 [0.7473234 ]
 [0.93057656]
 [0.18786372]
 [0.69886464]
 [0.9399361 ]]
Accuracy: 0.516
Precision: 0.7333333333333333, Recall: 0.7586206896551724, F1 Score: 0.7457627118644068
lstm predictions [[0.6143636 ]
 [0.6153542 ]
 [0.5901982 ]
 [0.5843916 ]
 [0.61131275]
 [0.57749844]
 [0.58080757]
 [0.6009993 ]
 [0.61348397]
 [0.606457  ]
 [0.592641  ]
 [0.628