# Define the level 1 models

In [91]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import data_prep

# Hyperparameters
timesteps = 10  # Number of time steps
num_features = 6  # Number of features
learning_rate = 0.001

# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(num_features, 50, batch_first=True)
        self.dropout1 = nn.Dropout(0.4)
        self.lstm2 = nn.LSTM(50, 50, batch_first=True)
        self.dropout2 = nn.Dropout(0.4)
        self.lstm3 = nn.LSTM(50, 50, batch_first=True)
        self.dropout3 = nn.Dropout(0.4)
        self.lstm4 = nn.LSTM(50, 50, batch_first=True)  
        self.dropout4 = nn.Dropout(0.4)
        self.fc = nn.Linear(50, 1) 
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x, _ = self.lstm1(x)
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.dropout2(x)
        x, _ = self.lstm3(x)
        x = self.dropout3(x)
        x, _ = self.lstm4(x)
        x = x[:, -1, :]  # Take the last output from the last LSTM layer
        x = self.dropout4(x)
        x = self.fc(x)   # Linear layer to map to 1 output
        x = self.sigmoid(x)
        return x

# GRU Model
class GRUModel(nn.Module):
    def __init__(self):
        super(GRUModel, self).__init__()
        self.gru1 = nn.GRU(num_features, 50, batch_first=True)
        self.dropout1 = nn.Dropout(0.4)
        self.gru2 = nn.GRU(50, 50, batch_first=True)
        self.dropout2 = nn.Dropout(0.4)
        self.gru3 = nn.GRU(50, 50, batch_first=True)
        self.dropout3 = nn.Dropout(0.4)
        self.gru4 = nn.GRU(50, 50, batch_first=True)
        self.dropout4 = nn.Dropout(0.4)
        self.fc = nn.Linear(50, 1)  # Ensures the output is of size [batch_size, 1]
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x, _ = self.gru1(x)
        x = self.dropout1(x)
        x, _ = self.gru2(x)
        x = self.dropout2(x)
        x, _ = self.gru3(x)
        x = self.dropout3(x)
        x, _ = self.gru4(x)
        x = x[:, -1, :]  # Take the last output
        x = self.dropout4(x)
        x = self.fc(x)   # Linear layer to map to 1 output
        x = self.sigmoid(x)
        return x

# Define the meta learner

In [92]:
# it's a fully-connect neuralnetwork with three layers; the activation function for this model is the Rectified Linear Unit (ReLu).
# NOTE: The paper doesn't specify the number of neurons in the hidden layers, so I'm basing on the stanford paper
class MetaLearner(nn.Module):
    def __init__(self):
        super(MetaLearner, self).__init__()
        self.fc1 = nn.Linear(2, 4)
        self.fc12 = nn.Linear(4, 1)
        # self.fc2 = nn.Linear(30, 25)
        # self.fc3 = nn.Linear(25, 20)
        # self.fc4 = nn.Linear(20, 1)
        self.sigmoid = nn.Sigmoid() 
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc12(x)
        # x = self.relu(self.fc2(x))
        # x = self.relu(self.fc3(x))
        # x = self.fc4(x)
        x = self.sigmoid(x) 
        return x

# Load in data

In [93]:
import pandas as pd
import numpy as np
from os.path import join

def get_data_stanford():
    DATA_PATH = join('data', 'original_dataset', 'amzn_source_price_2017-2020.csv')

    HORIZON = 10
    DAYS_FORWARD = 1
    END_SPLIT = 50


    # split_y: tuple
    #     (train_y, validate_y, test_y)
    # split_X: tuple
    #     (train_X, validate_X, test_X)
    split_y, split_X = data_prep.data_prep(DATA_PATH, HORIZON, DAYS_FORWARD, END_SPLIT)

    
    def drop_column(arr, idx):
        return np.delete(arr, idx, axis=2)

    #Drop the twitter column, test
    # split_X = (
    #     drop_column(split_X[0], 4),
    #     drop_column(split_X[1], 4),
    #     drop_column(split_X[2], 4)
    # )
    return split_X, split_y



(x_train, x_val, x_test), (y_train, y_val, y_test) = get_data_stanford()

# drop the 5th column, second to last column


print('x_train[0]\n', x_train[0])



print('x_train.shape', x_train.shape)
print('y_train.shape', y_train.shape)
print('x_val.shape', x_val.shape)
print('y_val.shape', y_val.shape)
print('x_test.shape', x_test.shape)
print('y_test.shape', y_test.shape)



[[0.42741352 0.46966103 0.47956216 0.3882633  0.8062195  0.00837898]
 [0.42741352 0.46966103 0.47956216 0.3882633  0.8605262  0.01470041]
 [0.42741352 0.46966103 0.47956216 0.44420338 0.49902886 0.01911414]
 ...
 [0.14965403 0.46966103 0.47956216 0.5406128  0.6189838  0.80494   ]
 [0.42741352 0.46966103 0.47956216 0.7042038  0.69379574 0.77860785]
 [0.42741352 0.46966103 0.47956216 0.3882633  0.6054436  0.7796998 ]]
raw_sample_size 507
feature_size 6
497
x_train[0]
 [[0.42741352 0.46966103 0.47956216 0.38826329 0.80621952 0.00837898]
 [0.42741352 0.46966103 0.47956216 0.38826329 0.8605262  0.01470041]
 [0.42741352 0.46966103 0.47956216 0.44420338 0.49902886 0.01911414]
 [0.42741352 0.46966103 0.47956216 0.38826329 0.44848639 0.        ]
 [0.42741352 0.46966103 0.47956216 0.38826329 0.48359764 0.02245879]
 [0.42741352 0.46966103 0.47956216 0.36779803 0.43217558 0.03991771]
 [0.42741352 0.46966103 0.47956216 0.65719396 0.59288281 0.04611278]
 [0.42741352 0.577932   0.47956216 0.52627927 

# Instatiate the models

In [94]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Instantiate models
lstm_model = LSTMModel()
gru_model = GRUModel()
meta_model = MetaLearner()

# Define loss and optimizer
criterion = nn.BCELoss()
lstm_optimizer = optim.RMSprop(lstm_model.parameters(), lr=0.0016) # 16 batch size, 150 epochs
gru_optimizer = optim.RMSprop(gru_model.parameters(), lr=0.0008) # 16 batch size, 200 epochs
base_models_batch_size = 16
# meta_optimizer = optim.Adam(meta_model.parameters(), lr=0.001) # 100 epochs, 8 batch size
meta_learner_batch_size = 8

# Train the base models

In [95]:
import copy

# Convert data to PyTorch tensors and create DataLoader
X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

X_val_tensor = torch.tensor(x_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

print(X_train_tensor.shape)
print(y_train_tensor.shape)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=base_models_batch_size, shuffle=True) #Stanford had shuffle true

val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=base_models_batch_size) 

import torch

def train_model(model, optimizer, criterion, train_loader, n_epochs=150, val_loader=None, return_lowest_val_loss=False):
    model.train()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_loss = float('inf')

    for epoch in range(n_epochs):
        epoch_loss = 0
        correct_train = 0
        total_train = 0
        
        # Training phase
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch.view(-1, 1))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

            # Calculate training accuracy
            predicted_train = output.round()
            total_train += y_batch.size(0)
            correct_train += (predicted_train == y_batch.view(-1, 1)).sum().item()

        # Output training loss and accuracy
        train_loss_avg = epoch_loss / len(train_loader)
        train_accuracy = correct_train / total_train
        print(f'Epoch {epoch+1}/{n_epochs}, Training Loss: {train_loss_avg:.4f}, Training Accuracy: {train_accuracy:.2f}', end='')

        # Validation phase (if val_loader is provided)
        if val_loader:
            model.eval()
            val_loss = 0
            correct_val = 0
            total_val = 0
            with torch.no_grad():
                for X_val, y_val in val_loader:
                    output_val = model(X_val)
                    val_loss += criterion(output_val, y_val.view(-1, 1)).item()

                    predicted_val = output_val.round()
                    total_val += y_val.size(0)
                    correct_val += (predicted_val == y_val.view(-1, 1)).sum().item()

            val_loss_avg = val_loss / len(val_loader)
            val_accuracy = correct_val / total_val
            print(f', Validation Loss: {val_loss_avg:.4f}, Validation Accuracy: {val_accuracy:.2f}')
            
            # Check if this is the best model so far
            if val_loss_avg < best_val_loss + 1e-2 : # Add 1e-2 as a "buffer" to favor the latest model
                best_val_loss = val_loss_avg
                best_model_wts = copy.deepcopy(model.state_dict())

            model.train()

        else:
            print()  # Just move to the next line

    # Load the best model weights
    if return_lowest_val_loss and val_loader != None:
        print(f'Lowest validation loss: {best_val_loss:.4f}')
        model.load_state_dict(best_model_wts)
    return model



# Train the LSTM model
print("Training LSTM Model")
train_model(lstm_model, lstm_optimizer, criterion, train_loader, 150, val_loader, False)

# Train the GRU model
print("Training GRU Model")
train_model(gru_model, gru_optimizer, criterion, train_loader, 200, val_loader, False) # 200

torch.Size([397, 10, 6])
torch.Size([397])
Training LSTM Model
Epoch 1/150, Training Loss: 0.6634, Training Accuracy: 0.64, Validation Loss: 0.6534, Validation Accuracy: 0.68
Epoch 2/150, Training Loss: 0.6437, Training Accuracy: 0.66, Validation Loss: 0.6551, Validation Accuracy: 0.68
Epoch 3/150, Training Loss: 0.6445, Training Accuracy: 0.66, Validation Loss: 0.6531, Validation Accuracy: 0.68
Epoch 4/150, Training Loss: 0.6404, Training Accuracy: 0.66, Validation Loss: 0.6539, Validation Accuracy: 0.68
Epoch 5/150, Training Loss: 0.6441, Training Accuracy: 0.66, Validation Loss: 0.6545, Validation Accuracy: 0.68
Epoch 6/150, Training Loss: 0.6446, Training Accuracy: 0.66, Validation Loss: 0.6552, Validation Accuracy: 0.68
Epoch 7/150, Training Loss: 0.6402, Training Accuracy: 0.66, Validation Loss: 0.6537, Validation Accuracy: 0.68
Epoch 8/150, Training Loss: 0.6382, Training Accuracy: 0.67, Validation Loss: 0.6541, Validation Accuracy: 0.68
Epoch 9/150, Training Loss: 0.6511, Train

GRUModel(
  (gru1): GRU(6, 50, batch_first=True)
  (dropout1): Dropout(p=0.4, inplace=False)
  (gru2): GRU(50, 50, batch_first=True)
  (dropout2): Dropout(p=0.4, inplace=False)
  (gru3): GRU(50, 50, batch_first=True)
  (dropout3): Dropout(p=0.4, inplace=False)
  (gru4): GRU(50, 50, batch_first=True)
  (dropout4): Dropout(p=0.4, inplace=False)
  (fc): Linear(in_features=50, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

# Use base models to predict the validation data, this will be used as input to the Meta Learner

In [96]:
lstm_val_predictions = lstm_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy().reshape(-1,1)
gru_val_predictions = gru_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy().reshape(-1,1)

# lstm_pred = lstm_model.predict(X).reshape(-1, 1)
# gru_pred = gru_model.predict(X).reshape(-1, 1)

# Form and return new data set
# new_X = np.hstack((lstm_pred, gru_pred))


# Combine predictions to form new training data for the meta-learner
meta_X_train = np.concatenate((lstm_val_predictions, gru_val_predictions), axis=1)#meta_X_train = np.hstack((lstm_val_predictions, gru_val_predictions))#

print(meta_X_train.shape)



(50, 2)


# Train meta learner

In [97]:
meta_model = MetaLearner()
meta_criterion = nn.BCELoss()
meta_optimizer = optim.Adam(meta_model.parameters(), eps=1e-7) #default keras LR

meta_X_train_tensor = torch.tensor(meta_X_train, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

meta_train_dataset = TensorDataset(meta_X_train_tensor, y_val_tensor)
meta_train_loader = DataLoader(meta_train_dataset, batch_size=8, shuffle=True)

train_model(meta_model, meta_optimizer, meta_criterion, meta_train_loader, 100, return_lowest_val_loss=False)

Epoch 1/100, Training Loss: 0.6971, Training Accuracy: 0.62
Epoch 2/100, Training Loss: 0.6951, Training Accuracy: 0.64
Epoch 3/100, Training Loss: 0.6971, Training Accuracy: 0.68
Epoch 4/100, Training Loss: 0.6945, Training Accuracy: 0.68
Epoch 5/100, Training Loss: 0.6819, Training Accuracy: 0.68
Epoch 6/100, Training Loss: 0.6988, Training Accuracy: 0.68
Epoch 7/100, Training Loss: 0.6724, Training Accuracy: 0.68
Epoch 8/100, Training Loss: 0.6724, Training Accuracy: 0.68
Epoch 9/100, Training Loss: 0.6681, Training Accuracy: 0.68
Epoch 10/100, Training Loss: 0.6608, Training Accuracy: 0.68
Epoch 11/100, Training Loss: 0.6587, Training Accuracy: 0.68
Epoch 12/100, Training Loss: 0.6747, Training Accuracy: 0.68
Epoch 13/100, Training Loss: 0.6701, Training Accuracy: 0.68
Epoch 14/100, Training Loss: 0.6523, Training Accuracy: 0.68
Epoch 15/100, Training Loss: 0.6528, Training Accuracy: 0.68
Epoch 16/100, Training Loss: 0.6633, Training Accuracy: 0.68
Epoch 17/100, Training Loss: 0.64

MetaLearner(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc12): Linear(in_features=4, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)

In [98]:
from sklearn.metrics import precision_recall_fscore_support
#  the test dataset will be input into the sub-models again to produce intermediate test data for the meta-learner. Afterward, the meta-learner will use the intermediate test predictions from the sub-models to make the final predictions.
lstm_test_predictions = lstm_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()
gru_test_predictions = gru_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()

meta_X_test = np.concatenate((lstm_test_predictions, gru_test_predictions), axis=1)
meta_X_test_tensor = torch.tensor(meta_X_test, dtype=torch.float32)

meta_test_predictions = meta_model(meta_X_test_tensor).detach().numpy()
print("meta predictions", meta_test_predictions)
# Evaluation metrics
meta_test_predictions = np.round(meta_test_predictions)
accuracy = np.mean(meta_test_predictions == y_test)
print(f'Accuracy: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, meta_test_predictions, average='binary')
print(f'Precision: {precision}, Recall: {recall}, F1 Score: {f1}')



print("lstm predictions", lstm_test_predictions)
lstm_test_predictions = np.round(lstm_test_predictions)
accuracy = np.mean(lstm_test_predictions == y_test)
print(f'Accuracy for lstm: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, lstm_test_predictions, average='binary')
print(f'Precision for lstm: {precision}, Recall for lstm: {recall}, F1 Score for lstm: {f1}')

print("gru predictions", gru_test_predictions)
gru_test_predictions = np.round(gru_test_predictions)
accuracy = np.mean(gru_test_predictions == y_test)
print(f'Accuracy for gru: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, gru_test_predictions, average='binary')
print(f'Precision for gru: {precision}, Recall for gru: {recall}, F1 Score for gru: {f1}')


meta predictions [[0.64929664]
 [0.6479691 ]
 [0.64929664]
 [0.5061321 ]
 [0.5203857 ]
 [0.5141005 ]
 [0.59893566]
 [0.64670944]
 [0.5127061 ]
 [0.64929664]
 [0.64929664]
 [0.64929664]
 [0.5139834 ]
 [0.64929664]
 [0.64929664]
 [0.5250354 ]
 [0.51086676]
 [0.6267921 ]
 [0.64929664]
 [0.6358389 ]
 [0.509813  ]
 [0.5379237 ]
 [0.50855625]
 [0.6121315 ]
 [0.64929664]
 [0.64929664]
 [0.64929664]
 [0.6406439 ]
 [0.5187294 ]
 [0.5273212 ]
 [0.5611916 ]
 [0.5658608 ]
 [0.50479513]
 [0.64929664]
 [0.64921236]
 [0.64929664]
 [0.50597394]
 [0.64929664]
 [0.64772004]
 [0.64929664]
 [0.64929664]
 [0.64929664]
 [0.5078712 ]
 [0.5122336 ]
 [0.51789606]
 [0.64929664]
 [0.64929664]
 [0.50397927]
 [0.64929664]
 [0.64929664]]
Accuracy: 0.58
Precision: 0.58, Recall: 1.0, F1 Score: 0.7341772151898734
lstm predictions [[0.86785454]
 [0.31315   ]
 [0.58280754]
 [0.01543341]
 [0.0949811 ]
 [0.08523738]
 [0.507298  ]
 [0.88133276]
 [0.02538008]
 [0.6284198 ]
 [0.6126127 ]
 [0.4574455 ]
 [0.04158091]
 [0.74776

In [99]:
#Save the models
# torch.save(lstm_model.state_dict(), 'lstm_model.pth')
# torch.save(gru_model.state_dict(), 'gru_model.pth')
# torch.save(meta_model.state_dict(), 'meta_model.pth')

: 