# Define the level 1 models

In [88]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Hyperparameters
timesteps = 10  # Number of time steps
num_features = 5  # Number of features
n_epochs = 100  # Number of epochs
learning_rate = 0.001

# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(num_features, 60, batch_first=True)
        self.dropout1 = nn.Dropout(0.4)
        self.lstm2 = nn.LSTM(60, 55, batch_first=True)
        self.dropout2 = nn.Dropout(0.4)
        self.lstm3 = nn.LSTM(55, 40, batch_first=True)
        self.dropout3 = nn.Dropout(0.4)
        self.lstm4 = nn.LSTM(40, 55, batch_first=True)
        self.dropout4 = nn.Dropout(0.4)
        self.fc = nn.Linear(55, 1)  # Ensures the output is of size [batch_size, 1]
        self.sigmoid = nn.Sigmoid()
        
        
    def forward(self, x):
        x, _ = self.lstm1(x)
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.dropout2(x)
        x, _ = self.lstm3(x)
        x = self.dropout3(x)
        x, _ = self.lstm4(x)
        x = self.dropout4(x)
        x = self.fc(x)   # Linear layer to map to 1 output
        x = x[:, -1, :]  # Take the last output, remove this?
        
        x = self.sigmoid(x)
        return x

# GRU Model
class GRUModel(nn.Module):
    def __init__(self):
        super(GRUModel, self).__init__()
        self.gru1 = nn.GRU(num_features, 60, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.gru2 = nn.GRU(60, 55, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)
        self.gru3 = nn.GRU(55, 40, batch_first=True)
        self.dropout3 = nn.Dropout(0.2)
        self.gru4 = nn.GRU(40, 55, batch_first=True)
        self.dropout4 = nn.Dropout(0.2)
        self.fc = nn.Linear(55, 1)  # Ensures the output is of size [batch_size, 1]
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x, _ = self.gru1(x)
        x = self.dropout1(x)
        x, _ = self.gru2(x)
        x = self.dropout2(x)
        x, _ = self.gru3(x)
        x = self.dropout3(x)
        x, _ = self.gru4(x)
        x = self.dropout4(x)
        x = self.fc(x)   # Linear layer to map to 1 output
        x = x[:, -1, :]  # Take the last output, remove this?
        
        x = self.sigmoid(x)
        return x

# Define the meta learner

In [89]:
# it's a fully-connect neuralnetwork with three layers; the activation function for this model is the Rectified Linear Unit (ReLu).
# NOTE: The paper doesn't specify the number of neurons in the hidden layers, so I'm basing on the stanford paper
class MetaLearner(nn.Module):
    def __init__(self):
        super(MetaLearner, self).__init__()
        self.fc1 = nn.Linear(2, 30)
        self.fc2 = nn.Linear(30, 25)
        self.fc3 = nn.Linear(25, 20)
        self.fc4 = nn.Linear(20, 1)
        self.sigmoid = nn.Sigmoid() 
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        # x = self.fc4(x)
        x = self.sigmoid(x) 
        return x

# Load in data

In [90]:
import pandas as pd
import numpy as np

split = 0.69  # Adjust to allocate space for validation set
val_split = 0.16  # 15% for validation, and implicitly 15% for test due to remaining percentage
sequence_length = 10
normalise = True
batch_size = 100
input_dim = 5
input_timesteps = 9
neurons = 50
epochs = 5
prediction_len = 1
dense_output = 1
drop_out = 0

# This approach does not normalize the compound scores in the range of 0 to 1
def naive():
  # Load data, modify cols whenever necessary
  dataframe = pd.read_csv("data/original_dataset/source_price.csv")

  cols = ['Adj Close', 'wsj_mean_compound', 'cnbc_mean_compound', 'fortune_mean_compound', 'reuters_mean_compound']

  len_dataframe = dataframe.shape[0]

  # Split data into train, validation, and test
  i_split = int(len(dataframe) * split)
  i_val = int(len(dataframe) * (split + val_split))

  data_train = dataframe.get(cols).values[:i_split]
  data_val = dataframe.get(cols).values[i_split:i_val]
  data_test = dataframe.get(cols).values[i_val:]

  # print(data_train[0:5,0])

  len_train = len(data_train)
  len_val = len(data_val)
  len_test = len(data_test)
  len_train_windows = None

  # Process train data
  data_windows = []
  for i in range(len_train - sequence_length):
      data_windows.append(data_train[i:i+sequence_length])
  data_windows = np.array(data_windows).astype(float)
  window_data = data_windows
  win_num = window_data.shape[0]
  col_num = window_data.shape[2]
  normalised_data = []
  record_min = []
  record_max = []

  # Normalize train data
  for win_i in range(win_num):
      normalised_window = []
      for col_i in range(0,1):
        temp_col = window_data[win_i,:,col_i]
        temp_min = min(temp_col)
        record_min.append(temp_min)
        temp_col = temp_col - temp_min
        temp_max = max(temp_col)
        record_max.append(temp_max)
        temp_col = temp_col / temp_max
        normalised_window.append(temp_col)
      for col_i in range(1,col_num):
        normalised_window.append(window_data[win_i,:,col_i])
      normalised_window = np.array(normalised_window).T
      normalised_data.append(normalised_window)
  normalised_data = np.array(normalised_data)
  x_train = normalised_data[:, :-1]
  # Classification problem now
  y_train = []
  for i in range(len_train - sequence_length):
      current_last = data_train[i+sequence_length-1, 0]
      next_first = data_train[i+sequence_length, 0]
      y_train.append(1 if next_first > current_last else 0)
  y_train = np.array(y_train)

  # Process validation data
  data_windows = []
  for i in range(len_val - sequence_length):
      data_windows.append(data_val[i:i+sequence_length])
  data_windows = np.array(data_windows).astype(float)
  window_data = data_windows
  win_num = window_data.shape[0]
  normalised_data = []

  # Normalize validation data
  for win_i in range(win_num):
      normalised_window = []
      for col_i in range(0,1):
        temp_col = window_data[win_i,:,col_i]
        temp_min = min(temp_col)
        temp_col = temp_col - temp_min
        temp_max = max(temp_col)
        temp_col = temp_col / temp_max
        normalised_window.append(temp_col)
      for col_i in range(1,col_num):
        normalised_window.append(window_data[win_i,:,col_i])
      normalised_window = np.array(normalised_window).T
      normalised_data.append(normalised_window)
  normalised_data = np.array(normalised_data)
  x_val = normalised_data[:, :-1]
  y_val = []
  for i in range(len_val - sequence_length):
      current_last = data_val[i+sequence_length-1, 0]
      next_first = data_val[i+sequence_length, 0]
      y_val.append(1 if next_first > current_last else 0)
  y_val = np.array(y_val)

  # Process test data
  data_windows = []
  for i in range(len_test - sequence_length):
      data_windows.append(data_test[i:i+sequence_length])
  data_windows = np.array(data_windows).astype(float)
  y_test_ori = data_windows[:, -1, [0]]
  window_data = data_windows
  win_num = window_data.shape[0]
  normalised_data = []

  # Normalize test data
  for win_i in range(win_num):
      normalised_window = []
      for col_i in range(0,1):
        temp_col = window_data[win_i,:,col_i]
        temp_min = min(temp_col)
        temp_col = temp_col - temp_min
        temp_max = max(temp_col)
        temp_col = temp_col / temp_max
        normalised_window.append(temp_col)
      for col_i in range(1,col_num):
        normalised_window.append(window_data[win_i,:,col_i])
      normalised_window = np.array(normalised_window).T
      normalised_data.append(normalised_window)
  normalised_data = np.array(normalised_data)
  x_test = normalised_data[:, :-1]
  y_test = []
  for i in range(len_test - sequence_length):
      current_last = data_test[i+sequence_length-1, 0]
      next_first = data_test[i+sequence_length, 0]
      y_test.append(1 if next_first > current_last else 0)
  y_test = np.array(y_test)
  return x_train, y_train, x_val, y_val, x_test, y_test

def new():
  df = pd.read_csv('data/original_dataset/source_price.csv')
  # Partition data into training, validation and test sets. Training data should be from date 12/07/2017 to 04/09/2018, validation data (from 04/10/2018 to 05/04/2018), and test data (from 05/07/2018 to 06/01/2018)

  from sklearn.preprocessing import MinMaxScaler

  # Hardcodidly extracting the exact dates for the partitioning
  df_train = df.loc[0:82]
  df_val = df.loc[83:101]
  df_test = df.loc[102:]

  # print(df_val.head())
  # print(df_val.tail())
  # print(df_test.head())
  # print(df_val)

  df_train = df_train.drop(columns=['date'])
  df_val = df_val.drop(columns=['date'])
  df_test = df_test.drop(columns=['date'])

  print("DF TRAIN", df_train.shape)
  print("DF VAL", df_val.shape)
  print("DF TEST", df_test.shape)

  sc = MinMaxScaler(feature_range=(0,1))
  print("DF TRAIN", df_train.head())
  df_train = sc.fit_transform(df_train)
  df_val = sc.transform(df_val)
  df_test = sc.transform(df_test)

  def create_sequences_numpy_classification(data, n_days):
      X, y = [], []
      for i in range(n_days, len(data) - 1): 
          # print("X")
          X.append(data[i-n_days:i])
          # print(data[i-n_days:i])
          y.append(1 if data[i][-1] - data[i-1][-1] > 0 else 0) #Classification task
          # print(data[i][-1], data[i-1][-1])
          # print("Y")
          # print(y[-1])
          
      # Delete the first column of X
      # X = np.delete(X, 0, axis=2)
      return np.array(X), np.array(y)

  # df = 
  print(timesteps)
  x_train, y_train = create_sequences_numpy_classification(df_train, 10)
  x_val, y_val = create_sequences_numpy_classification(df_val, 10)
  x_test, y_test = create_sequences_numpy_classification(df_test, 10)
  return x_train, y_train, x_val, y_val, x_test, y_test


x_train, y_train, x_val, y_val, x_test, y_test = new()

print("NEW")

print('x_train.shape', x_train.shape)
print('y_train.shape', y_train.shape)
print('x_val.shape', x_val.shape)
print('y_val.shape', y_val.shape)
print('x_test.shape', x_test.shape)
print('y_test.shape', y_test.shape)



print(x_train[0])

print(y_train[0:5])

print(x_val[0])

print(y_val[0])

print(x_test[0])

print(y_test[0])


DF TRAIN (83, 5)
DF VAL (19, 5)
DF TEST (19, 5)
DF TRAIN    wsj_mean_compound  cnbc_mean_compound  fortune_mean_compound  \
0              0.296             -0.1366                 0.0000   
1              0.000              0.0000                -0.2423   
2              0.000              0.0000                 0.0000   
3              0.000              0.0000                 0.0000   
4              0.000              0.0000                 0.0000   

   reuters_mean_compound    Adj Close  
0                    0.0  2636.979980  
1                    0.0  2651.500000  
2                    0.0  2659.989990  
3                    0.0  2664.110107  
4                    0.0  2662.850098  
10
NEW
x_train.shape (72, 10, 5)
y_train.shape (72,)
x_val.shape (8, 10, 5)
y_val.shape (8,)
x_test.shape (8, 10, 5)
y_test.shape (8,)
[[0.91548214 0.         0.82728766 0.10191495 0.19179757]
 [0.34690741 0.31576514 0.40678881 0.10191495 0.2415458 ]
 [0.34690741 0.31576514 0.82728766 0.10191495 0.2

# Instatiate the models

In [91]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Instantiate models
lstm_model = LSTMModel()
gru_model = GRUModel()
meta_model = MetaLearner()

# Define loss and optimizer
criterion = nn.BCELoss()
lstm_optimizer = optim.RMSprop(lstm_model.parameters(), lr=0.0008) # 16 batch size, 150 epochs
gru_optimizer = optim.RMSprop(gru_model.parameters(), lr=0.0008) # 16 batch size, 200 epochs
base_models_batch_size = 16
meta_optimizer = optim.Adam(meta_model.parameters(), lr=0.008) # 100 epochs, 8 batch size
meta_learner_batch_size = 8

# Train the base models

In [92]:
# Convert data to PyTorch tensors and create DataLoader
X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

print(X_train_tensor.shape)
print(y_train_tensor.shape)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=base_models_batch_size, shuffle=False)

# Training function
def train_model(model, optimizer, criterion, train_loader, n_epochs):
    model.train()
    for epoch in range(n_epochs):
        epoch_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch.view(-1, 1))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f'Epoch {epoch+1}/{n_epochs}, Loss: {epoch_loss/len(train_loader)}')

# Train the LSTM model
print("Training LSTM Model")
train_model(lstm_model, lstm_optimizer, criterion, train_loader, 150)

# Train the GRU model
print("Training GRU Model")
train_model(gru_model, gru_optimizer, criterion, train_loader, 200)

torch.Size([72, 10, 5])
torch.Size([72])
Training LSTM Model


Epoch 1/150, Loss: 0.6986353993415833
Epoch 2/150, Loss: 0.6938703894615174
Epoch 3/150, Loss: 0.696661901473999
Epoch 4/150, Loss: 0.6915799856185914
Epoch 5/150, Loss: 0.6926833987236023
Epoch 6/150, Loss: 0.6932829141616821
Epoch 7/150, Loss: 0.6923085927963257
Epoch 8/150, Loss: 0.6862512826919556
Epoch 9/150, Loss: 0.6850280284881591
Epoch 10/150, Loss: 0.6908631801605225
Epoch 11/150, Loss: 0.6861680746078491
Epoch 12/150, Loss: 0.6898617744445801
Epoch 13/150, Loss: 0.6925292611122131
Epoch 14/150, Loss: 0.6879832029342652
Epoch 15/150, Loss: 0.6860392928123474
Epoch 16/150, Loss: 0.687736427783966
Epoch 17/150, Loss: 0.6863203048706055
Epoch 18/150, Loss: 0.6891567945480347
Epoch 19/150, Loss: 0.6891563296318054
Epoch 20/150, Loss: 0.6858827948570252
Epoch 21/150, Loss: 0.6810502767562866
Epoch 22/150, Loss: 0.6862143635749817
Epoch 23/150, Loss: 0.6702996611595153
Epoch 24/150, Loss: 0.6837924242019653
Epoch 25/150, Loss: 0.6820971488952636
Epoch 26/150, Loss: 0.68137949705123

# Use base models to predict the validation data, this will be used as input to the Meta Learner

In [93]:
lstm_val_predictions = lstm_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy()
gru_val_predictions = gru_model(torch.tensor(x_val, dtype=torch.float32)).detach().numpy()


# Combine predictions to form new training data for the meta-learner
meta_X_train = np.concatenate((lstm_val_predictions, gru_val_predictions), axis=1)

print(meta_X_train.shape)

print(meta_X_train)

print(y_val)

print(x_val)

(8, 2)
[[0.454582   0.9168079 ]
 [0.44063616 0.98396456]
 [0.4357485  0.9908494 ]
 [0.46254846 0.8609845 ]
 [0.8470854  0.9867219 ]
 [0.8092524  0.9968315 ]
 [0.7287035  0.9963653 ]
 [0.8628865  0.9828859 ]]
[0 1 1 1 0 1 0 0]
[[[0.32028808 0.41294732 0.82825113 0.1733249  0.25994479]
  [0.31745532 0.4259708  0.75866177 0.09811817 0.20964784]
  [0.29671103 0.45758086 0.67728109 0.15906251 0.28433877]
  [0.29118965 0.50551591 0.77864232 0.18551763 0.25799164]
  [0.29693836 0.489379   0.81943248 0.13741159 0.33179172]
  [0.38502544 0.46885804 0.97367736 0.13808754 0.42960853]
  [0.3327085  0.42533342 0.94910584 0.21256231 0.43731744]
  [0.29614421 0.44631568 0.81592048 0.21472825 0.38417733]
  [0.21226419 0.41306848 0.88312378 0.18540914 0.30540945]
  [0.42276637 0.46862395 0.79618596 0.05588622 0.30592388]]

 [[0.31745532 0.4259708  0.75866177 0.09811817 0.20964784]
  [0.29671103 0.45758086 0.67728109 0.15906251 0.28433877]
  [0.29118965 0.50551591 0.77864232 0.18551763 0.25799164]
  [0.

# Train meta learner

In [94]:
meta_model = MetaLearner()
meta_criterion = nn.BCELoss()
meta_optimizer = optim.Adam(meta_model.parameters(), lr=0.008)

meta_X_train_tensor = torch.tensor(meta_X_train, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

meta_train_dataset = TensorDataset(meta_X_train_tensor, y_val_tensor)
meta_train_loader = DataLoader(meta_train_dataset, batch_size=8, shuffle=False)

train_model(meta_model, meta_optimizer, meta_criterion, meta_train_loader, 20)

Epoch 1/20, Loss: 0.6976877450942993
Epoch 2/20, Loss: 0.6921930313110352
Epoch 3/20, Loss: 0.6886435151100159
Epoch 4/20, Loss: 0.6856529712677002
Epoch 5/20, Loss: 0.6828100681304932
Epoch 6/20, Loss: 0.6790404915809631
Epoch 7/20, Loss: 0.6743289232254028
Epoch 8/20, Loss: 0.6688950061798096
Epoch 9/20, Loss: 0.6626771092414856
Epoch 10/20, Loss: 0.6551073789596558
Epoch 11/20, Loss: 0.6460602283477783
Epoch 12/20, Loss: 0.6359153389930725
Epoch 13/20, Loss: 0.625495433807373
Epoch 14/20, Loss: 0.6137111783027649
Epoch 15/20, Loss: 0.6015245914459229
Epoch 16/20, Loss: 0.5903951525688171
Epoch 17/20, Loss: 0.5790141820907593
Epoch 18/20, Loss: 0.5701298117637634
Epoch 19/20, Loss: 0.5626406073570251
Epoch 20/20, Loss: 0.557481050491333


In [95]:
from sklearn.metrics import precision_recall_fscore_support
#  the test dataset will be input into the sub-models again to produce intermediate test data for the meta-learner. Afterward, the meta-learner will use the intermediate test predictions from the sub-models to make the final predictions.
print(x_test)
lstm_test_predictions = lstm_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()
gru_test_predictions = gru_model(torch.tensor(x_test, dtype=torch.float32)).detach().numpy()

meta_X_test = np.concatenate((lstm_test_predictions, gru_test_predictions), axis=1)
meta_X_test_tensor = torch.tensor(meta_X_test, dtype=torch.float32)
print(meta_X_test_tensor)

meta_test_predictions = meta_model(meta_X_test_tensor).detach().numpy()
print(meta_test_predictions)

# Evaluation metrics
meta_test_predictions = np.round(meta_test_predictions)
print(meta_test_predictions)
accuracy = np.mean(meta_test_predictions == y_test)
print(f'Accuracy: {accuracy}')

precision, recall, f1, _ = precision_recall_fscore_support(y_test, meta_test_predictions, average='binary')
print(f'Precision: {precision}, Recall: {recall}, F1 Score: {f1}')

[[[ 0.32610957  0.498087    0.76944387  0.26759267  0.31394061]
  [ 0.38574976  0.50526396  0.94749536  0.19756792  0.31150816]
  [ 0.39698747  0.51372288  0.83485069  0.2221625   0.40014387]
  [ 0.31509484  0.51934937  0.80658625  0.18104715  0.48675784]
  [ 0.27395342  0.48169417  0.77699124  0.18770471  0.50268925]
  [ 0.40076703  0.50464802  0.82945302  0.10321635  0.51094605]
  [ 0.30328349  0.48949446  0.80394212  0.10627921  0.44694521]
  [ 0.40488323  0.50721916  0.79442986  0.14275055  0.4846675 ]
  [ 0.30483637  0.48481091  0.65770141  0.09699912  0.47668423]
  [ 0.33894627  0.4781492   0.77809782  0.14909903  0.45215308]]

 [[ 0.38574976  0.50526396  0.94749536  0.19756792  0.31150816]
  [ 0.39698747  0.51372288  0.83485069  0.2221625   0.40014387]
  [ 0.31509484  0.51934937  0.80658625  0.18104715  0.48675784]
  [ 0.27395342  0.48169417  0.77699124  0.18770471  0.50268925]
  [ 0.40076703  0.50464802  0.82945302  0.10321635  0.51094605]
  [ 0.30328349  0.48949446  0.80394212