Data Preprocessing

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the table of pre-computed embeddings. skiprows=[1] drops file line 1,
# i.e. the first record after the header row.
# NOTE(review): confirm that this first record is meant to be discarded.
df = pd.read_csv(r"./Word_Embedding.csv", skiprows=[1])

In [2]:
df.head(1)

Unnamed: 0,Dates,Closed Price,Combined_Desc,Percentage_Change,lemmatized_text,embeddings
0,2010-06-30,1.588667,tesla roadster reaches chinas great wall tesla...,-0.251148,tesla roadster reach china great wall tesla op...,[0. 0. 0.065174 0.08753883 0...


In [3]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from ast import literal_eval

# convert string representations into NumPy arrays
def convert_str_to_array(embed_str):
    """Parse a bracketed, whitespace-separated vector string into a float array.

    Args:
        embed_str: text such as ``"[0. 0. 0.065174 0.0875]"`` (line breaks
            between numbers are fine), or an array that was already converted.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype float.

    Raises:
        ValueError: if a token between the brackets is not a valid float.
    """
    # Already converted (e.g. the cell was run twice): return it as-is so the
    # conversion is idempotent.
    if isinstance(embed_str, np.ndarray):
        return embed_str.astype(float, copy=False)

    # Strip surrounding whitespace first, then the brackets, instead of blindly
    # dropping the first and last character.
    tokens = embed_str.strip().strip("[]").split()
    return np.array([float(token) for token in tokens], dtype=float)

df['embeddings'] = df['embeddings'].apply(convert_str_to_array)

In [4]:
df.head(5)

Unnamed: 0,Dates,Closed Price,Combined_Desc,Percentage_Change,lemmatized_text,embeddings
0,2010-06-30,1.588667,tesla roadster reaches chinas great wall tesla...,-0.251148,tesla roadster reach china great wall tesla op...,"[0.0, 0.0, 0.065174, 0.08753883, 0.04256443, 0..."
1,2010-07-01,1.464,tesla roadster gets version 25 upgrade tesla r...,-7.847274,tesla roadster get version 25 upgrade tesla re...,"[0.0, 0.0, 0.0, 0.0, 0.06776106, 0.0, 0.0, 0.0..."
2,2010-07-02,1.28,tesla premiers roadster 25 goodwood festival s...,-12.568307,tesla premier roadster 25 goodwood festival sp...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,2010-07-06,1.074,tesla motors faces rough road electriccar busi...,-16.093748,tesla motor face rough road electriccar busine...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.07788439..."
4,2010-07-07,1.053333,electriccar lovers already lining teslas model...,-1.924298,electriccar lover already lining tesla model s...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [5]:
# time_steps is the number of previous days whose percentage changes are included in each data point

# each sample = percentage changes of the previous "time_steps" days + the current day's embedding
def create_combined_sequences(percentage_changes, embeddings, time_steps=3):
    """Build lagged-feature samples for predicting the next percentage change.

    For every position ``i >= time_steps`` the feature vector is the previous
    ``time_steps`` percentage changes followed by the embedding at position
    ``i``; the target is the percentage change at position ``i``.

    Args:
        percentage_changes: sequence of percentage changes (array, list or
            pandas Series).
        embeddings: sequence of embedding vectors aligned by position with
            ``percentage_changes`` (list, 2-D array or pandas Series).
        time_steps: number of preceding percentage changes to include.

    Returns:
        ``(X, y)`` as numpy arrays: one row of combined features per sample and
        the matching targets.
    """
    changes = np.asarray(percentage_changes)
    # Materialize to a list so rows are always looked up by position; indexing a
    # pandas Series with [i] is label-based and breaks on a non-default index.
    embedding_rows = list(embeddings)

    X_combined = []
    y = []
    for i in range(time_steps, len(changes)):
        lagged_changes = changes[i - time_steps:i].flatten()
        X_combined.append(np.concatenate([lagged_changes, embedding_rows[i]]))
        y.append(changes[i])
    return np.array(X_combined), np.array(y)

# Missing percentage changes are filled with 0 before the sequences are built.
percentage_changes = df['Percentage_Change'].fillna(0).values
# Per-row embedding vectors, kept as a pandas Series.
embeddings = df['embeddings']

# Uses the default time_steps=3.
X, y = create_combined_sequences(percentage_changes, embeddings)

In [6]:
from sklearn.model_selection import train_test_split

# split the data into training and testing datasets (80% / 20%)
# NOTE(review): train_test_split shuffles by default, so samples from later dates can end up
# in the training set while earlier ones are held out. Confirm this is acceptable for
# date-ordered data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=9)

# split the training dataset into training and validation datasets
# (75% / 25% of the remainder, i.e. 60% / 20% of all samples)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

In [7]:
display(X_train[1])

display(y_train[1])

array([ 1.72620372,  2.31126134, -1.34399379,  0.07858375,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.61719243,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.20736103,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.06964714,  0.        ,  0.08067048,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.1186376 ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.07253593,  0.        ,
        0.07146797,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.05475319,  0.        ,  0.        ,  0.        ,
        0.        ,  0.12404214,  0.        ,  0.        ,  0.        ,
        0.08392972,  0.        ,  0.        ,  0.        ,  0.07

-0.8116079581635294

Base Model 1 - LSTM

K-fold cross-validation to determine the hyperparameters

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
import numpy as np

# we are implementing forward pass in LSTM
# units determines the dimensionality or size of the hidden state(number of neurons) in a LSTM layer.
# https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html

class LSTMModel(nn.Module):
    """Single-layer LSTM regressor.

    The LSTM output at the last time step goes through dropout and a linear
    layer that produces one value per sample.
    """

    def __init__(self, input_dim, units, dropout_rate, l2_lambda):
        # l2_lambda is accepted in the signature but not used inside the model.
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=units, batch_first=True)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.fc = nn.Linear(in_features=units, out_features=1)

    def forward(self, x):
        """Map a (batch, steps, features) tensor to a (batch, 1) prediction."""
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(self.dropout(last_step))

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# add a new hyperparameter, named learning rate

# The batch size is the number of samples processed in each mini-batch.
# Mini-batch SGD is used within each training fold.
# In mini-batch SGD, the training data is divided into small batches, and the model's parameters are updated once per batch using the average gradient computed over that batch.
# For example, with a batch size of 5: in each k-fold iteration, (k-1) folds form the training dataset and the remaining fold is the validation dataset; the training dataset is then consumed in batches of 5 samples, with one parameter update per batch.

# This returns the average validation loss computed over all mini-batches in the validation dataset.
def train_and_evaluate_model(X_train_fold, y_train_fold, X_val_fold, y_val_fold, units, dropout_rate, learning_rate, l2_lambda, optimizer_choice, epochs, batch_size):
    """Train a fresh LSTMModel on one fold and return its validation loss.

    Args:
        X_train_fold, X_val_fold: 3-D input arrays; the feature count is read
            from ``X_train_fold.shape[2]``.
        y_train_fold, y_val_fold: targets for the corresponding inputs.
        units, dropout_rate: forwarded to ``LSTMModel``.
        learning_rate: optimizer learning rate.
        l2_lambda: passed to the optimizer as ``weight_decay`` (L2 regularization).
        optimizer_choice: one of ``'adam'``, ``'rmsprop'``, ``'sgd'``, ``'adagrad'``.
        epochs: number of passes over the training loader.
        batch_size: mini-batch size for both loaders.

    Returns:
        The mean of the per-batch MSE values over the validation loader. A
        smaller final batch carries the same weight as a full one.

    Raises:
        ValueError: if ``optimizer_choice`` is not a supported name.
    """
    optimizer_classes = {
        'adam': optim.Adam,
        'rmsprop': optim.RMSprop,
        'sgd': optim.SGD,
        'adagrad': optim.Adagrad,
    }
    # Fail early with a clear message instead of hitting an unbound `optimizer` later.
    if optimizer_choice not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer_choice {optimizer_choice!r}; expected one of {sorted(optimizer_classes)}"
        )

    train_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_train_fold), torch.Tensor(y_train_fold))
    val_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_val_fold), torch.Tensor(y_val_fold))
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

    model = LSTMModel(input_dim=X_train_fold.shape[2], units=units, dropout_rate=dropout_rate, l2_lambda=l2_lambda)
    model = model.to(device)

    criterion = nn.MSELoss()
    # weight_decay is L2 regularization
    optimizer = optimizer_classes[optimizer_choice](model.parameters(), lr=learning_rate, weight_decay=l2_lambda)

    model.train()
    for epoch in range(epochs):
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

    # Evaluation: dropout disabled, no gradient tracking.
    model.eval()
    total_val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            val_loss = criterion(outputs, targets)
            total_val_loss += val_loss.item()

    avg_val_loss = total_val_loss / len(val_loader)
    return avg_val_loss


Bayesian Hyperparameter Optimization

In [10]:
import hyperopt
from hyperopt import hp, fmin, tpe, Trials
import numpy as np

# Discrete candidate values for each hyperparameter.
units_options = np.arange(50, 325, 25).tolist()
dropout_rate_options = np.arange(0.1, 0.35, 0.05).tolist()
l2_lambda_options = np.arange(0.01, 0.1, 0.01).tolist()
optimizer_options = ['sgd', 'adagrad', 'adam', 'rmsprop']
learning_rate_options = np.arange(0.01, 0.1, 0.01).tolist()

# Debug check: confirms that .tolist() produced a plain Python list.
print(type(units_options))

# hyperopt search space: each hp.choice picks one entry from the matching options list.
search_space = {
    'units': hp.choice('units', units_options),
    'learning_rate': hp.choice('learning_rate', learning_rate_options),
    'dropout_rate': hp.choice('dropout_rate', dropout_rate_options),
    'l2_lambda': hp.choice('l2_lambda', l2_lambda_options),
    'optimizer_choice': hp.choice('optimizer_choice', optimizer_options)
}

<class 'list'>


In [11]:
# Tune on the training split only. Dedicated names keep the full-dataset X / y
# built earlier intact, so the earlier split cell stays re-runnable.
X_cv = np.array(X_train)
y_cv = np.array(y_train).reshape(-1, 1)

n_splits = 7
epochs = 7
batch_size = 16

def objective(params):
    """Return the mean k-fold validation loss for one hyperparameter sample.

    Args:
        params: dict sampled from ``search_space`` with the keys ``units``,
            ``learning_rate``, ``dropout_rate``, ``l2_lambda`` and
            ``optimizer_choice``.

    Returns:
        The average of the per-fold validation losses (lower is better).
    """
    units = int(params['units'])
    learning_rate = params['learning_rate']
    dropout_rate = params['dropout_rate']
    l2_lambda = params['l2_lambda']
    optimizer_choice = params['optimizer_choice']

    # Fixed random_state: every trial is scored on the same fold assignment.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    scores = []
    for train_index, val_index in kf.split(X_cv):
        X_train_fold, X_val_fold = X_cv[train_index], X_cv[val_index]
        y_train_fold, y_val_fold = y_cv[train_index], y_cv[val_index]

        # Add a sequence axis of length 1: (samples, features) -> (samples, 1, features)
        X_train_fold = X_train_fold.reshape((X_train_fold.shape[0], 1, X_train_fold.shape[1]))
        X_val_fold = X_val_fold.reshape((X_val_fold.shape[0], 1, X_val_fold.shape[1]))

        score = train_and_evaluate_model(X_train_fold, y_train_fold, X_val_fold, y_val_fold, units, dropout_rate, learning_rate, l2_lambda, optimizer_choice, epochs, batch_size)
        scores.append(score)

    avg_score = np.mean(scores)
    return avg_score

# Perform Bayesian optimization
trials = Trials()
best = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,  # Adjust the number of evaluations as needed
    trials=trials
)

# fmin reports each hp.choice parameter as an index into its options list;
# space_eval resolves those indices back to the actual hyperparameter values.
best_params = hyperopt.space_eval(search_space, best)

print("Best hyperparameters:", best_params)

  6%|▌         | 3/50 [00:35<09:24, 12.00s/trial, best loss: 13.03793507366252] 

Grid Search

In [None]:
'''
n_splits = 7
epochs = 5
batch_size = 16

units_options = np.arange(50, 525, 25).tolist()
dropout_rate_options = np.arange(0.1, 0.35, 0.05).tolist()
l2_lambda_options = np.arange(0.01, 0.1, 0.01).tolist()
optimizer_options = ['sgd', 'adagrad', 'adam', 'rmsprop']
learning_rate_options = np.arange(0.01, 0.1, 0.01).tolist()

# 把random_state 换去None
# Hyperparameter : units, learning_rate, dropout_rate, l2_lambda, optimizer_choice

kf = KFold(n_splits=n_splits, shuffle=True, random_state=None)

best_score = np.inf
best_params = {}

X = np.array(X_train)
y = np.array(y_train).reshape(-1, 1)

# Avoid Grid Search
# Use Bayesian Hyperparameter Optimization
for units in units_options:
    for learning_rate in learning_rate_options:
        for dropout_rate in dropout_rate_options:
            for l2_lambda in l2_lambda_options:
                for optimizer_choice in optimizer_options:
                    scores = []
                    for train_index, val_index in kf.split(X):
                        X_train_fold, X_val_fold = X[train_index], X[val_index]
                        y_train_fold, y_val_fold = y[train_index], y[val_index]

                        X_train_fold = X_train_fold.reshape((X_train_fold.shape[0], 1, X_train_fold.shape[1]))
                        X_val_fold = X_val_fold.reshape((X_val_fold.shape[0], 1, X_val_fold.shape[1]))

                        score = train_and_evaluate_model(X_train_fold, y_train_fold, X_val_fold, y_val_fold, units, dropout_rate, learning_rate, l2_lambda, optimizer_choice, epochs, batch_size)
                        scores.append(score)
                        print(scores)

                    avg_score = np.mean(scores)
                    print(f"Units: {units}, Learning rate: {learning_rate}, Dropout: {dropout_rate}, L2: {l2_lambda}, Optimizer: {optimizer_choice}, Avg Val Loss: {avg_score}")

                    if avg_score < best_score:
                        best_score = avg_score
                        best_params = {
                            'units': units,
                            'dropout_rate': dropout_rate,
                            'l2_lambda': l2_lambda,
                            'optimizer': optimizer_choice,
                            'learning_rate': learning_rate
                        }

print("Best avg validation loss:", best_score)
print("Best hyperparameters:", best_params)

'''


Building the model and testing it on the validation set

In [None]:
class CustomLSTM(nn.Module):
    """Single-layer LSTM regressor used as the first base model.

    The LSTM output at the last time step goes through dropout and a linear
    layer that produces one value per sample.

    Args:
        input_dim: number of features per time step.
        units: size of the LSTM hidden state.
        dropout_rate: dropout probability applied before the output layer.
    """

    def __init__(self, input_dim, units, dropout_rate):
        super(CustomLSTM, self).__init__()
        # nn.LSTM's own `dropout` argument only acts between stacked layers; with
        # the single layer used here it has no effect and triggers a UserWarning,
        # so dropout is applied only through self.dropout below.
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=units,
                            batch_first=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(units, 1)

    def forward(self, x):
        """Map a (batch, steps, features) tensor to a (batch, 1) prediction."""
        lstm_out, (hn, cn) = self.lstm(x)
        out = self.dropout(lstm_out[:, -1, :])  # keep only the last time step
        out = self.fc(out)
        return out
    
# Number of features per sample, read from the first training row.
input_dim = len(X_train[0])

# Hyperparameters are read from best_params (including the optimizer choice).
# .get returns None for a missing key.
best_units = best_params.get('units')
best_learning_rate = best_params.get('learning_rate')
best_dropout_rate = best_params.get('dropout_rate')
best_l2_lambda = best_params.get('l2_lambda')
best_optimizer = best_params.get('optimizer_choice')

model_lstm = CustomLSTM(input_dim=input_dim, units=best_units, dropout_rate=best_dropout_rate)
model_lstm = model_lstm.to(device)

criterion = nn.MSELoss()

def bestOptimizer(x, learning_rate, l2_lambda, model=None):
    """Build the optimizer named by ``x`` for the LSTM base model.

    Args:
        x: optimizer name, one of ``'adam'``, ``'rmsprop'``, ``'sgd'``, ``'adagrad'``.
        learning_rate: optimizer learning rate.
        l2_lambda: passed as ``weight_decay`` (L2 regularization).
        model: module whose parameters are optimized; defaults to the
            notebook-level ``model_lstm``.

    Returns:
        The configured ``torch.optim`` optimizer.

    Raises:
        ValueError: if ``x`` is not a supported optimizer name (previously the
            function returned None, which only failed later in the training loop).
    """
    optimizer_classes = {
        'adam': optim.Adam,
        'rmsprop': optim.RMSprop,
        'sgd': optim.SGD,
        'adagrad': optim.Adagrad,
    }
    if x not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer {x!r}; expected one of {sorted(optimizer_classes)}")

    target_model = model_lstm if model is None else model
    return optimizer_classes[x](target_model.parameters(), lr=learning_rate, weight_decay=l2_lambda)

optimizer = bestOptimizer(best_optimizer, best_learning_rate, best_l2_lambda)

In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Final LSTM fit: train on the training split, evaluate on the validation split.
# The "test" names in this cell (X_test_val, test_dataset, test_loader) hold the
# validation data (X_val / y_val), not the held-out test split.
X_train_train = np.array(X_train)
X_test_val = np.array(X_val)
y_train_train = np.array(y_train).reshape(-1, 1)
y_test_val = np.array(y_val).reshape(-1, 1)

# Add a sequence axis of length 1: (samples, features) -> (samples, 1, features)
X_train_train = X_train_train.reshape((X_train_train.shape[0], 1, X_train_train.shape[1]))
X_test_val = X_test_val.reshape((X_test_val.shape[0], 1, X_test_val.shape[1]))

train_dataset = TensorDataset(torch.Tensor(X_train_train), torch.Tensor(y_train_train))
test_dataset = TensorDataset(torch.Tensor(X_test_val), torch.Tensor(y_test_val))

# Only the training loader shuffles; the evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [None]:
# Train model_lstm on train_loader, then report the mean per-batch MSE on test_loader.
epochs = 50

model_lstm.train()

for epoch in range(epochs):
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model_lstm(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    # print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

# Evaluation: dropout disabled, no gradient tracking.
model_lstm.eval()
test_loss = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model_lstm(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()

# Mean of the per-batch losses (each batch weighted equally).
avg_test_loss = test_loss / len(test_loader)
print(f'Average Test Loss: {avg_test_loss}')

Save the results to CSV to be used as input for the meta model

In [None]:
# Collect model_lstm's predictions and the matching targets over test_loader,
# batch by batch in loader order.
model_lstm.eval()
predictions = []
actuals = []

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model_lstm(inputs)

        # Move predictions and actuals to CPU and convert them to numpy arrays
        predictions.extend(outputs.cpu().numpy())
        actuals.extend(targets.cpu().numpy())

# Convert lists to numpy arrays
predictions = np.array(predictions).flatten()  # Flattening in case the outputs are multi-dimensional
actuals = np.array(actuals).flatten()

In [None]:
import pandas as pd

# Create a DataFrame of predictions vs. actuals. It gets its own name so the
# source DataFrame `df` loaded at the top of the notebook is not overwritten
# (the preprocessing cells still read from it when re-run).
lstm_val_predictions_df = pd.DataFrame({'Predictions': predictions, 'Actuals': actuals})

# Save to CSV
lstm_val_predictions_df.to_csv("predictions_vs_actuals_lstm.csv", index=False)

Base Model 2 - MLP

kfold to determine params

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Subset
from sklearn.model_selection import KFold
import numpy as np

class MLPModel1(nn.Module):
    """MLP regressor with two hidden layers and dropout after the first one."""

    def __init__(self, input_dim, hidden_units, dropout_rate):
        super().__init__()
        # two hidden layers (fc1, fc2) followed by a single-value output layer
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_units)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        self.fc2 = nn.Linear(in_features=hidden_units, out_features=hidden_units)
        self.output = nn.Linear(in_features=hidden_units, out_features=1)

    def forward(self, x):
        """Map a (batch, input_dim) tensor to a (batch, 1) prediction."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.relu(self.fc2(hidden))
        return self.output(hidden)

In [None]:
X_tensor = torch.tensor(X_train, dtype=torch.float32)
y_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

dataset = TensorDataset(X_tensor, y_tensor)

In [None]:
print(X_train)

In [None]:
print(X_tensor)

In [None]:
k_folds = 5
num_epochs = 50

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_and_evaluate_model(train_loader, val_loader, hidden_units, dropout_rate, learning_rate, l2_lambda, optimizer_choice, num_epochs):
    """Train a fresh MLPModel1 and return its validation loss.

    Args:
        train_loader: loader yielding (inputs, targets) training batches.
        val_loader: loader yielding (inputs, targets) validation batches.
        hidden_units, dropout_rate: forwarded to ``MLPModel1``.
        learning_rate: optimizer learning rate.
        l2_lambda: passed to the optimizer as ``weight_decay`` (L2 regularization).
        optimizer_choice: one of ``'adam'``, ``'rmsprop'``, ``'sgd'``, ``'adagrad'``.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        The mean of the per-batch MSE values over ``val_loader``.

    Raises:
        ValueError: if ``optimizer_choice`` is not a supported name.
    """
    optimizer_classes = {
        'adam': optim.Adam,
        'rmsprop': optim.RMSprop,
        'sgd': optim.SGD,
        'adagrad': optim.Adagrad,
    }
    # Fail early with a clear message instead of hitting an unbound `optimizer` later.
    if optimizer_choice not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer_choice {optimizer_choice!r}; expected one of {sorted(optimizer_classes)}"
        )

    # The input width comes from the notebook-level X_tensor.
    model = MLPModel1(input_dim=X_tensor.shape[1], hidden_units=hidden_units, dropout_rate=dropout_rate)
    model = model.to(device)  # Move model to the appropriate device
    criterion = nn.MSELoss()

    # weight_decay is L2 regularization
    optimizer = optimizer_classes[optimizer_choice](model.parameters(), lr=learning_rate, weight_decay=l2_lambda)

    model.train()
    for epoch in range(num_epochs):
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

    # Evaluation: dropout disabled, no gradient tracking.
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    return total_loss / len(val_loader)

Bayesian Optimization Method

In [None]:
import hyperopt
from hyperopt import hp, fmin, tpe, Trials
import numpy as np

# Discrete candidate values for each MLP hyperparameter.
hidden_units_options = np.arange(50, 325, 25).tolist()
dropout_rate_options = np.arange(0.1, 0.35, 0.05).tolist()
l2_lambda_options = np.arange(0.01, 0.1, 0.01).tolist()
optimizer_options = ['sgd', 'adagrad', 'adam', 'rmsprop']
learning_rate_options = np.arange(0.01, 0.1, 0.01).tolist()

# hyperopt search space: each hp.choice picks one entry from the matching options list.
search_space = {
    'hidden_units': hp.choice('hidden_units', hidden_units_options),
    'learning_rate': hp.choice('learning_rate', learning_rate_options),
    'dropout_rate': hp.choice('dropout_rate', dropout_rate_options),
    'l2_lambda': hp.choice('l2_lambda', l2_lambda_options),
    'optimizer_choice': hp.choice('optimizer_choice', optimizer_options)
}

In [None]:
k_folds = 5
num_epochs = 50



def objective(params):
    """Return the mean k-fold validation loss of the MLP for one hyperparameter sample.

    Args:
        params: dict sampled from ``search_space`` with the keys
            ``hidden_units``, ``learning_rate``, ``dropout_rate``,
            ``l2_lambda`` and ``optimizer_choice``.

    Returns:
        The average of the per-fold validation losses (lower is better).
    """
    hidden_units = int(params['hidden_units'])
    learning_rate = params['learning_rate']
    dropout_rate = params['dropout_rate']
    l2_lambda = params['l2_lambda']
    optimizer_choice = params['optimizer_choice']

    # Use this section's k_folds; the previous n_splits was a leftover variable
    # from the LSTM tuning cell and silently overrode the fold count set here.
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    fold_scores = []

    for fold, (train_ids, val_ids) in enumerate(kf.split(X_tensor)):
        train_subsampler = Subset(dataset, train_ids)
        val_subsampler = Subset(dataset, val_ids)

        train_loader = DataLoader(train_subsampler, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_subsampler, batch_size=32, shuffle=False)

        score = train_and_evaluate_model(train_loader, val_loader, hidden_units, dropout_rate,learning_rate, l2_lambda, optimizer_choice, num_epochs)
        fold_scores.append(score)

    avg_score = np.mean(fold_scores)
    return avg_score

# Perform Bayesian optimization
trials = Trials()
best = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,  # Adjust the number of evaluations as needed
    trials=trials
)

# fmin reports each hp.choice parameter as an index into its options list;
# space_eval resolves those indices back to the actual hyperparameter values.
best_params = hyperopt.space_eval(search_space, best)

print("Best hyperparameters:", best_params)

Grid Search

In [None]:
'''
best_score = float('inf')
best_params = None

# Start the hyperparameter tuning process
for hidden_units in hyperparameters['hidden_units']:
    for dropout_rate in hyperparameters['dropout_rate']:
        for learning_rate in hyperparameters['learning_rate']:

            fold_scores = []

            kfold = KFold(n_splits=k_folds, shuffle=True)
            for fold, (train_ids, val_ids) in enumerate(kfold.split(X_tensor)):
                train_subsampler = Subset(dataset, train_ids)
                val_subsampler = Subset(dataset, val_ids)

                train_loader = DataLoader(train_subsampler, batch_size=32, shuffle=True)
                val_loader = DataLoader(val_subsampler, batch_size=32, shuffle=False)

                # Pass num_epochs to the function
                score = train_and_evaluate_model(train_loader, val_loader, hidden_units, dropout_rate, learning_rate, num_epochs)
                fold_scores.append(score)

            avg_score = np.mean(fold_scores)
            print(f"Hidden Units: {hidden_units}, Dropout: {dropout_rate}, LR: {learning_rate}, Avg Score: {avg_score}")

            if avg_score < best_score:
                best_score = avg_score
                best_params = {'hidden_units': hidden_units, 'dropout_rate': dropout_rate, 'learning_rate': learning_rate}

print("Best Score:", best_score)
print("Best Hyperparameters:", best_params)
'''

building the model and test on validation set

In [None]:
# Number of input features, taken from the training tensor.
input_dim = X_tensor.shape[1]

# Hyperparameters selected by the MLP search above (.get returns None for a missing key).
best_hidden_units = best_params.get('hidden_units')
best_learning_rate = best_params.get('learning_rate')
best_dropout_rate = best_params.get('dropout_rate')
best_l2_lambda = best_params.get('l2_lambda')
best_optimizer = best_params.get('optimizer_choice')

model_mlp1 = MLPModel1(input_dim=input_dim, hidden_units=best_hidden_units, dropout_rate=best_dropout_rate).to(device)
# Second .to(device) is redundant; the model was already moved on the line above.
model_mlp1 = model_mlp1.to(device)

criterion = nn.MSELoss()

def bestOptimizer(x, learning_rate, l2_lambda, model=None):
    """Build the optimizer named by ``x`` for the MLP base model.

    Args:
        x: optimizer name, one of ``'adam'``, ``'rmsprop'``, ``'sgd'``, ``'adagrad'``.
        learning_rate: optimizer learning rate.
        l2_lambda: passed as ``weight_decay`` (L2 regularization).
        model: module whose parameters are optimized; defaults to the
            notebook-level ``model_mlp1``.

    Returns:
        The configured ``torch.optim`` optimizer.

    Raises:
        ValueError: if ``x`` is not a supported optimizer name.
    """
    optimizer_classes = {
        'adam': optim.Adam,
        'rmsprop': optim.RMSprop,
        'sgd': optim.SGD,
        'adagrad': optim.Adagrad,
    }
    if x not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer {x!r}; expected one of {sorted(optimizer_classes)}")

    # Optimize the MLP's parameters. The earlier copy of this helper was bound to
    # model_lstm, so training model_mlp1 with it never updated the MLP's weights.
    target_model = model_mlp1 if model is None else model
    return optimizer_classes[x](target_model.parameters(), lr=learning_rate, weight_decay=l2_lambda)

optimizer = bestOptimizer(best_optimizer, best_learning_rate, best_l2_lambda)

In [None]:
# Validation tensors for the MLP; targets are reshaped to one column.
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1).to(device)

# `dataset` is the full training TensorDataset built from X_train / y_train.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
# Despite the "test" names, these hold the validation split.
test_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
display(len(X_val_tensor))

In [None]:
# Train the model
num_epochs = 50

model_mlp1.train()

for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model_mlp1(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    # print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Evaluate on test_loader, collecting predictions and targets in loader order.
model_mlp1.eval()  # Set the model to evaluation mode
predictions_mlp = []
actuals_mlp = []

test_loss = 0

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model_mlp1(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()

        # Assuming your output and targets are in the expected shape; otherwise, adjust accordingly
        predictions_mlp.extend(outputs.cpu().numpy())
        actuals_mlp.extend(targets.cpu().numpy())

# Flatten lists if outputs are multi-dimensional
predictions_mlp = np.array(predictions_mlp).flatten()
actuals_mlp = np.array(actuals_mlp).flatten()

# Mean of the per-batch losses (each batch weighted equally).
avg_test_loss = test_loss / len(test_loader)
print(f'Average Test Loss: {avg_test_loss}')

In [None]:
# Create a DataFrame with predictions and actuals
df_mlp = pd.DataFrame({'Predictions': predictions_mlp, 'Actuals': actuals_mlp})

# Save the DataFrame to a CSV file
df_mlp.to_csv("predictions_vs_actuals_mlp.csv", index=False)

Meta model - MLP (hyperparameters not tuned)

In [None]:
input_mlp = pd.read_csv(r"predictions_vs_actuals_mlp.csv")
input_lstm = pd.read_csv(r"predictions_vs_actuals_lstm.csv")

In [None]:
print(input_mlp.head(3))

In [None]:
print(input_lstm.head(3))

In [None]:
# Join the two prediction tables side by side on their row index. Both have the
# columns 'Predictions' and 'Actuals', so the suffixes produce
# Predictions_mlp / Actuals_mlp / Predictions_lstm / Actuals_lstm.
# NOTE(review): this assumes both CSVs list the same samples in the same order.
combined_df = input_mlp.join(input_lstm, lsuffix='_mlp', rsuffix='_lstm')

print(combined_df.head(3))

In [None]:
X_new_train = combined_df[['Predictions_mlp', 'Predictions_lstm']].values
y_new_train = combined_df['Actuals_lstm'].values

In [None]:
class MetaMLPModel(nn.Module):
    """Meta-learner MLP: three hidden layers (32, 64, 128 units) and a linear output.

    Each hidden layer is followed by ReLU and dropout; the output layer has neither.
    """

    def __init__(self, input_size, output_size=1, dropout_rate=0.2):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=128)
        self.fc4 = nn.Linear(in_features=128, out_features=output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        """Map a (batch, input_size) tensor to a (batch, output_size) prediction."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = self.dropout(self.relu(hidden_layer(x)))
        # No dropout before the output layer
        return self.fc4(x)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Two input features: the MLP prediction and the LSTM prediction.
input_size = 2
meta_model = MetaMLPModel(input_size=input_size).to(device)

# Convert to PyTorch tensors
X_new_train_tensor = torch.tensor(X_new_train, dtype=torch.float).to(device)
y_new_train_tensor = torch.tensor(y_new_train, dtype=torch.float).view(-1, 1).to(device)

# Create TensorDataset and DataLoader
meta_train_dataset = TensorDataset(X_new_train_tensor, y_new_train_tensor)
meta_train_loader = DataLoader(dataset=meta_train_dataset, batch_size=32, shuffle=True)


# Define the loss function and the optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(meta_model.parameters(), lr=0.05, weight_decay= 0.04)  # Set a learning rate

# Training loop
meta_model.train()
epochs = 200  # Set the number of epochs
for epoch in range(epochs):
    for inputs, targets in meta_train_loader:
        optimizer.zero_grad()
        outputs = meta_model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    # Optional: Print the loss every epoch
    # (this is the loss of the last mini-batch of the epoch, not an epoch average)
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')


Test-set loss for our stacking model

In [None]:
X_test_lstm = np.array(X_test)
y_test_lstm = np.array(y_test).reshape(-1, 1)

X_test_lstm = X_test_lstm.reshape((X_test_lstm.shape[0], 1, X_test_lstm.shape[1]))

test_dataset = TensorDataset(torch.Tensor(X_test_lstm), torch.Tensor(y_test_lstm))

test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [None]:
# Evaluate the trained LSTM on test_loader, collecting predictions and targets in loader order.
model_lstm.eval()
predictions = []
actuals = []

test_loss = 0

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model_lstm(inputs)

        # Move predictions and actuals to CPU and convert them to numpy arrays
        predictions.extend(outputs.cpu().numpy())
        actuals.extend(targets.cpu().numpy())
        loss = criterion(outputs, targets)
        test_loss += loss.item()

# Convert lists to numpy arrays
predictions = np.array(predictions).flatten()  # Flattening in case the outputs are multi-dimensional
actuals = np.array(actuals).flatten()

# Mean of the per-batch losses (each batch weighted equally).
avg_test_loss = test_loss / len(test_loader)
print(f'Average Test Loss: {avg_test_loss}')

In [None]:
import pandas as pd

# Create a DataFrame of test predictions vs. actuals under its own name, so
# the notebook-level `df` is not rebound to an unrelated table.
lstm_test_predictions_df = pd.DataFrame({'Predictions': predictions, 'Actuals': actuals})

# Save to CSV
lstm_test_predictions_df.to_csv("intermediate_lstm.csv", index=False)

In [None]:
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Evaluate the trained MLP on test_loader, collecting predictions and targets in loader order.
model_mlp1.eval()  # Set the model to evaluation mode
predictions_mlp = []
actuals_mlp = []

test_loss = 0

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model_mlp1(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()

        # Assuming your output and targets are in the expected shape; otherwise, adjust accordingly
        predictions_mlp.extend(outputs.cpu().numpy())
        actuals_mlp.extend(targets.cpu().numpy())

# Flatten lists if outputs are multi-dimensional
predictions_mlp = np.array(predictions_mlp).flatten()
actuals_mlp = np.array(actuals_mlp).flatten()

# Mean of the per-batch losses (each batch weighted equally).
avg_test_loss = test_loss / len(test_loader)
print(f'Average Test Loss: {avg_test_loss}')

In [None]:
# Create a DataFrame with predictions and actuals
df_mlp = pd.DataFrame({'Predictions': predictions_mlp, 'Actuals': actuals_mlp})

# Save the DataFrame to a CSV file
df_mlp.to_csv("intermediate_mlp1.csv", index=False)

In [None]:
testinput_mlp1 = pd.read_csv(r"intermediate_mlp1.csv")
testinput_lstm = pd.read_csv(r"intermediate_lstm.csv")

In [None]:
combined_df_test = testinput_mlp1.join(testinput_lstm, lsuffix='_mlp', rsuffix='_lstm')

In [None]:
X_new_test = combined_df_test[['Predictions_mlp', 'Predictions_lstm']].values
y_new_test = combined_df_test['Actuals_lstm'].values

In [None]:
# Build the tensors under new names so the numpy inputs stay untouched and this
# cell can be re-run safely.
X_new_test_tensor = torch.tensor(X_new_test, dtype=torch.float).to(device)
y_new_test_tensor = torch.tensor(y_new_test, dtype=torch.float).view(-1, 1).to(device)

meta_test_dataset = TensorDataset(X_new_test_tensor, y_new_test_tensor)
# Evaluation only: no shuffling, so the batch composition (and therefore the
# batch-averaged loss reported from this loader) is the same on every run.
meta_test_loader = DataLoader(dataset=meta_test_dataset, batch_size=32, shuffle=False)

In [None]:
# Final evaluation of the stacked (meta) model on the test-set predictions of the base models.
meta_model.eval()
test_loss = 0
with torch.no_grad():
    for inputs, targets in meta_test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = meta_model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()

# Mean of the per-batch MSE values (each batch weighted equally).
avg_test_loss = test_loss / len(meta_test_loader)
print(f'Average Test Loss: {avg_test_loss}')