# Importing libraries and functions

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import load_model
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import pickle

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor


# Loading, preprocessing and scaling the data

## Reddit and bitcoin merged data

In [None]:
# Read the merged Reddit + Bitcoin CSV; the 'timestamp' column becomes the (date-parsed) index
data = pd.read_csv(
    "/content/drive/MyDrive/FYP/dataset/Merged Dataset/reddit_bitcoin_merged.csv",
    index_col='timestamp',
    parse_dates=True,
)

# Remove the columns that are not used as model inputs
data = data.drop(
    ['flair', 'compound', 'polarity', 'subjectivity', 'open', 'high', 'low'],
    axis=1,
)

## Twitter and bitcoin merged data

In [None]:
# Load the merged Twitter + Bitcoin dataset.
# The timestamps sit in the first, header-less CSV column (the one pandas would
# otherwise label 'Unnamed: 0'), so that column is read directly as the index.
# `parse_dates=True` only parses the index: without `index_col` nothing was
# parsed and the timestamps stayed plain strings, unlike in the Reddit cell.
data = pd.read_csv(
    '/content/drive/MyDrive/FYP/dataset/Merged Dataset/twitter_bitcoin_merged.csv',
    index_col=0,
    parse_dates=True,
)

# Name the index 'timestamp' so both datasets expose the same index name
data = data.rename_axis('timestamp')

# Remove the columns that are not used as model inputs
data = data.drop(columns=['compound', 'polarity', 'subjectivity', 'open', 'high', 'low'])

In [None]:
# Build the prediction target: each row's "target" is the "close" value of the
# following row (one hour ahead, assuming hourly rows - TODO confirm).
# The last row has no following "close", so it is removed, together with any
# other row that contains missing values.
data = (
    data
    .assign(target=data["close"].shift(-1))
    .iloc[:-1]
    .dropna()
)

# Display the resulting dataframe
data

In [None]:
# Names of the five feature columns
features = ['pos', 'neg', 'neu', 'close', 'volume']

# Show only those columns
# NOTE(review): the split cell below builds X from every non-target column, not from `features`.
data.loc[:, features]


In [None]:
# Inputs are every column except 'target'; the target becomes a column vector
X = data.drop(columns='target').values
y = data['target'].values.reshape(-1, 1)

# Positional 80/20 split: the first 80% of the rows train, the remainder tests.
# No shuffling is applied and no separate validation set is created here.
train_size = int(0.8 * len(X))
test_size = len(X) - train_size
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Min-max scale the inputs; the scaler is fitted on the training rows only
scaler_X = MinMaxScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Same treatment for the target, with its own scaler (needed later to invert predictions)
scaler_y = MinMaxScaler()
y_train_scaled = scaler_y.fit_transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

# Insert a length-1 timestep axis so the inputs are 3D: [samples, timesteps, features]
n_features = X.shape[1]
X_train_reshaped = X_train_scaled.reshape(len(X_train_scaled), 1, n_features)
X_test_reshaped = X_test_scaled.reshape(len(X_test_scaled), 1, n_features)

In [None]:
# Report the number of input features, then dump both reshaped input arrays
print('No. of features', n_features)
print(X_train_reshaped, X_test_reshaped, sep='\n')

# Loading, training and testing the base models

## Reddit RNN base learners

In [None]:
# Load the five saved Reddit base learners, in the order RNN, LSTM, GRU, BiLSTM, BiGRU.
# These names are shared with the Twitter cell below, so whichever cell runs last wins.
model_rnn, model_lstm, model_gru, model_bilstm, model_bigru = (
    load_model(model_file)
    for model_file in (
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_rnn.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_lstm.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_gru.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_bilstm.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_bigru.h5',
    )
)


## Twitter RNN base learners

In [None]:
# Load the five saved Twitter base learners, in the order RNN, LSTM, GRU, BiLSTM, BiGRU.
# These names are shared with the Reddit cell above, so whichever cell runs last wins.
model_rnn, model_lstm, model_gru, model_bilstm, model_bigru = (
    load_model(model_file)
    for model_file in (
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_rnn.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_lstm.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_gru.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_bilstm.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_bigru.h5',
    )
)


## Training base models

In [None]:
# Run each loaded base learner on the training inputs
# (order: RNN, LSTM, GRU, BiLSTM, BiGRU). No fitting happens here.
(
    preds_train_rnn,
    preds_train_lstm,
    preds_train_gru,
    preds_train_bilstm,
    preds_train_bigru,
) = (
    base_model.predict(X_train_reshaped)
    for base_model in (model_rnn, model_lstm, model_gru, model_bilstm, model_bigru)
)

# Place the five prediction arrays side by side, one column per base learner
base_preds_train = np.column_stack([
    preds_train_rnn,
    preds_train_lstm,
    preds_train_gru,
    preds_train_bilstm,
    preds_train_bigru,
])


## Test base models

In [None]:
# Run each loaded base learner on the test inputs
# (order: RNN, LSTM, GRU, BiLSTM, BiGRU)
(
    preds_test_rnn,
    preds_test_lstm,
    preds_test_gru,
    preds_test_bilstm,
    preds_test_bigru,
) = (
    base_model.predict(X_test_reshaped)
    for base_model in (model_rnn, model_lstm, model_gru, model_bilstm, model_bigru)
)

# Place the five prediction arrays side by side, one column per base learner
base_preds_test = np.column_stack([
    preds_test_rnn,
    preds_test_lstm,
    preds_test_gru,
    preds_test_bilstm,
    preds_test_bigru,
])

# Stacking the base models to produce a meta-model

## Evaluating the regression meta models

In [None]:
def stacked_ensemble(meta_model, base_preds_train, y_train_scaled, base_preds_test, y_test_scaled, save_path=None, y_scaler=None):
    """Fit a meta-model on base-model predictions and score it on the test set.

    Parameters
    ----------
    meta_model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
    base_preds_train : array-like
        Base-model predictions for the training rows (in this notebook, one
        column per base model).
    y_train_scaled : array-like
        Scaled training targets; flattened to 1-D before fitting.
    base_preds_test : array-like
        Base-model predictions for the test rows.
    y_test_scaled : array-like
        Scaled test targets, in the 2-D layout accepted by the scaler's
        ``inverse_transform``.
    save_path : str or None, optional
        When given, the fitted meta-model is pickled to this path.
    y_scaler : object or None, optional
        Scaler whose ``inverse_transform`` maps scaled targets back to the
        original units. Defaults to the notebook-level ``scaler_y``.

    Returns
    -------
    dict
        ``'model'`` (the fitted meta-model) plus ``'mse'``, ``'rmse'``,
        ``'mae'`` and ``'r2'``, all computed on the inverse-transformed
        (original-unit) test targets and predictions.
    """
    # Fall back to the notebook-level target scaler when none is supplied
    if y_scaler is None:
        y_scaler = scaler_y

    # Fit on a flat target: several scikit-learn regressors emit a
    # DataConversionWarning when handed a column vector
    meta_model.fit(base_preds_train, np.ravel(y_train_scaled))

    # Predict, then force a column vector because the scaler expects 2-D input
    meta_preds_test = np.asarray(meta_model.predict(base_preds_test)).reshape(-1, 1)

    # Undo the scaling so the metrics are expressed in the original target units
    meta_predict_test_inv = y_scaler.inverse_transform(meta_preds_test)
    y_test_actual = y_scaler.inverse_transform(y_test_scaled)

    # 'mse' is the plain mean squared error. The root is taken exactly once for
    # 'rmse'; the `squared=` keyword is avoided because newer scikit-learn
    # releases no longer accept it.
    mse = mean_squared_error(y_test_actual, meta_predict_test_inv)
    rmse = float(np.sqrt(mse))
    mae = mean_absolute_error(y_test_actual, meta_predict_test_inv)
    r2 = r2_score(y_test_actual, meta_predict_test_inv)

    # Persist the fitted meta-model when a destination was requested
    if save_path is not None:
        with open(save_path, 'wb') as file:
            pickle.dump(meta_model, file)

    # Fitted model together with its evaluation metrics (predictions are not returned)
    results = {
        'model': meta_model,
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'r2': r2
    }
    return results


### Random forest

In [None]:
# Instantiate a random forest regression model to act as the meta-model
rf_model = RandomForestRegressor(n_estimators=100, random_state=50)

# Destination for the pickled meta-model. It is assigned in this cell so the call
# below neither raises NameError on a fresh kernel nor reuses a path left over
# from another model's cell. Keep None to skip saving, or enable the path that
# matches the dataset currently loaded.
meta_model_path = None
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/twitter/rf_test_meta_model.pkl'
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/reddit/rf_test_meta_model.pkl'

# Fit the random forest meta-model on the base predictions and evaluate it on the test set
rf_eval = stacked_ensemble(rf_model, base_preds_train, y_train_scaled, base_preds_test, y_test_scaled, save_path=meta_model_path)

# Print the evaluation metrics
print("Evaluation metrics for the ensemble:")
for metric_name, metric_value in rf_eval.items():
    print(f"{metric_name}: {metric_value}")


### Linear Regression 

In [None]:
# Instantiate a linear regression model to act as the meta-model
lr_model = LinearRegression()

# Destination for the pickled meta-model. It is assigned in this cell so the call
# below neither raises NameError on a fresh kernel nor reuses a path left over
# from another model's cell. Keep None to skip saving, or enable the path that
# matches the dataset currently loaded.
meta_model_path = None
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/twitter/lr_test_meta_model.pkl'
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/reddit/lr_test_meta_model.pkl'

# Fit the linear regression meta-model on the base predictions and evaluate it on the test set
lr_eval = stacked_ensemble(lr_model, base_preds_train, y_train_scaled, base_preds_test, y_test_scaled, save_path=meta_model_path)

# Print the evaluation metrics
print("Evaluation metrics for the ensemble:")
for metric_name, metric_value in lr_eval.items():
    print(f"{metric_name}: {metric_value}")

### Elastic net regression

In [None]:
# Instantiate an elastic net regression model to act as the meta-model
en_model = ElasticNet(alpha=0.015, l1_ratio=0.5, random_state=70)

# Destination for the pickled meta-model. It is assigned in this cell so the call
# below neither raises NameError on a fresh kernel nor reuses a path left over
# from another model's cell. Keep None to skip saving, or enable the path that
# matches the dataset currently loaded.
meta_model_path = None
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/twitter/en_test_meta_model.pkl'
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/reddit/en_test_meta_model.pkl'

# Fit the elastic net meta-model on the base predictions and evaluate it on the test set
en_eval = stacked_ensemble(en_model, base_preds_train, y_train_scaled, base_preds_test, y_test_scaled, save_path=meta_model_path)

# Print the evaluation metrics
print("Evaluation metrics for the ensemble:")
for metric_name, metric_value in en_eval.items():
    print(f"{metric_name}: {metric_value}")

### Support vector regression

In [None]:
# Instantiate a support vector regression model to act as the meta-model
svr_model = SVR()

# Destination for the pickled meta-model. It is assigned in this cell so the call
# below neither raises NameError on a fresh kernel nor reuses a path left over
# from another model's cell. Keep None to skip saving, or enable the path that
# matches the dataset currently loaded.
meta_model_path = None
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/twitter/svr_test_meta_model.pkl'
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/reddit/svr_test_meta_model.pkl'

# Fit the support vector meta-model on the base predictions and evaluate it on the test set
svr_eval = stacked_ensemble(svr_model, base_preds_train, y_train_scaled, base_preds_test, y_test_scaled, save_path=meta_model_path)

# Print the evaluation metrics
print("Evaluation metrics for the ensemble:")
for metric_name, metric_value in svr_eval.items():
    print(f"{metric_name}: {metric_value}")

### Decision tree regression

In [None]:
# Instantiate a decision tree regression model to act as the meta-model
dt_model = DecisionTreeRegressor(random_state=42)

# Destination for the pickled meta-model. It is assigned in this cell so the call
# below neither raises NameError on a fresh kernel nor reuses a path left over
# from another model's cell. Keep None to skip saving, or enable the path that
# matches the dataset currently loaded.
meta_model_path = None
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/twitter/dt_test_meta_model.pkl'
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/reddit/dt_test_meta_model.pkl'

# Fit the decision tree meta-model on the base predictions and evaluate it on the test set
dt_eval = stacked_ensemble(dt_model, base_preds_train, y_train_scaled, base_preds_test, y_test_scaled, save_path=meta_model_path)

# Print the evaluation metrics
print("Evaluation metrics for the ensemble:")
for metric_name, metric_value in dt_eval.items():
    print(f"{metric_name}: {metric_value}")
   

### Gradient boosting regression

In [None]:
# Instantiate a gradient boosting regression model to act as the meta-model
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)

# Destination for the pickled meta-model. It is assigned in this cell so the call
# below neither raises NameError on a fresh kernel nor reuses a path left over
# from another model's cell. Keep None to skip saving, or enable the path that
# matches the dataset currently loaded.
meta_model_path = None
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/twitter/gb_test_meta_model.pkl'
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/reddit/gb_test_meta_model.pkl'

# Fit the gradient boosting meta-model on the base predictions and evaluate it on the test set
gb_eval = stacked_ensemble(gb_model, base_preds_train, y_train_scaled, base_preds_test, y_test_scaled, save_path=meta_model_path)

# Print the evaluation metrics
print("Evaluation metrics for the ensemble:")
for metric_name, metric_value in gb_eval.items():
    print(f"{metric_name}: {metric_value}")

### AdaBoost regression

In [None]:
from sklearn.ensemble import AdaBoostRegressor

# Instantiate an AdaBoost regression model to act as the meta-model
ab_model = AdaBoostRegressor(n_estimators=100, random_state=42)

# Destination for the pickled meta-model. It is assigned in this cell so the call
# below neither raises NameError on a fresh kernel nor reuses a path left over
# from another model's cell. Keep None to skip saving, or enable the path that
# matches the dataset currently loaded.
meta_model_path = None
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/twitter/ada_test_meta_model.pkl'
# meta_model_path = '/content/drive/MyDrive/FYP/ensemble_models/5_feature/reddit/ada_test_meta_model.pkl'

# Fit the AdaBoost meta-model on the base predictions and evaluate it on the test set
ab_eval = stacked_ensemble(ab_model, base_preds_train, y_train_scaled, base_preds_test, y_test_scaled, save_path=meta_model_path)

# Print the evaluation metrics
print("Evaluation metrics for the ensemble:")
for metric_name, metric_value in ab_eval.items():
    print(f"{metric_name}: {metric_value}")