In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pickle
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


## Cross validation for reddit meta model and twitter dataset

In [None]:
# Load the merged Twitter-sentiment / Bitcoin table, name the unnamed first
# column 'timestamp', discard the columns that are not used, and index the
# frame by timestamp.
# NOTE(review): parse_dates=True is given without index_col, so the timestamp
# column is presumably left as plain strings — confirm if datetimes are needed.
data = (
    pd.read_csv('/content/drive/MyDrive/FYP/dataset/Merged Dataset/twitter_bitcoin_merged.csv', parse_dates=True)
    .rename(columns={'Unnamed: 0': 'timestamp'})
    .drop(columns=['compound', 'polarity', 'subjectivity', 'open', 'high', 'low'])
    .set_index('timestamp')
)

# The target is the close value of the following row (described as one hour
# ahead by the original notebook). The last row has no following row, so it is
# trimmed, and any remaining rows with missing values are dropped.
data = (
    data.assign(target=data["close"].shift(-1))
    .iloc[:-1]
    .dropna()
)

# Preview the feature columns. This expression only displays them; `data`
# itself is left unchanged.
features = ['pos', 'neg', 'neu', 'close', 'volume']
data.loc[:, features]

In [None]:
# Visualise the dataframe: leaving `data` as the cell's last expression makes
# the notebook render the prepared frame, including the `target` column that
# was added from the shifted close values.
data

In [None]:
# Inputs are every column except `target`; the target is kept as a
# single-column 2D array so it can be passed to a scaler.
X = data.drop(columns='target').values
y = data['target'].values.reshape(-1, 1)

# Ordered 80/20 split into train and test only (no shuffling, no validation
# set): the first 80% of the rows are train, the rest are test.
train_size = int(0.8 * len(X))
test_size = len(X) - train_size
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Min-max scale inputs and target with separate scalers. Each scaler is fitted
# on the training rows only and then applied to both partitions.
scaler_X = MinMaxScaler().fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

scaler_y = MinMaxScaler().fit(y_train)
y_train_scaled = scaler_y.transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

# Insert a length-1 timestep axis: [samples, timesteps, features]
n_features = X.shape[1]
X_train_reshaped = X_train_scaled.reshape(-1, 1, n_features)
X_test_reshaped = X_test_scaled.reshape(-1, 1, n_features)

In [None]:
# Load the five saved reddit_* base models (RNN, LSTM, GRU, BiLSTM, BiGRU),
# in that order, and bind each one to its own name.
model_rnn, model_lstm, model_gru, model_bilstm, model_bigru = [
    load_model(model_path)
    for model_path in (
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_rnn.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_lstm.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_gru.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_bilstm.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_bigru.h5',
    )
]

In [None]:
# Run each of the five base models on the reshaped, scaled test inputs.
preds_test_rnn, preds_test_lstm, preds_test_gru, preds_test_bilstm, preds_test_bigru = [
    base_model.predict(X_test_reshaped)
    for base_model in (model_rnn, model_lstm, model_gru, model_bilstm, model_bigru)
]

# Put the five prediction arrays side by side as columns; this matrix is the
# input of the meta model.
base_preds_test = np.column_stack([
    preds_test_rnn,
    preds_test_lstm,
    preds_test_gru,
    preds_test_bilstm,
    preds_test_bigru,
])

In [None]:
# Unpickle the reddit meta model from Drive.
# NOTE(review): pickle.load can execute arbitrary code while loading, so this
# file must come from a trusted source.
with open('/content/drive/MyDrive/FYP/ensemble_models/5_feature/reddit/lr_test_meta_model.pkl', 'rb') as meta_model_file:
    reddit_model = pickle.load(meta_model_file)

In [None]:
# Generate the ensemble prediction: feed the stacked base-model predictions
# to the loaded reddit meta model.
preds_test_meta = reddit_model.predict(base_preds_test)


In [None]:
# Map the scaled meta-model predictions and the scaled test targets back to
# the original scale of the target.
# scaler_y.inverse_transform only accepts 2D input, while a scikit-learn
# regressor fitted on a 1D target returns 1D predictions. reshape(-1, 1)
# handles both cases and leaves an (n, 1) array unchanged.
meta_predict_test_inv = scaler_y.inverse_transform(np.asarray(preds_test_meta).reshape(-1, 1))
y_test_actual = scaler_y.inverse_transform(y_test_scaled)

In [None]:
# Evaluate the meta model on the inverse-transformed (original-scale) values.
# Fix: the earlier code wrapped mean_squared_error(..., squared=False), which
# already returns the RMSE, in np.sqrt, so the reported value was the square
# root of the RMSE. The `squared` argument is also deprecated in recent
# scikit-learn releases, so the RMSE is derived from the plain MSE instead.
mse = mean_squared_error(y_test_actual, meta_predict_test_inv)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_actual, meta_predict_test_inv)
r2 = r2_score(y_test_actual, meta_predict_test_inv)

# Label each metric so the printed numbers cannot be mixed up.
print(f"MSE:  {mse}")
print(f"RMSE: {rmse}")
print(f"MAE:  {mae}")
print(f"R2:   {r2}")

## Cross validation for twitter meta model and reddit dataset

In [None]:
# Load the merged Reddit-sentiment / Bitcoin table, name the unnamed first
# column 'timestamp', discard the columns that are not used, and index the
# frame by timestamp.
# NOTE(review): parse_dates=True is given without index_col, so the timestamp
# column is presumably left as plain strings — confirm if datetimes are needed.
data = (
    pd.read_csv('/content/drive/MyDrive/FYP/dataset/Merged Dataset/reddit_bitcoin_merged.csv', parse_dates=True)
    .rename(columns={'Unnamed: 0': 'timestamp'})
    .drop(columns=['flair','compound', 'polarity', 'subjectivity', 'open', 'high', 'low'])
    .set_index('timestamp')
)

# The target is the close value of the following row (described as one hour
# ahead by the original notebook). The last row has no following row, so it is
# trimmed, and any remaining rows with missing values are dropped.
data = (
    data.assign(target=data["close"].shift(-1))
    .iloc[:-1]
    .dropna()
)

# Preview the feature columns. This expression only displays them; `data`
# itself is left unchanged.
features = ['pos', 'neg', 'neu', 'close', 'volume']
data.loc[:, features]

In [None]:
# Visualise the dataframe: leaving `data` as the cell's last expression makes
# the notebook render the prepared frame, including the `target` column that
# was added from the shifted close values.
data

In [None]:
# Split into features and target: inputs are every column except `target`;
# the target is kept as a single-column 2D array for the scaler.
X = data.drop('target', axis=1).values
y = data['target'].values.reshape(-1, 1)

# Ordered 80/20 split into train and test sets (no shuffling and no separate
# validation set): the first 80% of the rows are train, the rest are test.
train_size = int(0.8 * len(X))
test_size = len(X) - train_size
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Scale the data: each scaler is fitted on the training rows only and then
# applied to the test rows.
scaler_X = MinMaxScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

scaler_y = MinMaxScaler()
y_train_scaled = scaler_y.fit_transform(y_train)
# Fix: a stray trailing "A" after this call was a syntax error that stopped
# the whole cell from running.
y_test_scaled = scaler_y.transform(y_test)

# Reshape input to be 3D [samples, timesteps, features] with one timestep
n_features = X.shape[1]
X_train_reshaped = X_train_scaled.reshape((X_train_scaled.shape[0], 1, n_features))
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], 1, n_features))

In [None]:
# The reddit_* base models are deliberately not loaded in this section.
# The five loads that used to be here were reassigned by the twitter_* loads
# in the next cell before any of them was used, so they only cost time and
# memory. This section evaluates the twitter models on the reddit dataset.


In [None]:
# Load the five saved twitter_* base models (RNN, LSTM, GRU, BiLSTM, BiGRU),
# in that order, and bind each one to its own name.
model_rnn, model_lstm, model_gru, model_bilstm, model_bigru = [
    load_model(model_path)
    for model_path in (
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_rnn.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_lstm.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_gru.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_bilstm.h5',
        '/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_bigru.h5',
    )
]

In [None]:
# Run each of the five base models on the reshaped, scaled test inputs.
preds_test_rnn, preds_test_lstm, preds_test_gru, preds_test_bilstm, preds_test_bigru = [
    base_model.predict(X_test_reshaped)
    for base_model in (model_rnn, model_lstm, model_gru, model_bilstm, model_bigru)
]

# Put the five prediction arrays side by side as columns; this matrix is the
# input of the meta model.
base_preds_test = np.column_stack([
    preds_test_rnn,
    preds_test_lstm,
    preds_test_gru,
    preds_test_bilstm,
    preds_test_bigru,
])

In [None]:
# Unpickle the twitter meta model from Drive.
# NOTE(review): pickle.load can execute arbitrary code while loading, so this
# file must come from a trusted source.
with open('/content/drive/MyDrive/FYP/ensemble_models/5_feature/twitter/lr_test_meta_model.pkl', 'rb') as meta_model_file:
    twitter_model = pickle.load(meta_model_file)

In [None]:
# Generate the ensemble prediction: feed the stacked base-model predictions
# to the loaded twitter meta model.
preds_test_meta = twitter_model.predict(base_preds_test)


In [None]:
# Map the scaled meta-model predictions and the scaled test targets back to
# the original scale of the target.
# scaler_y.inverse_transform only accepts 2D input, while a scikit-learn
# regressor fitted on a 1D target returns 1D predictions. reshape(-1, 1)
# handles both cases and leaves an (n, 1) array unchanged.
meta_predict_test_inv = scaler_y.inverse_transform(np.asarray(preds_test_meta).reshape(-1, 1))
y_test_actual = scaler_y.inverse_transform(y_test_scaled)

In [None]:
# Evaluate the meta model on the inverse-transformed (original-scale) values.
# Fix: the earlier code wrapped mean_squared_error(..., squared=False), which
# already returns the RMSE, in np.sqrt, so the reported value was the square
# root of the RMSE. The `squared` argument is also deprecated in recent
# scikit-learn releases, so the RMSE is derived from the plain MSE instead.
mse = mean_squared_error(y_test_actual, meta_predict_test_inv)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_actual, meta_predict_test_inv)
r2 = r2_score(y_test_actual, meta_predict_test_inv)

# Label each metric so the printed numbers cannot be mixed up.
print(f"MSE:  {mse}")
print(f"RMSE: {rmse}")
print(f"MAE:  {mae}")
print(f"R2:   {r2}")