# RNN structure base learners


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, SimpleRNN, GRU, Bidirectional, ConvLSTM2D, BatchNormalization, Flatten, Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras import regularizers
from keras.layers import Dropout


In [None]:
# Read the merged Reddit + Bitcoin CSV with the parsed 'timestamp' column as the index,
# then discard the columns that are not used as model inputs
reddit_csv = '/content/drive/MyDrive/FYP/dataset/Merged Dataset/reddit_bitcoin_merged.csv'
unused_columns = ['flair', 'compound', 'polarity', 'subjectivity', 'open', 'high', 'low']
df = pd.read_csv(reddit_csv, index_col='timestamp', parse_dates=True).drop(columns=unused_columns)

In [None]:
# Loading the twitter and bitcoin merged dataset
df = pd.read_csv('/content/drive/MyDrive/FYP/dataset/Merged Dataset/twitter_bitcoin_merged.csv')

# The timestamps are stored in the unnamed first column of this file
df = df.rename(columns={'Unnamed: 0': 'timestamp'})
df = df.drop(columns=['compound', 'polarity', 'subjectivity', 'open', 'high', 'low'])

# parse_dates=True on read_csv only parses the index, and no index column is selected
# at read time, so the timestamps have to be converted explicitly. This gives the same
# DatetimeIndex that the reddit loader produces.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index('timestamp')

In [None]:
# Inspect the loaded frame; both loading cells assign `df`, so whichever ran last is shown
df

## Feature scaling

In [None]:
# Supervised target: each row's 'target' is the 'close' of the following row
# (the next hour for hourly data). Rows containing any missing value are then
# discarded, which includes the final row because it has no following close.
df = df.assign(target=df['close'].shift(-1)).dropna()

# Show the resulting frame
df

In [None]:
# Names of the model input columns; used here only to preview them.
# NOTE(review): X is built elsewhere with df.drop('target', axis=1), not from this list,
# so confirm the two agree in content and order.
features = ['pos', 'neg', 'neu', 'close', 'volume']
df[features]


In [None]:
# Inputs: every column except 'target', as a 2D array
X = df.drop(columns='target').to_numpy()
# Target as an (n_samples, 1) column vector, the layout the scaler expects
y = df[['target']].to_numpy()


In [None]:
# Order-preserving split: first 70% train, next 10% validation, remaining rows test
train_size = int(0.7 * len(X))
val_size = int(0.1 * len(X))
test_size = len(X) - train_size - val_size
val_end = train_size + val_size
X_train, X_val, X_test = np.split(X, [train_size, val_end])
y_train, y_val, y_test = np.split(y, [train_size, val_end])

# Fit the input scaler on the training split only, then apply it to every split
scaler_X = MinMaxScaler()
scaler_X.fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler_X.transform(part) for part in (X_train, X_val, X_test)
)

# Same procedure for the target
scaler_y = MinMaxScaler()
scaler_y.fit(y_train)
y_train_scaled, y_val_scaled, y_test_scaled = (
    scaler_y.transform(part) for part in (y_train, y_val, y_test)
)

# Insert a length-1 timestep axis: [samples, features] -> [samples, timesteps, features]
n_features = X.shape[1]
X_train_reshaped = X_train_scaled[:, np.newaxis, :]
X_val_reshaped = X_val_scaled[:, np.newaxis, :]
X_test_reshaped = X_test_scaled[:, np.newaxis, :]

In [None]:
# Number of input features per timestep (X.shape[1])
n_features

5

In [None]:
# Re-display the frame used for modelling.
# NOTE(review): the saved output under this cell looks stale. It lists an 'open' column,
# which the loading cells drop, and shows no 'target' column. Re-run to refresh it.
df

Unnamed: 0_level_0,pos,neg,neu,open,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 00:00:00,0.263000,0.000000,0.737000,28923.63,28995.13,2311.811445
2021-01-01 01:00:00,0.079167,0.093917,0.826917,28995.13,29409.99,5403.068471
2021-01-01 02:00:00,0.129125,0.049500,0.821500,29410.00,29194.65,2384.231560
2021-01-01 03:00:00,0.052200,0.101200,0.846600,29195.25,29278.40,1461.345077
2021-01-01 04:00:00,0.054091,0.047545,0.898273,29278.41,29220.31,2038.046803
...,...,...,...,...,...,...
2021-12-31 19:00:00,0.180000,0.146000,0.674000,46686.42,45728.28,3851.309020
2021-12-31 20:00:00,0.066000,0.000000,0.934000,45728.28,45879.24,1831.412020
2021-12-31 21:00:00,0.021333,0.032667,0.946000,45886.66,46333.86,1734.695850
2021-12-31 22:00:00,0.183000,0.000000,0.817000,46333.87,46303.99,991.851420


## LSTM base learner

In [None]:
# Two stacked LSTM layers (80 -> 60 units) with L2 kernel penalties and dropout,
# followed by a single-unit output for the scaled target
model_lstm = Sequential([
    LSTM(
        80,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.001),
        input_shape=(1, n_features),
        return_sequences=True,
    ),
    Dropout(0.2),
    LSTM(60, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.2),
    Dense(1),
])
model_lstm.compile(optimizer='adam', loss='mse')

# Train without shuffling the samples, monitoring the loss on the validation split
fit_options = dict(
    epochs=20,
    batch_size=50,
    validation_data=(X_val_reshaped, y_val_scaled),
    verbose=2,
    shuffle=False,
)
history = model_lstm.fit(X_train_reshaped, y_train_scaled, **fit_options)

# Learning curves for the lstm model
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Scaled lstm predictions for the train, validation and test splits
y_train_pred_lstm, y_val_pred_lstm, y_test_pred_lstm = [
    model_lstm.predict(split)
    for split in (X_train_reshaped, X_val_reshaped, X_test_reshaped)
]

# Undo the target scaling so the predictions are back in price units
y_train_pred_inv_lstm, y_val_pred_inv_lstm, y_test_pred_inv_lstm = [
    scaler_y.inverse_transform(scaled_pred)
    for scaled_pred in (y_train_pred_lstm, y_val_pred_lstm, y_test_pred_lstm)
]

In [None]:
# Recover the actual targets in price units by undoing the target scaling
y_train_actual_lstm = scaler_y.inverse_transform(y_train_scaled)
y_val_actual_lstm = scaler_y.inverse_transform(y_val_scaled)
y_test_actual_lstm = scaler_y.inverse_transform(y_test_scaled)

# Mean absolute error per split
mae_train_lstm = mean_absolute_error(y_train_actual_lstm, y_train_pred_inv_lstm)
mae_val_lstm = mean_absolute_error(y_val_actual_lstm, y_val_pred_inv_lstm)
mae_test_lstm = mean_absolute_error(y_test_actual_lstm, y_test_pred_inv_lstm)

# RMSE = sqrt(MSE), with the root taken exactly once. Do not also pass squared=False:
# that already returns the RMSE, so wrapping it in np.sqrt yields the fourth root of
# the MSE (and the `squared` argument is deprecated in recent scikit-learn).
rmse_train_lstm = np.sqrt(mean_squared_error(y_train_actual_lstm, y_train_pred_inv_lstm))
rmse_val_lstm = np.sqrt(mean_squared_error(y_val_actual_lstm, y_val_pred_inv_lstm))
rmse_test_lstm = np.sqrt(mean_squared_error(y_test_actual_lstm, y_test_pred_inv_lstm))

# Coefficient of determination per split
r2_train_lstm = r2_score(y_train_actual_lstm, y_train_pred_inv_lstm)
r2_val_lstm = r2_score(y_val_actual_lstm, y_val_pred_inv_lstm)
r2_test_lstm = r2_score(y_test_actual_lstm, y_test_pred_inv_lstm)

print("LSTM Training MAE: {:.2f}".format(mae_train_lstm))
print("LSTM Validation MAE: {:.2f}".format(mae_val_lstm))
print("LSTM Testing MAE: {:.2f}".format(mae_test_lstm))
print("LSTM Training RMSE: {:.2f}".format(rmse_train_lstm))
print("LSTM Validation RMSE: {:.2f}".format(rmse_val_lstm))
print("LSTM Testing RMSE: {:.2f}".format(rmse_test_lstm))
print("LSTM Training R2 Score: {:.4f}".format(r2_train_lstm))
print("LSTM Validation R2 Score: {:.4f}".format(r2_val_lstm))
print("LSTM Testing R2 Score: {:.4f}".format(r2_test_lstm))


In [None]:
# Save the model
# model_lstm.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_lstm.h5')
# model_lstm.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_lstm.h5')

In [None]:
# Per-split frames pairing each timestamp with the actual and lstm-predicted close
val_end = train_size + val_size

df_train_pred = pd.DataFrame({
    'timestamp': df.index[:train_size],
    'actual_close': y_train_actual_lstm.ravel(),
    'predicted_close': y_train_pred_inv_lstm.ravel(),
})
df_val_pred = pd.DataFrame({
    'timestamp': df.index[train_size:val_end],
    'actual_close': y_val_actual_lstm.ravel(),
    'predicted_close': y_val_pred_inv_lstm.ravel(),
})
df_test_pred = pd.DataFrame({
    'timestamp': df.index[val_end:],
    'actual_close': y_test_actual_lstm.ravel(),
    'predicted_close': y_test_pred_inv_lstm.ravel(),
})

# Draw actual and predicted curves for each split on one set of axes
fig, ax = plt.subplots(figsize=(12, 6))
for frame, actual_label, predicted_label in (
    (df_train_pred, 'Actual Train Close Price', 'Predicted Train Close Price'),
    (df_val_pred, 'Actual Validation Close Price', 'Predicted Validation Close Price'),
    (df_test_pred, 'Actual Test Close Price', 'Predicted Test Close Price'),
):
    ax.plot(frame['timestamp'], frame['actual_close'], label=actual_label)
    ax.plot(frame['timestamp'], frame['predicted_close'], label=predicted_label)
ax.set_xlabel('Timestamp')
ax.set_ylabel('Close Price')
ax.legend()
ax.set_title('Actual vs. Predicted Close Prices')
plt.show()


In [None]:
# Absolute percentage error of the lstm model on the test split, averaged for reporting
test_residual = y_test_actual_lstm - y_test_pred_inv_lstm
percentage_error = np.abs(test_residual * 100 / y_test_actual_lstm)
mean_percentage_error = percentage_error.mean()
print("Percentage error between actual and predicted bitcoin prices for LSTM = ",  mean_percentage_error,"%")

In [None]:
# Take the 10 most recent rows and keep only the model inputs. Only 'target' is removed:
# 'close' is an input feature, and the remaining columns must match, in order, the
# matrix scaler_X was fitted on (df.drop('target', axis=1)).
last_10_rows = df.tail(10).drop(columns=['target'])

# Scale with the scaler fitted on the training split. The raw array is passed because
# scaler_X was fitted on a NumPy array, i.e. without feature names.
last_10_rows_scaled = scaler_X.transform(last_10_rows.values)

# Reshape the input to be 3D [samples, timesteps, features]
last_10_rows_reshaped = last_10_rows_scaled.reshape((last_10_rows_scaled.shape[0], 1, last_10_rows_scaled.shape[1]))

# Every row is an independent single-timestep sample; the prediction for the last row
# is the forecast for the hour following the final timestamp in df.
# NOTE(review): if df is the frame left after shift(-1)/dropna, its newest row has been
# removed, so this forecasts an hour whose close is already in the raw data.
next_hour_price = model_lstm.predict(last_10_rows_reshaped)[-1][0]

# Inverse transform the predicted price back to price units
next_hour_price = scaler_y.inverse_transform([[next_hour_price]])[0][0]

print(f"The predicted price of Bitcoin in the next hour is {next_hour_price:.2f}")


## Simple RNN base learner

In [None]:
# Two stacked SimpleRNN layers (100 -> 80 units) with L2 kernel penalties and dropout,
# followed by a single-unit output for the scaled target
model_rnn = Sequential([
    SimpleRNN(
        100,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.001),
        input_shape=(1, n_features),
        return_sequences=True,
    ),
    Dropout(0.25),
    SimpleRNN(
        80,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.001),
        return_sequences=False,
    ),
    Dropout(0.25),
    Dense(1),
])
model_rnn.compile(optimizer='adam', loss='mse')

# Train without shuffling the samples, monitoring the loss on the validation split
fit_options = dict(
    epochs=15,
    batch_size=65,
    validation_data=(X_val_reshaped, y_val_scaled),
    verbose=2,
    shuffle=False,
)
history = model_rnn.fit(X_train_reshaped, y_train_scaled, **fit_options)

# Learning curves for the simple rnn model
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()


In [None]:
# Scaled simple rnn predictions for the train, validation and test splits
y_train_pred_rnn, y_val_pred_rnn, y_test_pred_rnn = [
    model_rnn.predict(split)
    for split in (X_train_reshaped, X_val_reshaped, X_test_reshaped)
]

# Undo the target scaling so the predictions are back in price units
y_train_pred_inv_rnn, y_val_pred_inv_rnn, y_test_pred_inv_rnn = [
    scaler_y.inverse_transform(scaled_pred)
    for scaled_pred in (y_train_pred_rnn, y_val_pred_rnn, y_test_pred_rnn)
]


In [None]:
# Recover the actual targets in price units by undoing the target scaling
y_train_actual_rnn = scaler_y.inverse_transform(y_train_scaled)
y_val_actual_rnn = scaler_y.inverse_transform(y_val_scaled)
y_test_actual_rnn = scaler_y.inverse_transform(y_test_scaled)

# Mean absolute error per split for the simple rnn model
mae_train_rnn = mean_absolute_error(y_train_actual_rnn, y_train_pred_inv_rnn)
mae_val_rnn = mean_absolute_error(y_val_actual_rnn, y_val_pred_inv_rnn)
mae_test_rnn = mean_absolute_error(y_test_actual_rnn, y_test_pred_inv_rnn)

# RMSE = sqrt(MSE), with the root taken exactly once. Do not also pass squared=False:
# that already returns the RMSE, so wrapping it in np.sqrt yields the fourth root of
# the MSE (and the `squared` argument is deprecated in recent scikit-learn).
rmse_train_rnn = np.sqrt(mean_squared_error(y_train_actual_rnn, y_train_pred_inv_rnn))
rmse_val_rnn = np.sqrt(mean_squared_error(y_val_actual_rnn, y_val_pred_inv_rnn))
rmse_test_rnn = np.sqrt(mean_squared_error(y_test_actual_rnn, y_test_pred_inv_rnn))

# Coefficient of determination per split
r2_train_rnn = r2_score(y_train_actual_rnn, y_train_pred_inv_rnn)
r2_val_rnn = r2_score(y_val_actual_rnn, y_val_pred_inv_rnn)
r2_test_rnn = r2_score(y_test_actual_rnn, y_test_pred_inv_rnn)

print("Simple RNN Training MAE: {:.2f}".format(mae_train_rnn))
print("Simple RNN Validation MAE: {:.2f}".format(mae_val_rnn))
print("Simple RNN Testing MAE: {:.2f}".format(mae_test_rnn))
print("Simple RNN Training RMSE: {:.2f}".format(rmse_train_rnn))
print("Simple RNN Validation RMSE: {:.2f}".format(rmse_val_rnn))
print("Simple RNN Testing RMSE: {:.2f}".format(rmse_test_rnn))
print("Simple RNN Training R2 Score: {:.4f}".format(r2_train_rnn))
print("Simple RNN Validation R2 Score: {:.4f}".format(r2_val_rnn))
print("Simple RNN Testing R2 Score: {:.4f}".format(r2_test_rnn))

In [None]:
# Save the model
# model_rnn.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_rnn.h5')
# model_rnn.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_rnn.h5')

In [None]:
# Per-split frames pairing each timestamp with the actual and rnn-predicted close
val_end = train_size + val_size

df_train_pred = pd.DataFrame({
    'timestamp': df.index[:train_size],
    'actual_close': y_train_actual_rnn.ravel(),
    'predicted_close': y_train_pred_inv_rnn.ravel(),
})
df_val_pred = pd.DataFrame({
    'timestamp': df.index[train_size:val_end],
    'actual_close': y_val_actual_rnn.ravel(),
    'predicted_close': y_val_pred_inv_rnn.ravel(),
})
df_test_pred = pd.DataFrame({
    'timestamp': df.index[val_end:],
    'actual_close': y_test_actual_rnn.ravel(),
    'predicted_close': y_test_pred_inv_rnn.ravel(),
})

# Draw actual and predicted curves for each split on one set of axes
fig, ax = plt.subplots(figsize=(12, 6))
for frame, actual_label, predicted_label in (
    (df_train_pred, 'Actual Train Close Price', 'Predicted Train Close Price'),
    (df_val_pred, 'Actual Validation Close Price', 'Predicted Validation Close Price'),
    (df_test_pred, 'Actual Test Close Price', 'Predicted Test Close Price'),
):
    ax.plot(frame['timestamp'], frame['actual_close'], label=actual_label)
    ax.plot(frame['timestamp'], frame['predicted_close'], label=predicted_label)
ax.set_xlabel('Timestamp')
ax.set_ylabel('Close Price')
ax.legend()
ax.set_title('Actual vs. Predicted Close Prices')
plt.show()


In [None]:
# Absolute percentage error of the simple rnn model on the test split, averaged for reporting
test_residual = y_test_actual_rnn - y_test_pred_inv_rnn
percentage_error = np.abs(test_residual * 100 / y_test_actual_rnn)
mean_percentage_error = percentage_error.mean()
print("Percentage error between actual and predicted bitcoin prices for Simple RNN = ",  mean_percentage_error,"%")

In [None]:
# Take the 10 most recent rows and keep only the model inputs. Only 'target' is dropped:
# 'timestamp' is the frame's index, not a column, so naming it in drop() raises KeyError.
# The remaining columns match the matrix scaler_X was fitted on (df.drop('target', axis=1)).
last_10_rows = df.tail(10).drop(columns=['target'])

# Scale with the scaler fitted on the training split. The raw array is passed because
# scaler_X was fitted on a NumPy array, i.e. without feature names.
last_10_rows_scaled = scaler_X.transform(last_10_rows.values)

# Reshape the input to be 3D [samples, timesteps, features]
last_10_rows_reshaped = last_10_rows_scaled.reshape((last_10_rows_scaled.shape[0], 1, last_10_rows_scaled.shape[1]))

# Every row is an independent single-timestep sample; the prediction for the last row
# is the forecast for the hour following the final timestamp in df.
# NOTE(review): if df is the frame left after shift(-1)/dropna, its newest row has been
# removed, so this forecasts an hour whose close is already in the raw data.
next_hour_price = model_rnn.predict(last_10_rows_reshaped)[-1][0]

# Inverse transform the predicted price back to price units
next_hour_price = scaler_y.inverse_transform([[next_hour_price]])[0][0]

print(f"The predicted price of Bitcoin in the next hour is {next_hour_price:.2f}")

## GRU base learner

In [None]:
# Two stacked GRU layers (100 -> 75 units) with L2 kernel penalties and dropout,
# followed by a single-unit output for the scaled target
model_gru = Sequential([
    GRU(
        100,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.001),
        input_shape=(1, n_features),
        return_sequences=True,
    ),
    Dropout(0.2),
    GRU(75, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.2),
    Dense(1),
])
model_gru.compile(optimizer='adam', loss='mse')

# Train without shuffling the samples, monitoring the loss on the validation split
fit_options = dict(
    epochs=20,
    batch_size=60,
    validation_data=(X_val_reshaped, y_val_scaled),
    verbose=2,
    shuffle=False,
)
history = model_gru.fit(X_train_reshaped, y_train_scaled, **fit_options)

# Learning curves for the gru model
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()


In [None]:
# Scaled gru predictions for the train, validation and test splits
y_train_pred_gru, y_val_pred_gru, y_test_pred_gru = [
    model_gru.predict(split)
    for split in (X_train_reshaped, X_val_reshaped, X_test_reshaped)
]

# Undo the target scaling so the predictions are back in price units
y_train_pred_inv_gru, y_val_pred_inv_gru, y_test_pred_inv_gru = [
    scaler_y.inverse_transform(scaled_pred)
    for scaled_pred in (y_train_pred_gru, y_val_pred_gru, y_test_pred_gru)
]


In [None]:
# Recover the actual targets in price units by undoing the target scaling
y_train_actual_gru = scaler_y.inverse_transform(y_train_scaled)
y_val_actual_gru = scaler_y.inverse_transform(y_val_scaled)
y_test_actual_gru = scaler_y.inverse_transform(y_test_scaled)

# Mean absolute error per split for the gru model
mae_train_gru = mean_absolute_error(y_train_actual_gru, y_train_pred_inv_gru)
mae_val_gru = mean_absolute_error(y_val_actual_gru, y_val_pred_inv_gru)
mae_test_gru = mean_absolute_error(y_test_actual_gru, y_test_pred_inv_gru)

# RMSE = sqrt(MSE), with the root taken exactly once. Do not also pass squared=False:
# that already returns the RMSE, so wrapping it in np.sqrt yields the fourth root of
# the MSE (and the `squared` argument is deprecated in recent scikit-learn).
rmse_train_gru = np.sqrt(mean_squared_error(y_train_actual_gru, y_train_pred_inv_gru))
rmse_val_gru = np.sqrt(mean_squared_error(y_val_actual_gru, y_val_pred_inv_gru))
rmse_test_gru = np.sqrt(mean_squared_error(y_test_actual_gru, y_test_pred_inv_gru))

# Coefficient of determination per split
r2_train_gru = r2_score(y_train_actual_gru, y_train_pred_inv_gru)
r2_val_gru = r2_score(y_val_actual_gru, y_val_pred_inv_gru)
r2_test_gru = r2_score(y_test_actual_gru, y_test_pred_inv_gru)

print("GRU Training MAE: {:.2f}".format(mae_train_gru))
print("GRU Validation MAE: {:.2f}".format(mae_val_gru))
print("GRU Testing MAE: {:.2f}".format(mae_test_gru))
print("GRU Training RMSE: {:.2f}".format(rmse_train_gru))
print("GRU Validation RMSE: {:.2f}".format(rmse_val_gru))
print("GRU Testing RMSE: {:.2f}".format(rmse_test_gru))
print("GRU Training R2 Score: {:.4f}".format(r2_train_gru))
print("GRU Validation R2 Score: {:.4f}".format(r2_val_gru))
print("GRU Testing R2 Score: {:.4f}".format(r2_test_gru))

In [None]:
# Save the model
# model_gru.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_gru.h5')
# model_gru.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_gru.h5')

In [None]:
# Per-split frames pairing each timestamp with the actual and gru-predicted close
val_end = train_size + val_size

df_train_pred = pd.DataFrame({
    'timestamp': df.index[:train_size],
    'actual_close': y_train_actual_gru.ravel(),
    'predicted_close': y_train_pred_inv_gru.ravel(),
})
df_val_pred = pd.DataFrame({
    'timestamp': df.index[train_size:val_end],
    'actual_close': y_val_actual_gru.ravel(),
    'predicted_close': y_val_pred_inv_gru.ravel(),
})
df_test_pred = pd.DataFrame({
    'timestamp': df.index[val_end:],
    'actual_close': y_test_actual_gru.ravel(),
    'predicted_close': y_test_pred_inv_gru.ravel(),
})

# Draw actual and predicted curves for each split on one set of axes
fig, ax = plt.subplots(figsize=(12, 6))
for frame, actual_label, predicted_label in (
    (df_train_pred, 'Actual Train Close Price', 'Predicted Train Close Price'),
    (df_val_pred, 'Actual Validation Close Price', 'Predicted Validation Close Price'),
    (df_test_pred, 'Actual Test Close Price', 'Predicted Test Close Price'),
):
    ax.plot(frame['timestamp'], frame['actual_close'], label=actual_label)
    ax.plot(frame['timestamp'], frame['predicted_close'], label=predicted_label)
ax.set_xlabel('Timestamp')
ax.set_ylabel('Close Price')
ax.legend()
ax.set_title('Actual vs. Predicted Close Prices')
plt.show()


In [None]:
# Absolute percentage error of the gru model on the test split, averaged for reporting
test_residual = y_test_actual_gru - y_test_pred_inv_gru
percentage_error = np.abs(test_residual * 100 / y_test_actual_gru)
mean_percentage_error = percentage_error.mean()
print("Percentage error between actual and predicted bitcoin prices for GRU = ",  mean_percentage_error,"%")

In [None]:
# Take the 10 most recent rows and keep only the model inputs. Only 'target' is dropped:
# 'timestamp' is the frame's index, not a column, so naming it in drop() raises KeyError.
# The remaining columns match the matrix scaler_X was fitted on (df.drop('target', axis=1)).
last_10_rows = df.tail(10).drop(columns=['target'])

# Scale with the scaler fitted on the training split. The raw array is passed because
# scaler_X was fitted on a NumPy array, i.e. without feature names.
last_10_rows_scaled = scaler_X.transform(last_10_rows.values)

# Reshape the input to be 3D [samples, timesteps, features]
last_10_rows_reshaped = last_10_rows_scaled.reshape((last_10_rows_scaled.shape[0], 1, last_10_rows_scaled.shape[1]))

# Every row is an independent single-timestep sample; the prediction for the last row
# is the forecast for the hour following the final timestamp in df.
# NOTE(review): if df is the frame left after shift(-1)/dropna, its newest row has been
# removed, so this forecasts an hour whose close is already in the raw data.
next_hour_price = model_gru.predict(last_10_rows_reshaped)[-1][0]

# Inverse transform the predicted price back to price units
next_hour_price = scaler_y.inverse_transform([[next_hour_price]])[0][0]

print(f"The predicted price of Bitcoin in the next hour is {next_hour_price:.2f}")

## Bidirectional LSTM base learner

In [None]:
# One bidirectional LSTM layer (100 units per direction) with an L2 kernel penalty,
# dropout, and a single-unit output for the scaled target
model_bilstm = Sequential([
    Bidirectional(
        LSTM(
            100,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.001),
            input_shape=(1, n_features),
            return_sequences=False,
        )
    ),
    Dropout(0.2),
    Dense(1),
])
model_bilstm.compile(optimizer='adam', loss='mse')

# Train without shuffling the samples, monitoring the loss on the validation split
fit_options = dict(
    epochs=20,
    batch_size=100,
    validation_data=(X_val_reshaped, y_val_scaled),
    verbose=2,
    shuffle=False,
)
history = model_bilstm.fit(X_train_reshaped, y_train_scaled, **fit_options)

# Learning curves for the bilstm model
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()


In [None]:
# Scaled bilstm predictions for the train, validation and test splits
y_train_pred_bilstm, y_val_pred_bilstm, y_test_pred_bilstm = [
    model_bilstm.predict(split)
    for split in (X_train_reshaped, X_val_reshaped, X_test_reshaped)
]

# Undo the target scaling so the predictions are back in price units
y_train_pred_inv_bilstm, y_val_pred_inv_bilstm, y_test_pred_inv_bilstm = [
    scaler_y.inverse_transform(scaled_pred)
    for scaled_pred in (y_train_pred_bilstm, y_val_pred_bilstm, y_test_pred_bilstm)
]


In [None]:
# Recover the actual targets in price units by undoing the target scaling
y_train_actual_bilstm = scaler_y.inverse_transform(y_train_scaled)
y_val_actual_bilstm = scaler_y.inverse_transform(y_val_scaled)
y_test_actual_bilstm = scaler_y.inverse_transform(y_test_scaled)

# Mean absolute error per split for the bilstm model
mae_train_bilstm = mean_absolute_error(y_train_actual_bilstm, y_train_pred_inv_bilstm)
mae_val_bilstm = mean_absolute_error(y_val_actual_bilstm, y_val_pred_inv_bilstm)
mae_test_bilstm = mean_absolute_error(y_test_actual_bilstm, y_test_pred_inv_bilstm)

# RMSE = sqrt(MSE), with the root taken exactly once. Do not also pass squared=False:
# that already returns the RMSE, so wrapping it in np.sqrt yields the fourth root of
# the MSE (and the `squared` argument is deprecated in recent scikit-learn).
rmse_train_bilstm = np.sqrt(mean_squared_error(y_train_actual_bilstm, y_train_pred_inv_bilstm))
rmse_val_bilstm = np.sqrt(mean_squared_error(y_val_actual_bilstm, y_val_pred_inv_bilstm))
rmse_test_bilstm = np.sqrt(mean_squared_error(y_test_actual_bilstm, y_test_pred_inv_bilstm))

# Coefficient of determination per split
r2_train_bilstm = r2_score(y_train_actual_bilstm, y_train_pred_inv_bilstm)
r2_val_bilstm = r2_score(y_val_actual_bilstm, y_val_pred_inv_bilstm)
r2_test_bilstm = r2_score(y_test_actual_bilstm, y_test_pred_inv_bilstm)

print("BiLSTM Training MAE: {:.2f}".format(mae_train_bilstm))
print("BiLSTM Validation MAE: {:.2f}".format(mae_val_bilstm))
print("BiLSTM Testing MAE: {:.2f}".format(mae_test_bilstm))
print("BiLSTM Training RMSE: {:.2f}".format(rmse_train_bilstm))
print("BiLSTM Validation RMSE: {:.2f}".format(rmse_val_bilstm))
print("BiLSTM Testing RMSE: {:.2f}".format(rmse_test_bilstm))
print("BiLSTM Training R2 Score: {:.4f}".format(r2_train_bilstm))
print("BiLSTM Validation R2 Score: {:.4f}".format(r2_val_bilstm))
print("BiLSTM Testing R2 Score: {:.4f}".format(r2_test_bilstm))

In [None]:
# Save the model
# model_bilstm.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_bilstm.h5')
# model_bilstm.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_bilstm.h5')

In [None]:
# Per-split frames pairing each timestamp with the actual and bilstm-predicted close
val_end = train_size + val_size

df_train_pred = pd.DataFrame({
    'timestamp': df.index[:train_size],
    'actual_close': y_train_actual_bilstm.ravel(),
    'predicted_close': y_train_pred_inv_bilstm.ravel(),
})
df_val_pred = pd.DataFrame({
    'timestamp': df.index[train_size:val_end],
    'actual_close': y_val_actual_bilstm.ravel(),
    'predicted_close': y_val_pred_inv_bilstm.ravel(),
})
df_test_pred = pd.DataFrame({
    'timestamp': df.index[val_end:],
    'actual_close': y_test_actual_bilstm.ravel(),
    'predicted_close': y_test_pred_inv_bilstm.ravel(),
})

# Draw actual and predicted curves for each split on one set of axes
fig, ax = plt.subplots(figsize=(12, 6))
for frame, actual_label, predicted_label in (
    (df_train_pred, 'Actual Train Close Price', 'Predicted Train Close Price'),
    (df_val_pred, 'Actual Validation Close Price', 'Predicted Validation Close Price'),
    (df_test_pred, 'Actual Test Close Price', 'Predicted Test Close Price'),
):
    ax.plot(frame['timestamp'], frame['actual_close'], label=actual_label)
    ax.plot(frame['timestamp'], frame['predicted_close'], label=predicted_label)
ax.set_xlabel('Timestamp')
ax.set_ylabel('Close Price')
ax.legend()
ax.set_title('Actual vs. Predicted Close Prices')
plt.show()

In [None]:
# Mean absolute percentage error of the bilstm model on the test split.
# The printed label names the BiLSTM so the figure is not mistaken for the GRU result.
percentage_error=abs((y_test_actual_bilstm - y_test_pred_inv_bilstm)*100/y_test_actual_bilstm)
print("Percentage error between actual and predicted bitcoin prices for BiLSTM = ",  percentage_error.mean(),"%")

In [None]:
# Take the 10 most recent rows and keep only the model inputs. Only 'target' is dropped:
# 'timestamp' is the frame's index, not a column, so naming it in drop() raises KeyError.
# The remaining columns match the matrix scaler_X was fitted on (df.drop('target', axis=1)).
last_10_rows = df.tail(10).drop(columns=['target'])

# Scale with the scaler fitted on the training split. The raw array is passed because
# scaler_X was fitted on a NumPy array, i.e. without feature names.
last_10_rows_scaled = scaler_X.transform(last_10_rows.values)

# Reshape the input to be 3D [samples, timesteps, features]
last_10_rows_reshaped = last_10_rows_scaled.reshape((last_10_rows_scaled.shape[0], 1, last_10_rows_scaled.shape[1]))

# Every row is an independent single-timestep sample; the prediction for the last row
# is the forecast for the hour following the final timestamp in df.
# NOTE(review): if df is the frame left after shift(-1)/dropna, its newest row has been
# removed, so this forecasts an hour whose close is already in the raw data.
next_hour_price = model_bilstm.predict(last_10_rows_reshaped)[-1][0]

# Inverse transform the predicted price back to price units
next_hour_price = scaler_y.inverse_transform([[next_hour_price]])[0][0]

print(f"The predicted price of Bitcoin in the next hour is {next_hour_price:.2f}")

## Bidirectional GRU base learner

In [None]:
# One bidirectional GRU layer (100 units per direction) with an L2 kernel penalty,
# dropout, and a single-unit output for the scaled target
model_bigru = Sequential([
    Bidirectional(
        GRU(
            100,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.001),
            input_shape=(1, n_features),
            return_sequences=False,
        )
    ),
    Dropout(0.2),
    Dense(1),
])
model_bigru.compile(optimizer='adam', loss='mse')

# Train without shuffling the samples, monitoring the loss on the validation split
fit_options = dict(
    epochs=15,
    batch_size=100,
    validation_data=(X_val_reshaped, y_val_scaled),
    verbose=2,
    shuffle=False,
)
history = model_bigru.fit(X_train_reshaped, y_train_scaled, **fit_options)

# Learning curves for the bigru model
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()


In [None]:
# Scaled bigru predictions for the train, validation and test splits
y_train_pred_bigru, y_val_pred_bigru, y_test_pred_bigru = [
    model_bigru.predict(split)
    for split in (X_train_reshaped, X_val_reshaped, X_test_reshaped)
]

# Undo the target scaling so the predictions are back in price units
y_train_pred_inv_bigru, y_val_pred_inv_bigru, y_test_pred_inv_bigru = [
    scaler_y.inverse_transform(scaled_pred)
    for scaled_pred in (y_train_pred_bigru, y_val_pred_bigru, y_test_pred_bigru)
]

In [None]:
# Recover the actual targets in price units by undoing the target scaling
y_train_actual_bigru = scaler_y.inverse_transform(y_train_scaled)
y_val_actual_bigru = scaler_y.inverse_transform(y_val_scaled)
y_test_actual_bigru = scaler_y.inverse_transform(y_test_scaled)

# Mean absolute error per split for the bigru model
mae_train_bigru = mean_absolute_error(y_train_actual_bigru, y_train_pred_inv_bigru)
mae_val_bigru = mean_absolute_error(y_val_actual_bigru, y_val_pred_inv_bigru)
mae_test_bigru = mean_absolute_error(y_test_actual_bigru, y_test_pred_inv_bigru)

# RMSE = sqrt(MSE), with the root taken exactly once. Do not also pass squared=False:
# that already returns the RMSE, so wrapping it in np.sqrt yields the fourth root of
# the MSE (and the `squared` argument is deprecated in recent scikit-learn).
rmse_train_bigru = np.sqrt(mean_squared_error(y_train_actual_bigru, y_train_pred_inv_bigru))
rmse_val_bigru = np.sqrt(mean_squared_error(y_val_actual_bigru, y_val_pred_inv_bigru))
rmse_test_bigru = np.sqrt(mean_squared_error(y_test_actual_bigru, y_test_pred_inv_bigru))

# Coefficient of determination per split
r2_train_bigru = r2_score(y_train_actual_bigru, y_train_pred_inv_bigru)
r2_val_bigru = r2_score(y_val_actual_bigru, y_val_pred_inv_bigru)
r2_test_bigru = r2_score(y_test_actual_bigru, y_test_pred_inv_bigru)

print("BiGRU Training MAE: {:.2f}".format(mae_train_bigru))
print("BiGRU Validation MAE: {:.2f}".format(mae_val_bigru))
print("BiGRU Testing MAE: {:.2f}".format(mae_test_bigru))
print("BiGRU Training RMSE: {:.2f}".format(rmse_train_bigru))
print("BiGRU Validation RMSE: {:.2f}".format(rmse_val_bigru))
print("BiGRU Testing RMSE: {:.2f}".format(rmse_test_bigru))
print("BiGRU Training R2 Score: {:.4f}".format(r2_train_bigru))
print("BiGRU Validation R2 Score: {:.4f}".format(r2_val_bigru))
print("BiGRU Testing R2 Score: {:.4f}".format(r2_test_bigru))

In [None]:
# Save the model
# model_bigru.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/reddit_bigru.h5')
# model_bigru.save('/content/drive/MyDrive/FYP/rnn_base_models/5_features/twitter_bigru.h5')

In [None]:
# Per-split frames pairing each timestamp with the actual and bigru-predicted close
val_end = train_size + val_size

df_train_pred = pd.DataFrame({
    'timestamp': df.index[:train_size],
    'actual_close': y_train_actual_bigru.ravel(),
    'predicted_close': y_train_pred_inv_bigru.ravel(),
})
df_val_pred = pd.DataFrame({
    'timestamp': df.index[train_size:val_end],
    'actual_close': y_val_actual_bigru.ravel(),
    'predicted_close': y_val_pred_inv_bigru.ravel(),
})
df_test_pred = pd.DataFrame({
    'timestamp': df.index[val_end:],
    'actual_close': y_test_actual_bigru.ravel(),
    'predicted_close': y_test_pred_inv_bigru.ravel(),
})

# Draw actual and predicted curves for each split on one set of axes
fig, ax = plt.subplots(figsize=(12, 6))
for frame, actual_label, predicted_label in (
    (df_train_pred, 'Actual Train Close Price', 'Predicted Train Close Price'),
    (df_val_pred, 'Actual Validation Close Price', 'Predicted Validation Close Price'),
    (df_test_pred, 'Actual Test Close Price', 'Predicted Test Close Price'),
):
    ax.plot(frame['timestamp'], frame['actual_close'], label=actual_label)
    ax.plot(frame['timestamp'], frame['predicted_close'], label=predicted_label)
ax.set_xlabel('Timestamp')
ax.set_ylabel('Close Price')
ax.legend()
ax.set_title('Actual vs. Predicted Close Prices')
plt.show()

In [None]:
# Mean absolute percentage error of the bigru model on the test split.
# The printed label names the BiGRU so the figure is not mistaken for the GRU result.
percentage_error=abs((y_test_actual_bigru - y_test_pred_inv_bigru)*100/y_test_actual_bigru)
print("Percentage error between actual and predicted bitcoin prices for BiGRU = ",  percentage_error.mean(),"%")

In [None]:
# Take the 10 most recent rows and keep only the model inputs. Only 'target' is dropped:
# 'timestamp' is the frame's index, not a column, so naming it in drop() raises KeyError.
# The remaining columns match the matrix scaler_X was fitted on (df.drop('target', axis=1)).
last_10_rows = df.tail(10).drop(columns=['target'])

# Scale with the scaler fitted on the training split. The raw array is passed because
# scaler_X was fitted on a NumPy array, i.e. without feature names.
last_10_rows_scaled = scaler_X.transform(last_10_rows.values)

# Reshape the input to be 3D [samples, timesteps, features]
last_10_rows_reshaped = last_10_rows_scaled.reshape((last_10_rows_scaled.shape[0], 1, last_10_rows_scaled.shape[1]))

# Every row is an independent single-timestep sample; the prediction for the last row
# is the forecast for the hour following the final timestamp in df.
# NOTE(review): if df is the frame left after shift(-1)/dropna, its newest row has been
# removed, so this forecasts an hour whose close is already in the raw data.
next_hour_price = model_bigru.predict(last_10_rows_reshaped)[-1][0]

# Inverse transform the predicted price back to price units
next_hour_price = scaler_y.inverse_transform([[next_hour_price]])[0][0]

print(f"The predicted price of Bitcoin in the next hour is {next_hour_price:.2f}")