# Data Processing

In [None]:
import shap

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense, concatenate, RepeatVector, Input, Bidirectional, RepeatVector, TimeDistributed, GRU

from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense


import numpy as np
import pandas as pd

from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Load the four feature tables. `index_col=0` makes the first CSV column the
# row index of each frame.
(
    df_technical,
    df_onchain_data,
    df_sentiment_index,
    df_traditional_assets,
) = [
    pd.read_csv(csv_name, index_col=0)
    for csv_name in (
        "Technical_indicators_final.csv",
        "onchain_data_final.csv",
        "Sentiment_Index_final.csv",
        "traditional_assets_data_final.csv",
    )
]

In [None]:
# Row counts of the four source tables, printed in load order.
print(*map(len, (df_technical, df_onchain_data, df_sentiment_index, df_traditional_assets)))

In [None]:
# Convert every table's 'Date' column to pandas datetimes so the later merges
# join on comparable keys.
for frame in (df_technical, df_onchain_data, df_sentiment_index, df_traditional_assets):
    frame['Date'] = pd.to_datetime(frame['Date'])

In [None]:
# Linearly interpolate missing values in the daily unique-address series.
address_col = 'Number of unique addresses per day'
df_onchain_data[address_col] = df_onchain_data[address_col].interpolate(method='linear')

In [None]:
import pandas as pd

# Combine the four tables with three successive right joins on 'Date'.
# NOTE(review): a right join keeps every row of the right-hand table, so dates
# missing from the tables on the left would yield NaN feature values —
# confirm that all four tables cover the same dates.
merged_df = (
    df_technical
    .merge(df_onchain_data, on='Date', how='right')
    .merge(df_sentiment_index, on='Date', how='right')
    .merge(df_traditional_assets, on='Date', how='right')
)

In [None]:
# Preview the first three rows of the merged table (shown as the cell output).
merged_df.head(3)

In [None]:
# Date axis and unscaled 'Close' series, kept aside for the final plot.
dates = pd.to_datetime(merged_df['Date'])
target = merged_df['Close'].values

In [None]:
def minmax_scaler(data):
    """Scale every column of ``data`` to the [0, 1] range.

    Args:
        data: Object exposing ``min(axis=0)`` / ``max(axis=0)`` and supporting
            element-wise arithmetic (e.g. a pandas DataFrame or NumPy array).

    Returns:
        Tuple ``(scaled, min_val, max_val)``: the scaled values as a NumPy
        array, plus the per-column minimum and maximum used for scaling.
    """
    min_val = data.min(axis=0)
    max_val = data.max(axis=0)
    value_range = max_val - min_val
    # A constant column has zero range; dividing by it would fill the column
    # with NaN. Use a divisor of 1 there so the column scales to 0 instead.
    safe_range = np.where(value_range == 0, 1, value_range)
    scaled_data = (data - min_val) / safe_range
    return np.array(scaled_data), min_val, max_val

def minmax_inverse_transform(scaled_data, min_val, max_val):
    """Map min-max scaled values back to the original scale.

    Computes ``scaled_data * (max_val - min_val) + min_val``.
    """
    span = max_val - min_val
    restored = scaled_data * span + min_val
    return restored

In [None]:
# Use every column except the first as model input, cast to float.
# NOTE(review): assumes the first column of merged_df is 'Date' — confirm.
cols = merged_df.columns[1:].tolist()
data = merged_df[cols].astype(float)

In [None]:
def minmax_scaler(data):
    """Scale every column of ``data`` to the [0, 1] range.

    This cell re-defines the helper declared in an earlier cell; because it
    runs later, this is the definition in effect for the scaling call.

    Args:
        data: Object exposing ``min(axis=0)`` / ``max(axis=0)`` and supporting
            element-wise arithmetic (e.g. a pandas DataFrame or NumPy array).

    Returns:
        Tuple ``(scaled, min_val, max_val)``: the scaled values as a NumPy
        array, plus the per-column minimum and maximum used for scaling.
    """
    min_val = data.min(axis=0)
    max_val = data.max(axis=0)
    value_range = max_val - min_val
    # A constant column has zero range; dividing by it would fill the column
    # with NaN. Use a divisor of 1 there so the column scales to 0 instead.
    safe_range = np.where(value_range == 0, 1, value_range)
    scaled_data = (data - min_val) / safe_range
    return np.array(scaled_data), min_val, max_val

def minmax_inverse_transform(scaled_data, min_val, max_val):
    """Map min-max scaled values back to the original scale.

    Computes ``scaled_data * (max_val - min_val) + min_val``.
    """
    span = max_val - min_val
    restored = scaled_data * span + min_val
    return restored

In [None]:
# Fit the scaling statistics on the training rows only. Computing min/max over
# the whole dataset would leak information about the held-out test period into
# the features the models are trained on.
# The 0.8 fraction must stay equal to the train/test split ratio used below.
n_fit_rows = int(0.8 * data.shape[0])
_, min_val, max_val = minmax_scaler(data.iloc[:n_fit_rows])

# Apply the training-period statistics to every row. Test-period values may
# therefore fall outside [0, 1]. A column that is constant over the training
# rows gets a divisor of 1 to avoid a division by zero.
value_range = (max_val - min_val).replace(0, 1)
data_scaled = np.array((data - min_val) / value_range)

In [None]:
# Chronological split: the first 80% of rows are used for training and the
# remaining rows for testing; the date axis is split at the same row.
n_train = int(data_scaled.shape[0] * 0.8)

train_data_scaled, test_data_scaled = data_scaled[:n_train], data_scaled[n_train:]
train_dates, test_dates = dates[:n_train], dates[n_train:]

In [None]:
# Show the (rows, columns) shapes of the train and test partitions.
train_data_scaled.shape, test_data_scaled.shape

In [None]:
def reformat_data_for_LSTM(train_data_scaled, test_data_scaled, seq_len, pred_days):
    """Cut the train and test arrays into sliding windows for sequence models.

    For each window end position ``end`` (from ``seq_len`` up to
    ``len(array) - pred_days``), the input is the ``seq_len`` rows before
    ``end`` with all columns, and the label is column 0 of row
    ``end + pred_days - 1``.

    Args:
        train_data_scaled: 2-D array of training rows.
        test_data_scaled: 2-D array of test rows.
        seq_len: Number of consecutive rows in each input window.
        pred_days: How many rows ahead of the window the label is taken from
            (1 means the row right after the window).

    Returns:
        Tuple ``(trainX, trainY, testX, testY)`` of NumPy arrays.
    """

    def _windows(series):
        # Window end positions are shared by the inputs and their labels.
        ends = range(seq_len, len(series) - pred_days + 1)
        inputs = [series[end - seq_len:end, :] for end in ends]
        labels = [series[end + pred_days - 1:end + pred_days, 0] for end in ends]
        return np.array(inputs), np.array(labels)

    trainX, trainY = _windows(train_data_scaled)
    testX, testY = _windows(test_data_scaled)
    return trainX, trainY, testX, testY

# Build one-step-ahead window sets for look-back lengths of 3, 5, 14, 30, 60
# and 120 rows.
trainX_3, trainY_3, testX_3, testY_3 = reformat_data_for_LSTM(
    train_data_scaled, test_data_scaled, seq_len=3, pred_days=1)
trainX_5, trainY_5, testX_5, testY_5 = reformat_data_for_LSTM(
    train_data_scaled, test_data_scaled, seq_len=5, pred_days=1)
trainX_14, trainY_14, testX_14, testY_14 = reformat_data_for_LSTM(
    train_data_scaled, test_data_scaled, seq_len=14, pred_days=1)
trainX_30, trainY_30, testX_30, testY_30 = reformat_data_for_LSTM(
    train_data_scaled, test_data_scaled, seq_len=30, pred_days=1)
trainX_60, trainY_60, testX_60, testY_60 = reformat_data_for_LSTM(
    train_data_scaled, test_data_scaled, seq_len=60, pred_days=1)
trainX_120, trainY_120, testX_120, testY_120 = reformat_data_for_LSTM(
    train_data_scaled, test_data_scaled, seq_len=120, pred_days=1)

In [None]:
# Shared training hyper-parameters: `learning_rate` is read by every model
# builder below, `BATCH_SIZE` by every fit call.
# The trailing value lists are presumably alternatives the author tried — TODO confirm.
learning_rate=0.005 # 0.01, 0.05, 0.0001, 0.0005
BATCH_SIZE = 64 # 64, 32, 16, 8

def create_Bi_LSTM_model(input_shape, output_shape, optimizer='adam'):
    """Build and compile a stacked bidirectional-LSTM regressor.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        output_shape: Number of units in the final Dense layer.
        optimizer: Accepted but not used; the model is always compiled with
            Adam at the module-level ``learning_rate``.

    Returns:
        The compiled Keras ``Sequential`` model (loss: mean squared error).
    """
    layers = [
        Bidirectional(LSTM(64, return_sequences=True), input_shape=input_shape),
        Bidirectional(LSTM(32, return_sequences=False)),
        Dense(output_shape),
    ]
    model = Sequential(layers)
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

def create_LSTM_model(input_shape, output_shape, optimizer='adam'):
    """Build and compile a two-layer LSTM regressor.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        output_shape: Number of units in the final Dense layer.
        optimizer: Accepted but not used; the model is always compiled with
            Adam at the module-level ``learning_rate``.

    Returns:
        The compiled Keras ``Sequential`` model (loss: mean squared error).
    """
    layers = [
        LSTM(128, input_shape=input_shape, return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(output_shape),
    ]
    model = Sequential(layers)
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

def create_CNN_model(input_shape, output_shape):
    """Build and compile a 1-D convolutional regressor.

    Two same-padded Conv1D layers are followed by global max pooling, a
    Flatten layer, a 128-unit ReLU Dense layer and the output Dense layer.
    The model is compiled with Adam at the module-level ``learning_rate``.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        output_shape: Number of units in the final Dense layer.

    Returns:
        The compiled Keras ``Sequential`` model (loss: mean squared error).
    """
    layers = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape, padding='same'),
        Conv1D(filters=32, kernel_size=3, activation='relu', padding='same'),
        GlobalMaxPooling1D(),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(output_shape),
    ]
    model = Sequential(layers)
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

def create_GRU_model(input_shape, output_shape):
    """Build and compile a two-layer GRU regressor.

    The model is compiled with Adam at the module-level ``learning_rate``.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        output_shape: Number of units in the final Dense layer.

    Returns:
        The compiled Keras ``Sequential`` model (loss: mean squared error).
    """
    layers = [
        GRU(128, return_sequences=True, input_shape=input_shape),
        GRU(64, return_sequences=False),
        Dense(output_shape),
    ]
    model = Sequential(layers)
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

def create_RNN_model(input_shape, output_shape):
    """Build and compile a two-layer SimpleRNN regressor.

    The model is compiled with Adam at the module-level ``learning_rate``.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        output_shape: Number of units in the final Dense layer.

    Returns:
        The compiled Keras ``Sequential`` model (loss: mean squared error).
    """
    layers = [
        SimpleRNN(128, return_sequences=True, input_shape=input_shape),
        SimpleRNN(64, return_sequences=False),
        Dense(output_shape),
    ]
    model = Sequential(layers)
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

def MAPE(y_test, y_pred):
    """Return the mean absolute percentage error, in percent.

    Args:
        y_test: Ground-truth values (array-like).
        y_pred: Predicted values (array-like) with a shape compatible with
            ``y_test``.

    Returns:
        ``mean(|y_test - y_pred| / |y_test|) * 100`` as a float.
    """
    y_test = np.asarray(y_test, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Targets equal to zero would otherwise produce inf/NaN. Clamp the
    # denominator to machine epsilon so the result stays finite.
    denominator = np.maximum(np.abs(y_test), np.finfo(np.float64).eps)
    return np.mean(np.abs(y_test - y_pred) / denominator) * 100

In [None]:
# Model dimensions for the 3-step look-back windows: (time steps, features)
# for the input and the label width for the output.
input_shape = trainX_3.shape[1:3]
output_shape = trainY_3.shape[1]


def _fit_and_predict(build_model):
    """Build a model, fit it for 10 epochs on the 3-step training windows and
    return ``(model, fit history, predictions on testX_3)``."""
    model = build_model(input_shape, output_shape)
    fit_history = model.fit(trainX_3, trainY_3, epochs=10, batch_size=BATCH_SIZE,
                            verbose=0)
    return model, fit_history, model.predict(testX_3)


# Train the five architectures one after another. `history` is overwritten by
# each run, so after this cell it holds the RNN fit history.
lstm_model_3, history, prediction_3_lstm = _fit_and_predict(create_LSTM_model)
gru_model_3, history, prediction_3_gru = _fit_and_predict(create_GRU_model)
bi_lstm_model_3, history, prediction_3_bi_lstm = _fit_and_predict(create_Bi_LSTM_model)
cnn_model_3, history, prediction_3_cnn = _fit_and_predict(create_CNN_model)
rnn_model_3, history, prediction_3_rnn = _fit_and_predict(create_RNN_model)

In [None]:
# Re-train the SimpleRNN model on the 3-step windows and report its test MAPE.
rnn_model_3 = create_RNN_model(input_shape, output_shape)
history = rnn_model_3.fit(trainX_3, trainY_3, epochs=10, batch_size=BATCH_SIZE,
                verbose=0)
prediction_3_rnn = rnn_model_3.predict(testX_3)

# MAPE is not invariant to min-max scaling: on scaled targets it is not the
# price-space percentage error and it explodes for values near 0. Convert both
# series back to the original scale of the target column (column 0) first.
target_min, target_max = min_val.iloc[0], max_val.iloc[0]
mape_rnn = MAPE(
    minmax_inverse_transform(testY_3, target_min, target_max),
    minmax_inverse_transform(prediction_3_rnn, target_min, target_max),
)
print(mape_rnn)

In [None]:
# Re-train the GRU model on the 3-step windows and report its test MAPE.
gru_model_3 = create_GRU_model(input_shape, output_shape)
history = gru_model_3.fit(trainX_3, trainY_3, epochs=10, batch_size=BATCH_SIZE,
                verbose=0)
prediction_3_gru = gru_model_3.predict(testX_3)

# MAPE is not invariant to min-max scaling: on scaled targets it is not the
# price-space percentage error and it explodes for values near 0. Convert both
# series back to the original scale of the target column (column 0) first.
target_min, target_max = min_val.iloc[0], max_val.iloc[0]
mape_gru = MAPE(
    minmax_inverse_transform(testY_3, target_min, target_max),
    minmax_inverse_transform(prediction_3_gru, target_min, target_max),
)
print(mape_gru)

In [None]:
# Re-train the CNN model on the 3-step windows and report its test MAPE.
cnn_model_3 = create_CNN_model(input_shape, output_shape)
history = cnn_model_3.fit(trainX_3, trainY_3, epochs=10, batch_size=BATCH_SIZE,
                verbose=0)
prediction_3_cnn = cnn_model_3.predict(testX_3)

# MAPE is not invariant to min-max scaling: on scaled targets it is not the
# price-space percentage error and it explodes for values near 0. Convert both
# series back to the original scale of the target column (column 0) first.
target_min, target_max = min_val.iloc[0], max_val.iloc[0]
mape_cnn = MAPE(
    minmax_inverse_transform(testY_3, target_min, target_max),
    minmax_inverse_transform(prediction_3_cnn, target_min, target_max),
)
print(mape_cnn)

In [None]:
# prediction_3_lstm = lstm_model_3.predict(testX_3)
# prediction_3_gru = gru_model_3.predict(testX_3)
# prediction_3_bi_lstm = bi_lstm_model_3.predict(testX_3)
# prediction_3_cnn = cnn_model_3.predict(testX_3)
# prediction_3_rnn = rnn_model_3.predict(testX_3)

In [None]:
# Convert the scaled predictions back to the original scale of the target
# column (column 0 of the scaled data).
# `min_val` / `max_val` are Series indexed by column name, so `min_val[0]`
# relies on the deprecated positional fallback of `Series.__getitem__`;
# `.iloc[0]` selects the first entry explicitly.
target_min, target_max = min_val.iloc[0], max_val.iloc[0]

prediction_3_lstm_rev = minmax_inverse_transform(prediction_3_lstm, target_min, target_max)
prediction_3_gru_rev = minmax_inverse_transform(prediction_3_gru, target_min, target_max)
prediction_3_cnn_rev = minmax_inverse_transform(prediction_3_cnn, target_min, target_max)
prediction_3_bi_lstm_rev = minmax_inverse_transform(prediction_3_bi_lstm, target_min, target_max)
prediction_3_rnn_rev = minmax_inverse_transform(prediction_3_rnn, target_min, target_max)

In [None]:
plt.figure(figsize=(20, 10))
plt.xlabel('Date', fontsize=14)  # font size of the x-axis label
plt.ylabel('Bitcoin price(USD)', fontsize=14)  # font size of the y-axis label
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Plot the test period after its first 120 days.
# NOTE(review): 120 presumably matches the longest look-back window so every
# window length can be compared on the same dates — confirm.
plot_dates = test_dates[120:]
# Derive the tail length from the dates instead of hard-coding it (was 294),
# so the x and y series keep the same length when the dataset size changes.
n_plot = len(plot_dates)

# (values, colour, line style, legend label) for each curve, in drawing order.
series_to_plot = [
    (target, 'black', '--', 'Actual Close Price'),
    (prediction_3_bi_lstm_rev, 'red', '-', 'Bi LSTM'),
    (prediction_3_lstm_rev, 'blue', '-', 'LSTM'),
    (prediction_3_cnn_rev, 'green', '-', 'CNN'),
    (prediction_3_gru_rev, 'cyan', '-', 'GRU'),
    (prediction_3_rnn_rev, 'magenta', '-', 'RNN'),
]
for values, color, linestyle, label in series_to_plot:
    # Keep only the last n_plot points; the last prediction lines up with the
    # last test date.
    plt.plot(plot_dates, values[len(values) - n_plot:], color=color,
             linestyle=linestyle, label=label)

plt.legend(fontsize=14)

plt.show()