#1. Import Necessary Libraries, Dataset

In [24]:
# Importing Necessary Libraries and Generating Necessary Variables
import time
import math
from datetime import date
import numpy as np
from numpy.random import seed
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM

# Fractions of the dataset reserved for the test and cross-validation splits.
test_size = 0.2
cv_size = 0.2

# Model hyperparameters (the `_opt` suffix suggests previously tuned values).
N_opt = 3                 # look-back window: rows of history per sample
lstm_units_opt = 7
dropout_prob_opt = 0.5
optimizer_opt = 'adam'
epochs_opt = 1000
batch_size_opt = 64

# Seed every global random number generator this notebook imports so that
# repeated runs are comparable.
model_seed = 100

seed(model_seed)                 # NumPy global RNG (numpy.random.seed)
tf.random.set_seed(model_seed)   # TensorFlow global RNG

In [2]:
# Read the on-chain dataset and use its 'date' column as the row index.
df = pd.read_csv('/content/drive/MyDrive/SCHS/onchain.csv').set_index('date')

In [5]:
# Features selected beforehand. The target ('price_usd_close') is kept last
# because the windowing helpers below treat the final column as the target.
cols = [
    'reserve_x',
    'transactions_count_outflow_x',
    'addresses_count_inflow',
    'fund_flow_ratio_x',
    'estimated_leverage_ratio',
    'stablecoin_supply_ratio',
    'reserve_y',
    'reserve_usd',
    'open_interest',
    'hashrate',
    'price_usd_close',
]
df = df.loc[:, cols]

#2. Make Functions Needed and Generate Variables

In [3]:
def get_mape(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Both arguments are converted to NumPy arrays, so lists, Series or arrays
    are accepted. The error of each element is taken relative to `y_true`,
    so a zero in `y_true` yields inf/NaN for that element.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = np.abs((actual - predicted) / actual)
    return np.mean(relative_error) * 100

def get_x_y(data, N, offset):
    """Split a frame into sliding feature windows and their targets.

    For every row position i in [offset, len(data)):
      * the features are the N rows preceding i, all columns except the last;
      * the target is the last column's value at row i.

    Parameters:
        data:   DataFrame whose last column is the target.
        N:      number of preceding rows in each feature window.
        offset: first row position to produce a sample for.

    Returns:
        (x, y) as NumPy arrays; x stacks the windows and y stacks the targets.
    """
    positions = range(offset, len(data))
    x = np.array([data.iloc[i - N:i, :-1] for i in positions])
    y = np.array([data.iloc[i, -1] for i in positions])
    return x, y

def get_x_scaled(data, N, offset):
    """Standardize each sliding feature window by its own per-column statistics.

    For every row position i in [offset, len(data)) the window is the N rows
    preceding i, all columns except the last (the target). Each column of the
    window is shifted by that column's window mean and divided by that
    column's window standard deviation (population std, ddof=0).

    Parameters:
        data:   DataFrame whose last column is the target.
        N:      number of preceding rows in each window.
        offset: first row position to produce a window for.

    Returns:
        NumPy array stacking the scaled windows (samples, N, features).
    """
    x_scaled = []
    for i in range(offset, len(data)):
        # Work on a plain float array and reduce along axis 0 explicitly.
        # Calling np.mean/np.std on the DataFrame itself depends on the pandas
        # version (newer releases reduce over both axes and return a scalar),
        # which would silently replace per-column scaling with a global one.
        window = data.iloc[i - N:i, :-1].to_numpy(dtype=float)
        mu = window.mean(axis=0)
        std = window.std(axis=0)
        # A column that is constant inside the window has zero spread; every
        # value equals the mean, so scale it to 0 instead of producing 0/0 = NaN.
        std = np.where(std == 0, 1.0, std)
        x_scaled.append((window - mu) / std)

    return np.array(x_scaled)

def get_y_scaled(data, N, offset):
    """Standardize sliding target windows and keep the statistics for descaling.

    For every position i in [offset, len(data)) the window is data[i-N:i].
    Its mean and standard deviation are computed without an axis argument, so
    each window is reduced to one scalar mean and one scalar std.

    Parameters:
        data:   positionally indexable target values (the caller in this
                notebook passes a NumPy column vector).
        N:      number of preceding values in each window.
        offset: first position to produce a sample for.

    Returns:
        y_scaled: array of the standardized windows.
        y:        array of the unscaled values data[i].
        mu_list:  list of window means, one per sample.
        std_list: list of window standard deviations, one per sample.
        mu_list and std_list are what is needed to map scaled predictions
        back to the original units.
    """
    y_scaled, y, mu_list, std_list = [], [], [], []
    for i in range(offset, len(data)):
        window = data[i - N:i]
        mu = np.mean(window)
        std = np.std(window)
        mu_list.append(mu)
        std_list.append(std)
        y_scaled.append((window - mu) / std)
        y.append(data[i])

    return np.array(y_scaled), np.array(y), mu_list, std_list

#3. Divide the Dataset and Make Copies for Further Use

In [None]:
# Split sizes. Rows are taken in file order: training rows first, then the
# cross-validation rows, and the remaining rows at the end are left for testing.
# The test hold-out is sized with test_size (previously cv_size was used for
# both hold-outs, which only matched because the two fractions are equal).
num_cv = int(cv_size * len(df))
num_test = int(test_size * len(df))
num_train = len(df) - num_cv - num_test
print("num_train = " + str(num_train))
print("num_cv = " + str(num_cv))

# Positional slices, copied so later edits to one split can never touch df
# or another split.
train = df.iloc[:num_train].copy()
train_cv = df.iloc[:num_train + num_cv].copy()
cv = df.iloc[num_train:num_train + num_cv].copy()

print("train.shape = " + str(train.shape))
print("train_cv.shape = " + str(train_cv.shape))
print("cv.shape = " + str(cv.shape))

In [23]:
# Copies of the splits used only for plotting: 'date' is moved from the index
# back into a regular column and parsed to datetime in each split.
df2 = df.reset_index()
train2 = df2.iloc[:num_train].assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
cv2 = df2.iloc[num_train:num_train + num_cv].assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
train_cv2 = df2.iloc[:num_train + num_cv].assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

#4. Scale Training Set + Test Set Using Functions and Make Necessary Input Layer 

In [None]:
# Standard scaling, fitted on the training split only.
# The scaled values go into a new frame and `train` is left untouched, so
# re-running this cell refits the scaler on the same raw data every time
# (previously `train` was overwritten and a second run fitted on scaled data).
scaler = StandardScaler()
train_scaled = pd.DataFrame(
    scaler.fit_transform(train),
    columns=train.columns,
    index=train.index,
)
print("scaler.mean_ = " + str(scaler.mean_))
print("scaler.var_ = " + str(scaler.var_))

In [None]:
# Show the standardized training frame as this cell's output.
train_scaled

In [None]:
# Window the scaled training frame: x becomes a 3-D stack of N_opt-row feature
# windows, y the matching 1-D vector of scaled target values.
x_train_scaled, y_train_scaled = get_x_y(data=train_scaled, N=N_opt, offset=N_opt)
print(x_train_scaled.shape, y_train_scaled.shape, sep="\n")

In [None]:
# Give train_cv the same columns, in the same order, as the scaled training frame.
train_cv = train_cv.loc[:, train_scaled.columns]
train_cv

In [None]:
# Window-standardized features for the validation rows: starting at row
# num_train, each sample is scaled with the statistics of its own N_opt-row window.
x_cv_scaled = get_x_scaled(data=train_cv, N=N_opt, offset=num_train)
x_cv_scaled.shape

In [13]:
# Scaled target windows for the validation rows, plus the unscaled prices and
# the per-window mean / standard deviation needed to descale predictions and
# compare them with the actual values.
y_cv_scaled, y_cv, mu_cv_list, std_cv_list = get_y_scaled(
    train_cv['price_usd_close'].to_numpy().reshape(-1, 1),
    N_opt,
    num_train,
)

In [None]:
# Sanity check: the validation inputs, targets and descaling statistics
# should all describe the same number of samples.
print("x_cv_scaled.shape = ", x_cv_scaled.shape, sep="")
print("y_cv.shape = ", y_cv.shape, sep="")
print("len(mu_cv_list) = ", len(mu_cv_list), sep="")
print("len(std_cv_list) = ", len(std_cv_list), sep="")

#5. Build Model, Train, and Get Results

In [None]:
# LSTM with GPU
with tf.device('/device:GPU:0'):
  model = Sequential()
  # input_shape is (timesteps, features). Both are read from the training
  # tensor instead of hard-coding the feature count, so changing the selected
  # columns or N_opt cannot leave the model with a stale input shape.
  model.add(LSTM(units=lstm_units_opt, input_shape=x_train_scaled.shape[1:]))
  model.add(Dropout(dropout_prob_opt))
  model.add(Dense(1))  # single output: the scaled target value

  model.compile(loss='mean_squared_error', optimizer=optimizer_opt)
  # shuffle=False keeps the samples in their original row order during training.
  model.fit(x_train_scaled, y_train_scaled, epochs=epochs_opt, batch_size=batch_size_opt, verbose=2, shuffle=False)

In [None]:
# Predict on the validation windows, then undo the per-window standardization
# with each window's own target mean and standard deviation.
est_scaled = model.predict(x_cv_scaled)
est = np.reshape(mu_cv_list, (-1, 1)) + est_scaled * np.reshape(std_cv_list, (-1, 1))
print("est.shape = ", est.shape, sep="")

# Root mean squared error, in the target's original units
rmse = math.sqrt(mean_squared_error(y_cv, est))
print("RMSE = %0.3f" % rmse)

# Mean absolute percentage error
mape_pct = get_mape(y_cv, est)
print("MAPE = %0.3f%%" % mape_pct)

# Coefficient of determination
r2 = r2_score(y_cv, est)
print("R2 = %0.3f" % r2)

In [None]:
# Plot the closing price of the training and validation splits over time,
# together with the model's estimates for the validation period.
rcParams['figure.figsize'] = (10, 8)  # (width, height)

est_df = pd.DataFrame(
    {
        'est': est.ravel(),
        'y_cv': y_cv.ravel(),
        'date': cv2['date'],
    }
)

# All three lines share one Axes: training, validation, estimates.
ax = train2.plot(x='date', y='price_usd_close', style='b-', grid=True)
ax = cv2.plot(x='date', y='price_usd_close', style='y-', grid=True, ax=ax)
ax = est_df.plot(x='date', y='est', style='r-', grid=True, ax=ax)
ax.legend(['train', 'val', 'est'])
ax.set_xlabel("date")
ax.set_ylabel("USD")