# Imports

In [1]:
!pip install keras-tuner

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras-tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.3.5 kt-legacy-1.0.5


In [2]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import matplotlib.pyplot as plt

import math
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Conv1D, MaxPooling1D
from keras.layers import LSTM
from keras.regularizers import L1L2
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
from keras.callbacks import EarlyStopping
from keras_tuner.tuners import BayesianOptimization

from itertools import cycle
import plotly.express as px

# Data preparring

In [None]:
ds_path = '/content/drive/MyDrive/NN_CW_DATASETS/dataset_eth.csv'

In [None]:
eth_df = pd.read_csv(ds_path)
eth_df.drop('Number of Trades', axis=1)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2020-01-01T00:00:00.0000000Z,128.50,130.13,128.17,130.07,1820.412218
1,2020-01-01T02:00:00.0000000Z,130.34,130.54,129.55,129.80,352.082776
2,2020-01-01T04:00:00.0000000Z,129.81,130.14,129.55,129.87,211.968021
3,2020-01-01T06:00:00.0000000Z,129.96,130.33,129.78,129.92,180.606202
4,2020-01-01T08:00:00.0000000Z,129.78,130.14,129.48,129.76,193.347760
...,...,...,...,...,...,...
13023,2022-12-31T14:00:00.0000000Z,1200.60,1207.60,1199.90,1202.20,291.749469
13024,2022-12-31T16:00:00.0000000Z,1202.00,1203.00,1198.70,1200.10,216.118849
13025,2022-12-31T18:00:00.0000000Z,1200.30,1204.80,1198.60,1201.80,166.695459
13026,2022-12-31T20:00:00.0000000Z,1202.10,1202.90,1199.10,1199.30,148.339533


In [None]:
print('Null Values:', eth_df.isnull().values.sum())
print('NA values:', eth_df.isnull().values.any())

Null Values: 0
NA values: False


In [None]:
features = eth_df[['Date', 'Close', 'Low', 'High', "Open", "Volume"]]
print("Shape of close dataframe:", features.shape)

Shape of close dataframe: (13028, 6)


In [None]:
features = features[(features['Date'] > '2019-11-27') & (features['Date'] < '2022-11-27')]
close_stock = features[["Date", 'Close']]
print("Total data for prediction: ", features.shape)
print("Total data for prediction: ", features.shape)

Total data for prediction:  (12646, 6)
Total data for prediction:  (12646, 6)


In [None]:
features = features.drop('Date', axis = 1)

scaler = StandardScaler()

features_scaled = scaler.fit_transform(np.array(features).reshape(-1,5))

print(features.shape)

(12646, 5)


In [None]:
training_size = int(len(features_scaled) * 0.60)
test_size = len(features_scaled) - training_size

train_data_eth, test_data_eth = features_scaled[0:training_size, :], features_scaled[training_size:len(features_scaled), :]

print("train_data: ", train_data_eth.shape)
print("test_data: ", test_data_eth.shape)


train_data:  (7587, 5)
test_data:  (5059, 5)


In [None]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), 1:]  
        dataX.append(a)
        dataY.append(dataset[i + time_step, :][0])  
    return np.array(dataX), np.array(dataY)

time_step = 20
X_train_eth, y_train_eth = create_dataset(train_data_eth, time_step)
X_test_eth, y_test_eth = create_dataset(test_data_eth, time_step)

print("X_train: ", X_train_eth.shape)
print("y_train: ", y_train_eth.shape)
print("X_test: ", X_test_eth.shape)
print("y_test: ", y_test_eth.shape)

X_train:  (7566, 20, 4)
y_train:  (7566,)
X_test:  (5038, 20, 4)
y_test:  (5038,)


In [None]:
X_train_eth[-1][-1]

array([ 1.00138668,  1.0026813 ,  1.02093319, -0.34987931])

In [None]:
X_train_eth = X_train_eth.reshape(X_train_eth.shape[0],X_train_eth.shape[1] , 4)
X_test_eth = X_test_eth.reshape(X_test_eth.shape[0],X_test_eth.shape[1] , 4)

print("X_train: ", X_train_eth.shape)
print("X_test: ", X_test_eth.shape)

X_train:  (7566, 20, 4)
X_test:  (5038, 20, 4)


# Model and training

In [None]:
# Define the model architecture
def build_model(hp):
    model = Sequential()
    model.add(LSTM(hp.Int('units_1', min_value=32, max_value=128, step=32), return_sequences=True, input_shape=(20, 4)))
    model.add(Dropout(hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(BatchNormalization())
    model.add(LSTM(hp.Int('units_2', min_value=32, max_value=128, step=32), return_sequences=False))
    model.add(Dropout(hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(BatchNormalization())
    model.add(Dense(hp.Int('units_3', min_value=16, max_value=64, step=16), activation='relu'))
    model.add(Dropout(hp.Float('dropout_3', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(BatchNormalization())
    model.add(Dense(1))
    early_stopping = EarlyStopping(patience=10, restore_best_weights=True)
    model.compile(loss='mse', optimizer='adam')
    return model

# Define the Bayesian Optimization tuner
tuner = BayesianOptimization(
    build_model,
    objective='val_loss',
    max_trials=20,
    executions_per_trial=1,
    directory='bayesian_opt',
    project_name='my_model'
)

# Perform the hyperparameter search
tuner.search(X_train_eth, y_train_eth, validation_data=(X_test_eth, y_test_eth), epochs=20, batch_size=32, verbose=1, callbacks=[early_stopping])

# Retrieve the best model
best_model = tuner.get_best_models(num_models=1)[0]


In [None]:
model = Sequential()

model.add(LSTM(64, return_sequences=True, input_shape=(20, 4)))
model.add(Dropout(0.3))
model.add(BatchNormalization())

model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.3))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.3))
model.add(BatchNormalization())

model.add(Dense(1))

early_stopping = EarlyStopping(patience=10, restore_best_weights=True)

In [None]:
model.compile(loss="mean_squared_error",optimizer="adam")

In [None]:
history = model.fit(X_train_eth,y_train_eth, validation_data=(X_test_eth,y_test_eth),epochs=30,batch_size=32,verbose=1, callbacks=[early_stopping])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
train_predict=model.predict(X_train_eth)
test_predict=model.predict(X_test_eth)

train_predict.shape, test_predict.shape



((7566, 1), (5038, 1))

In [None]:
new_scaler = StandardScaler()

new_scaler.fit_transform(np.array(features['Close']).reshape(-1,1))

array([[-1.21815855],
       [-1.2183674 ],
       [-1.21831326],
       ...,
       [-0.38529932],
       [-0.38305615],
       [-0.38676899]])

In [None]:
train_predict = new_scaler.inverse_transform(train_predict.reshape(-1,1))
test_predict = new_scaler.inverse_transform(test_predict.reshape(-1,1))

original_ytrain = new_scaler.inverse_transform(y_train_eth.reshape(-1,1)) 
original_ytest = new_scaler.inverse_transform(y_test_eth.reshape(-1,1)) 

In [None]:
print("Train data R2 score:", r2_score(original_ytrain, train_predict))
print("Test data R2 score:", r2_score(original_ytest, test_predict))

Train data R2 score: 0.9964386269585446
Test data R2 score: 0.9838510924213477


In [None]:
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain,train_predict)))
print("Train data MSE: ", mean_squared_error(original_ytrain,train_predict))
print("Train data MAE: ", mean_absolute_error(original_ytrain,train_predict))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest,test_predict)))
print("Test data MSE: ", mean_squared_error(original_ytest,test_predict))
print("Test data MAE: ", mean_absolute_error(original_ytest,test_predict))

Train data RMSE:  67.188750527339
Train data MSE:  4514.328197424996
Train data MAE:  43.6231702325283
-------------------------------------------------------------------------------------
Test data RMSE:  139.14415276536738
Test data MSE:  19361.09524879189
Test data MAE:  101.96224049276283


In [None]:
print("Train data MGD: ", mean_gamma_deviance(original_ytrain, train_predict))
print("Test data MGD: ", mean_gamma_deviance(original_ytest, test_predict))
print("----------------------------------------------------------------------")
print("Train data MPD: ", mean_poisson_deviance(original_ytrain, train_predict))
print("Test data MPD: ", mean_poisson_deviance(original_ytest, test_predict))

Train data MGD:  0.013131359775288846
Test data MGD:  0.002521278737652082
----------------------------------------------------------------------
Train data MPD:  3.4344996605351192
Test data MPD:  6.193040308296353


In [None]:
print("Train data explained variance regression score:", 
      explained_variance_score(original_ytrain, train_predict))
print("Test data explained variance regression score:", 
      explained_variance_score(original_ytest, test_predict))

Train data explained variance regression score: 0.9970389132198363
Test data explained variance regression score: 0.9899701489935435


In [None]:
look_back=time_step
trainPredictPlot = np.empty_like(features['Close'])
trainPredictPlot[:] = np.nan
trainPredictPlot[look_back:len(train_predict)+look_back] = train_predict.reshape(1,-1)
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting
testPredictPlot = np.empty_like(features['Close'])
testPredictPlot[:] = np.nan
testPredictPlot[len(train_predict)+(look_back*2)+1:len(features)-1] = test_predict.reshape(1,-1)
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])


plotdf = pd.DataFrame({'date': close_stock['Date'],
                       'original_close': close_stock['Close'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (12646,)
Test predicted data:  (12646,)


# LINK

In [None]:
link_ds_path = '/content/drive/MyDrive/NN_CW_DATASETS/dataset_link.csv'

In [None]:
link_df = pd.read_csv(link_ds_path)
link_df.drop('Number of Trades', axis=1)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2020-10-20T12:00:00.0000000Z,10.41,10.45,10.25,10.25,443.120959
1,2020-10-20T14:00:00.0000000Z,10.44,10.45,10.32,10.34,669.866993
2,2020-10-20T16:00:00.0000000Z,10.30,10.31,10.00,10.01,1184.082638
3,2020-10-20T18:00:00.0000000Z,10.16,10.42,10.01,10.02,1458.700873
4,2020-10-20T20:00:00.0000000Z,10.04,10.16,10.04,10.06,2745.034231
...,...,...,...,...,...,...
9498,2022-12-31T14:00:00.0000000Z,5.59,5.62,5.58,5.59,7409.159660
9499,2022-12-31T16:00:00.0000000Z,5.58,5.58,5.55,5.55,2731.408163
9500,2022-12-31T18:00:00.0000000Z,5.56,5.59,5.56,5.58,3892.282928
9501,2022-12-31T20:00:00.0000000Z,5.59,5.59,5.57,5.57,751.265781


In [None]:
print('Null Values:', link_df.isnull().values.sum())
print('NA values:', link_df.isnull().values.any())

Null Values: 0
NA values: False


In [None]:
features_link = link_df[['Date', 'Close', 'Low', 'High', "Open", "Volume"]]
print("Shape of close dataframe:", features_link.shape)

Shape of close dataframe: (9503, 6)


In [None]:
features_link = features_link[(features_link['Date'] > '2019-11-27') & (features_link['Date'] < '2022-11-27')]
close_stock = features_link[["Date", 'Close']]
print("Total data for prediction: ", features_link.shape)
print("Total data for prediction: ", features_link.shape)

Total data for prediction:  (9121, 6)
Total data for prediction:  (9121, 6)


In [None]:
features_link = features_link.drop('Date', axis = 1)

scaler_link = StandardScaler()

features_link_scaled = scaler_link.fit_transform(np.array(features_link).reshape(-1,5))

print(features_link.shape)

(9121, 5)


In [None]:
training_size = int(len(features_link_scaled) * 0.60)
test_size = len(features_link_scaled) - training_size

train_data_link, test_data_link = features_link_scaled[0:training_size, :], features_link_scaled[training_size:len(features_link_scaled), :]

print("train_data: ", train_data_link.shape)
print("test_data: ", test_data_link.shape)

train_data:  (5472, 5)
test_data:  (3649, 5)


In [None]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), 1:]  
        dataX.append(a)
        dataY.append(dataset[i + time_step, :][0])  
    return np.array(dataX), np.array(dataY)

time_step = 20
X_train_link, y_train_link = create_dataset(train_data_link, time_step)
X_test_link, y_test_link = create_dataset(test_data_link, time_step)

print("X_train: ", X_train_link.shape)
print("y_train: ", y_train_link.shape)
print("X_test: ", X_test_link.shape)
print("y_test: ", y_test_link.shape)

X_train:  (5451, 20, 4)
y_train:  (5451,)
X_test:  (3628, 20, 4)
y_test:  (3628,)


In [None]:
X_train_link[-1][-1]

array([-0.3367789 , -0.34079018, -0.35071354, -0.61936585])

In [None]:
X_train_link = X_train_link.reshape(X_train_link.shape[0],X_train_link.shape[1] , 4)
X_test_link = X_test_link.reshape(X_test_link.shape[0],X_test_link.shape[1] , 4)

print("X_train: ", X_train_eth.shape)
print("X_test: ", X_test_link.shape)

X_train:  (7566, 20, 4)
X_test:  (3628, 20, 4)


# LINK Results

In [None]:
train_predict_link = model.predict(X_train_link)
test_predict_link = model.predict(X_test_link)

train_predict_link.shape, test_predict_link.shape



((5451, 1), (3628, 1))

In [None]:
new_scaler_link = StandardScaler()

new_scaler_link.fit_transform(np.array(features_link['Close']).reshape(-1,1))

array([[-0.83793254],
       [-0.82869576],
       [-0.86256397],
       ...,
       [-1.15711478],
       [-1.15095692],
       [-1.15711478]])

In [None]:
train_predict_link = new_scaler_link.inverse_transform(train_predict_link.reshape(-1,1))
test_predict_link = new_scaler_link.inverse_transform(test_predict_link.reshape(-1,1))

original_ytrain_link = new_scaler_link.inverse_transform(y_train_link.reshape(-1,1)) 
original_ytest_link = new_scaler_link.inverse_transform(y_test_link.reshape(-1,1)) 

In [None]:
print("Train data R2 score:", r2_score(original_ytrain_link, train_predict_link))
print("Test data R2 score:", r2_score(original_ytest_link, test_predict_link))

Train data R2 score: 0.9508416910510167
Test data R2 score: 0.9787385109853761


In [None]:
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain_link, train_predict_link)))
print("Train data MSE: ", mean_squared_error(original_ytrain_link,train_predict_link))
print("Train data MAE: ", mean_absolute_error(original_ytrain_link,train_predict_link))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest_link,test_predict_link)))
print("Test data MSE: ", mean_squared_error(original_ytest_link,test_predict_link))
print("Test data MAE: ", mean_absolute_error(original_ytest_link,test_predict_link))

Train data RMSE:  1.7882932340178879
Train data MSE:  3.1979926908341563
Train data MAE:  1.0378870864221534
-------------------------------------------------------------------------------------
Test data RMSE:  0.5585628232055466
Test data MSE:  0.3119924274673508
Test data MAE:  0.44426508029740064


In [None]:
print("Train data MGD: ", mean_gamma_deviance(original_ytrain_link, train_predict_link))
print("Test data MGD: ", mean_gamma_deviance(original_ytest_link, test_predict_link))
print("----------------------------------------------------------------------")
print("Train data MPD: ", mean_poisson_deviance(original_ytrain_link, train_predict_link))
print("Test data MPD: ", mean_poisson_deviance(original_ytest_link, test_predict_link))

Train data MGD:  0.0036133771273672423
Test data MGD:  0.004226845207638802
----------------------------------------------------------------------
Train data MPD:  0.09823278937576194
Test data MPD:  0.0336939290469511


In [None]:
print("Train data explained variance regression score:", 
      explained_variance_score(original_ytrain_link, train_predict_link))
print("Test data explained variance regression score:", 
      explained_variance_score(original_ytest_link, test_predict_link))

Train data explained variance regression score: 0.9574623240225656
Test data explained variance regression score: 0.9839633349231108


In [None]:
look_back=time_step
trainPredictPlot = np.empty_like(features_link['Close'])
trainPredictPlot[:] = np.nan
trainPredictPlot[look_back:len(train_predict_link)+look_back] = train_predict_link.reshape(1,-1)
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting
testPredictPlot = np.empty_like(features_link['Close'])
testPredictPlot[:] = np.nan
testPredictPlot[len(train_predict_link)+(look_back*2)+1:len(features_link)-1] = test_predict_link.reshape(1,-1)
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])


plotdf = pd.DataFrame({'date': close_stock['Date'],
                       'original_close': close_stock['Close'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (9121,)
Test predicted data:  (9121,)


# ADA

In [None]:
ada_ds_path = '/content/drive/MyDrive/NN_CW_DATASETS/dataset_ada.csv'

In [None]:
ada_df = pd.read_csv(ada_ds_path)
ada_df.drop('Number of Trades', axis=1)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2021-11-24T14:00:00.0000000Z,1.64999,1.68228,1.62896,1.68228,56149.584258
1,2021-11-24T16:00:00.0000000Z,1.68115,1.68115,1.61750,1.61873,141283.267424
2,2021-11-24T18:00:00.0000000Z,1.62227,1.63055,1.60394,1.61656,39873.048678
3,2021-11-24T20:00:00.0000000Z,1.61954,1.65000,1.61767,1.64655,21604.233040
4,2021-11-24T22:00:00.0000000Z,1.65049,1.67500,1.64900,1.67266,10523.548262
...,...,...,...,...,...,...
4608,2022-12-31T12:00:00.0000000Z,0.24666,0.24742,0.24661,0.24742,11181.627909
4609,2022-12-31T14:00:00.0000000Z,0.24822,0.25016,0.24822,0.25016,4350.926696
4610,2022-12-31T18:00:00.0000000Z,0.24816,0.24816,0.24736,0.24753,12849.307687
4611,2022-12-31T20:00:00.0000000Z,0.24790,0.24790,0.24790,0.24790,89.158387


In [None]:
print('Null Values:', ada_df.isnull().values.sum())
print('NA values:', ada_df.isnull().values.any())

Null Values: 0
NA values: False


In [None]:
features_ada = ada_df[['Date', 'Close', 'Low', 'High', "Open", "Volume"]]
print("Shape of close dataframe:", features_ada.shape)

Shape of close dataframe: (4613, 6)


In [None]:
features_ada = features_ada[(features_ada['Date'] > '2019-11-27') & (features_ada['Date'] < '2022-12-31')]
close_stock_ada = features_ada[["Date", 'Close']]
print("Total data for prediction: ", features_ada.shape)
print("Total data for prediction: ", features_ada.shape)

Total data for prediction:  (4603, 6)
Total data for prediction:  (4603, 6)


In [None]:
features_ada = features_ada.drop('Date', axis = 1)

scaler_ada = StandardScaler()

features_ada_scaled = scaler_ada.fit_transform(np.array(features_ada).reshape(-1,5))

print(features_ada.shape)

(4603, 5)


In [None]:
training_size = int(len(features_ada_scaled) * 0.60)
test_size = len(features_ada_scaled) - training_size

train_data_ada, test_data_ada = features_ada_scaled[0:training_size, :], features_ada_scaled[training_size:len(features_ada_scaled), :]

print("train_data: ", train_data_ada.shape)
print("test_data: ", test_data_ada.shape)

train_data:  (2761, 5)
test_data:  (1842, 5)


In [None]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), 1:]  
        dataX.append(a)
        dataY.append(dataset[i + time_step, :][0])  
    return np.array(dataX), np.array(dataY)

time_step = 20
X_train_ada, y_train_ada = create_dataset(train_data_ada, time_step)
X_test_ada, y_test_ada = create_dataset(test_data_ada, time_step)

print("X_train: ", X_train_ada.shape)
print("y_train: ", y_train_ada.shape)
print("X_test: ", X_test_ada.shape)
print("y_test: ", y_test_ada.shape)

X_train:  (2740, 20, 4)
y_train:  (2740,)
X_test:  (1821, 20, 4)
y_test:  (1821,)


In [None]:
X_train_ada = X_train_ada.reshape(X_train_ada.shape[0],X_train_ada.shape[1] , 4)
X_test_ada = X_test_ada.reshape(X_test_ada.shape[0],X_test_ada.shape[1] , 4)

print("X_train: ", X_train_ada.shape)
print("X_test: ", X_test_ada.shape)

X_train:  (2740, 20, 4)
X_test:  (1821, 20, 4)


# ADA Results

In [None]:
train_predict_ada = model.predict(X_train_ada)
test_predict_ada = model.predict(X_test_ada)

train_predict_ada.shape, test_predict_ada.shape



((2740, 1), (1821, 1))

In [None]:
new_scaler_ada = StandardScaler()

new_scaler_ada.fit_transform(np.array(features_ada['Close']).reshape(-1,1))

array([[ 2.56818775],
       [ 2.39761396],
       [ 2.39178949],
       ...,
       [-1.29182593],
       [-1.28855134],
       [-1.28817557]])

In [None]:
train_predict_ada = new_scaler_ada.inverse_transform(train_predict_ada.reshape(-1,1))
test_predict_ada = new_scaler_ada.inverse_transform(test_predict_ada.reshape(-1,1))

original_ytrain_ada = new_scaler_ada.inverse_transform(y_train_ada.reshape(-1,1)) 
original_ytest_ada = new_scaler_ada.inverse_transform(y_test_ada.reshape(-1,1)) 

In [None]:
print("Train data R2 score:", r2_score(original_ytrain_ada, train_predict_ada))
print("Test data R2 score:", r2_score(original_ytest_ada, test_predict_ada))

Train data R2 score: 0.9820795394283013
Test data R2 score: 0.946437467076016


In [None]:
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain_ada, train_predict_ada)))
print("Train data MSE: ", mean_squared_error(original_ytrain_ada,train_predict_ada))
print("Train data MAE: ", mean_absolute_error(original_ytrain_ada,train_predict_ada))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest_ada,test_predict_ada)))
print("Test data MSE: ", mean_squared_error(original_ytest_ada,test_predict_ada))
print("Test data MAE: ", mean_absolute_error(original_ytest_ada,test_predict_ada))

Train data RMSE:  0.04484251086387112
Train data MSE:  0.0020108507805763997
Train data MAE:  0.03252676569728086
-------------------------------------------------------------------------------------
Test data RMSE:  0.019810535542433148
Test data MSE:  0.00039245731847800707
Test data MAE:  0.014885951215779632


In [None]:
print("Train data MGD: ", mean_gamma_deviance(original_ytrain_ada, train_predict_ada))
print("Test data MGD: ", mean_gamma_deviance(original_ytest_ada, test_predict_ada))
print("----------------------------------------------------------------------")
print("Train data MPD: ", mean_poisson_deviance(original_ytrain_ada, train_predict_ada))
print("Test data MPD: ", mean_poisson_deviance(original_ytest_ada, test_predict_ada))

Train data MGD:  0.002506974507127195
Test data MGD:  0.0019603060372252075
----------------------------------------------------------------------
Train data MPD:  0.0020165341812865448
Test data MPD:  0.0008683126200908415


In [None]:
print("Train data explained variance regression score:", 
      explained_variance_score(original_ytrain_ada, train_predict_ada))
print("Test data explained variance regression score:", 
      explained_variance_score(original_ytest_ada, test_predict_ada))

Train data explained variance regression score: 0.9867501912849008
Test data explained variance regression score: 0.9588480282897879


In [None]:
look_back=time_step
trainPredictPlot = np.empty_like(features_ada['Close'])
trainPredictPlot[:] = np.nan
trainPredictPlot[look_back:len(train_predict_ada)+look_back] = train_predict_ada.reshape(1,-1)
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting
testPredictPlot = np.empty_like(features_ada['Close'])
testPredictPlot[:] = np.nan
testPredictPlot[len(train_predict_ada)+(look_back*2)+1:len(features_ada)-1] = test_predict_ada.reshape(1,-1)
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])


plotdf = pd.DataFrame({'date': close_stock_ada['Date'],
                       'original_close': close_stock_ada['Close'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (4603,)
Test predicted data:  (4603,)


# LTC

In [None]:
ltc_ds_path = '/content/drive/MyDrive/NN_CW_DATASETS/dataset_ltc.csv'

In [None]:
ltc_df = pd.read_csv(ltc_ds_path)
ltc_df.drop('Number of Trades', axis=1)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2020-01-01T00:00:00.0000000Z,41.09,41.53,41.00,41.41,1777.325206
1,2020-01-01T02:00:00.0000000Z,41.48,41.65,41.43,41.45,87.312863
2,2020-01-01T04:00:00.0000000Z,41.33,41.67,41.30,41.67,31.052718
3,2020-01-01T06:00:00.0000000Z,41.62,41.62,41.35,41.42,106.700787
4,2020-01-01T08:00:00.0000000Z,41.45,41.62,41.30,41.62,16.280991
...,...,...,...,...,...,...
13026,2022-12-31T14:00:00.0000000Z,70.74,70.86,69.95,70.01,999.349703
13027,2022-12-31T16:00:00.0000000Z,70.05,70.29,69.79,70.00,496.114571
13028,2022-12-31T18:00:00.0000000Z,70.13,70.23,69.87,70.09,676.552257
13029,2022-12-31T20:00:00.0000000Z,70.09,70.41,70.09,70.21,488.916411


In [None]:
print('Null Values:', ltc_df.isnull().values.sum())
print('NA values:', ltc_df.isnull().values.any())

Null Values: 0
NA values: False


In [None]:
features_ltc = ltc_df[['Date', 'Close', 'Low', 'High', "Open", "Volume"]]
print("Shape of close dataframe:", features_ltc.shape)

Shape of close dataframe: (13031, 6)


In [None]:
features_ltc = features_ltc[(features_ltc['Date'] > '2019-11-27') & (features_ltc['Date'] < '2022-12-31')]
close_stock_ltc = features_ltc[["Date", 'Close']]
print("Total data for prediction: ", features_ltc.shape)
print("Total data for prediction: ", features_ltc.shape)

Total data for prediction:  (13019, 6)
Total data for prediction:  (13019, 6)


In [None]:
features_ltc = features_ltc.drop('Date', axis = 1)

scaler_ltc = StandardScaler()

features_ltc_scaled = scaler_ltc.fit_transform(np.array(features_ltc).reshape(-1,5))

print(features_ltc.shape)

(13019, 5)


In [None]:
training_size = int(len(features_ltc_scaled) * 0.60)
test_size = len(features_ltc_scaled) - training_size

train_data_ltc, test_data_ltc = features_ltc_scaled[0:training_size, :], features_ltc_scaled[training_size:len(features_ltc_scaled), :]

print("train_data: ", train_data_ltc.shape)
print("test_data: ", test_data_ltc.shape)

train_data:  (7811, 5)
test_data:  (5208, 5)


In [None]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), 1:]  
        dataX.append(a)
        dataY.append(dataset[i + time_step, :][0])  
    return np.array(dataX), np.array(dataY)

time_step = 20
X_train_ltc, y_train_ltc = create_dataset(train_data_ltc, time_step)
X_test_ltc, y_test_ltc = create_dataset(test_data_ltc, time_step)

print("X_train: ", X_train_ltc.shape)
print("y_train: ", y_train_ltc.shape)
print("X_test: ", X_test_ltc.shape)
print("y_test: ", y_test_ltc.shape)

X_train:  (7790, 20, 4)
y_train:  (7790,)
X_test:  (5187, 20, 4)
y_test:  (5187,)


In [None]:
X_train_ltc = X_train_ltc.reshape(X_train_ltc.shape[0],X_train_ltc.shape[1] , 4)
X_test_ltc = X_test_ltc.reshape(X_test_ltc.shape[0],X_test_ltc.shape[1] , 4)

print("X_train: ", X_train_ltc.shape)
print("X_test: ", X_test_ltc.shape)

X_train:  (7790, 20, 4)
X_test:  (5187, 20, 4)


# LTC Results

In [None]:
train_predict_ltc = model.predict(X_train_ltc)
test_predict_ltc = model.predict(X_test_ltc)

train_predict_ltc.shape, test_predict_ltc.shape



((7790, 1), (5187, 1))

In [None]:
new_scaler_ltc = StandardScaler()

new_scaler_ltc.fit_transform(np.array(features_ltc['Close']).reshape(-1,1))

array([[-0.99801652],
       [-0.99740922],
       [-0.99406908],
       ...,
       [-0.59674417],
       [-0.59704781],
       [-0.59613687]])

In [None]:
train_predict_ltc = new_scaler_ltc.inverse_transform(train_predict_ltc.reshape(-1,1))
test_predict_ltc = new_scaler_ltc.inverse_transform(test_predict_ltc.reshape(-1,1))

original_ytrain_ltc = new_scaler_ltc.inverse_transform(y_train_ltc.reshape(-1,1)) 
original_ytest_ltc = new_scaler_ltc.inverse_transform(y_test_ltc.reshape(-1,1)) 

In [None]:
print("Train data R2 score:", r2_score(original_ytrain_ltc, train_predict_ltc))
print("Test data R2 score:", r2_score(original_ytest_ltc, test_predict_ltc))

Train data R2 score: 0.9525297706401586
Test data R2 score: 0.985381031059513


In [None]:
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain_ltc, train_predict_ltc)))
print("Train data MSE: ", mean_squared_error(original_ytrain_ltc,train_predict_ltc))
print("Train data MAE: ", mean_absolute_error(original_ytrain_ltc,train_predict_ltc))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest_ltc,test_predict_ltc)))
print("Test data MSE: ", mean_squared_error(original_ytest_ltc,test_predict_ltc))
print("Test data MAE: ", mean_absolute_error(original_ytest_ltc,test_predict_ltc))

Train data RMSE:  16.01707658035133
Train data MSE:  256.546742180839
Train data MAE:  6.132115054775722
-------------------------------------------------------------------------------------
Test data RMSE:  6.170268198204306
Test data MSE:  38.07220963777142
Test data MAE:  4.705449464943958


In [None]:
print("Train data MGD: ", mean_gamma_deviance(original_ytrain_ltc, train_predict_ltc))
print("Test data MGD: ", mean_gamma_deviance(original_ytest_ltc, test_predict_ltc))
print("----------------------------------------------------------------------")
print("Train data MPD: ", mean_poisson_deviance(original_ytrain_ltc, train_predict_ltc))
print("Test data MPD: ", mean_poisson_deviance(original_ytest_ltc, test_predict_ltc))

Train data MGD:  0.005247811734550737
Test data MGD:  0.004690319461232837
----------------------------------------------------------------------
Train data MPD:  1.0378217210591454
Test data MPD:  0.36316625674594394


In [None]:
print("Train data explained variance regression score:", 
      explained_variance_score(original_ytrain_ltc, train_predict_ltc))
print("Test data explained variance regression score:", 
      explained_variance_score(original_ytest_ltc, test_predict_ltc))

Train data explained variance regression score: 0.9566570765111
Test data explained variance regression score: 0.9919514446004158


In [None]:
look_back=time_step
trainPredictPlot = np.empty_like(features_ltc['Close'])
trainPredictPlot[:] = np.nan
trainPredictPlot[look_back:len(train_predict_ltc)+look_back] = train_predict_ltc.reshape(1,-1)
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting
testPredictPlot = np.empty_like(features_ltc['Close'])
testPredictPlot[:] = np.nan
testPredictPlot[len(train_predict_ltc)+(look_back*2)+1:len(features_ltc)-1] = test_predict_ltc.reshape(1,-1)
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])


plotdf = pd.DataFrame({'date': close_stock_ltc['Date'],
                       'original_close': close_stock_ltc['Close'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (13019,)
Test predicted data:  (13019,)


# ***`SPLIT`***

# ETH data for volume

In [25]:
ds_path = '/content/drive/MyDrive/NN_CW_DATASETS/eth_usd_data.csv'
eth_df = pd.read_csv(ds_path)
eth_df.drop('close_time', axis=1, inplace = True)
eth_df.drop('cryptoname', axis=1, inplace = True)

eth_df

Unnamed: 0,date,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
0,2019-11-27,146,146,146,146,0.010,1.46000,1,0,0
1,2019-11-27,125.03,145.01,125.03,133,0.080,10.92040,5,0.010,1.45010
2,2019-11-27,142.20,146.92,142.10,146.58,11912.510,1716153.85640,1494,8528.020,1223895.74220
3,2019-11-27,146.53,149.34,145.73,148.26,16279.170,2407307.53830,1990,4021.500,594848.45840
4,2019-11-27,148.35,151.64,146.80,150.65,18572.870,2753414.46400,1918,1945.610,288661.36990
...,...,...,...,...,...,...,...,...,...,...
13521,2022-12-08,1231.30,1232.39,1225.17,1228.12,195590.318,240168428.70114,112498,92715.811,113850267.72166
13522,2022-12-08,1228.21,1235.57,1224.00,1232.07,270456.798,332769385.14076,144917,138887.372,170914873.30968
13523,2022-12-08,1232.08,1234.45,1228.18,1229.20,200017.174,246328747.46212,112449,89987.329,110835515.40830
13524,2022-12-08,1229.21,1247.33,1227.36,1239.80,484703.850,598984323.70488,227384,268805.626,332312019.20747


In [4]:
print('Null Values:', eth_df.isnull().values.sum())
print('NA values:', eth_df.isnull().values.any())

Null Values: 0
NA values: False


In [26]:
features = eth_df[['date', "volume", 'close', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume']]
print("Shape of close dataframe:", features.shape)

Shape of close dataframe: (13526, 7)


In [27]:
features = features[(features['date'] > '2019-11-27') & (features['date'] < '2022-12-31')]
close_stock = features[["date", 'volume']]
print("Total data for prediction: ", features.shape)
print("Total data for prediction: ", features.shape)

Total data for prediction:  (13492, 7)
Total data for prediction:  (13492, 7)


In [28]:
features = features.drop('date', axis = 1)

scaler = StandardScaler()

features_scaled = scaler.fit_transform(np.array(features).reshape(-1,6))

print(features.shape)

(13492, 6)


In [29]:
training_size = int(len(features_scaled) * 0.6)
test_size = len(features_scaled) - training_size

train_data_eth, test_data_eth = features_scaled[0:training_size, :], features_scaled[training_size:len(features_scaled), :]

print("train_data: ", train_data_eth.shape)
print("test_data: ", test_data_eth.shape)

train_data:  (8095, 6)
test_data:  (5397, 6)


In [30]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), :]  
        dataX.append(a)
        dataY.append(dataset[i + time_step, :][0])  
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train_eth, y_train_eth = create_dataset(train_data_eth, time_step)
X_test_eth, y_test_eth = create_dataset(test_data_eth, time_step)

print("X_train: ", X_train_eth.shape)
print("y_train: ", y_train_eth.shape)
print("X_test: ", X_test_eth.shape)
print("y_test: ", y_test_eth.shape)

X_train:  (8079, 15, 6)
y_train:  (8079,)
X_test:  (5381, 15, 6)
y_test:  (5381,)


In [None]:
X_train_eth[-1][-1]

array([1.66355205, 1.82524574, 4.05103601, 2.43969959, 1.57842972,
       3.88459012])

In [31]:
X_train_eth = X_train_eth.reshape(X_train_eth.shape[0],X_train_eth.shape[1] , 6)
X_test_eth = X_test_eth.reshape(X_test_eth.shape[0],X_test_eth.shape[1] , 6)

print("X_train: ", X_train_eth.shape)
print("X_test: ", X_test_eth.shape)

X_train:  (8079, 15, 6)
X_test:  (5381, 15, 6)


# Model for volume prediction

In [32]:
model = Sequential()

model.add(LSTM(64, return_sequences=True, input_shape=(15, 6)))
# model.add(Dropout(0.5))
model.add(BatchNormalization())

model.add(LSTM(64, return_sequences=False))
# model.add(Dropout(0.3))
# model.add(BatchNormalization())

model.add(Dense(16, activation='relu'))
# model.add(Dropout(0.4))
# model.add(BatchNormalization())

model.add(Dense(1))

In [33]:
model.compile(loss="mean_squared_error",optimizer="adam")

In [34]:
history = model.fit(X_train_eth,y_train_eth, validation_data=(X_test_eth,y_test_eth),epochs=30,batch_size=32,verbose=1)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


# ETH Results

In [35]:
train_predict=model.predict(X_train_eth)
test_predict=model.predict(X_test_eth)

train_predict.shape, test_predict.shape



((8079, 1), (5381, 1))

In [36]:
new_scaler = StandardScaler()

new_scaler.fit_transform(np.array(features['volume']).reshape(-1,1))

array([[-0.87811112],
       [-0.92770958],
       [-0.91352099],
       ...,
       [-0.21858408],
       [ 0.8021716 ],
       [ 0.86821441]])

In [37]:
train_predict = new_scaler.inverse_transform(train_predict.reshape(-1,1))
test_predict = new_scaler.inverse_transform(test_predict.reshape(-1,1))

original_ytrain = new_scaler.inverse_transform(y_train_eth.reshape(-1,1)) 
original_ytest = new_scaler.inverse_transform(y_test_eth.reshape(-1,1)) 

In [38]:
print("Train data R2 score:", r2_score(original_ytrain, train_predict))
print("Test data R2 score:", r2_score(original_ytest, test_predict))

Train data R2 score: 0.5879460285951046
Test data R2 score: 0.4429718084735832


In [39]:
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain,train_predict)))
print("Train data MSE: ", mean_squared_error(original_ytrain,train_predict))
print("Train data MAE: ", mean_absolute_error(original_ytrain,train_predict))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest,test_predict)))
print("Test data MSE: ", mean_squared_error(original_ytest,test_predict))
print("Test data MAE: ", mean_absolute_error(original_ytest,test_predict))

Train data RMSE:  123808.80495475708
Train data MSE:  15328620184.325083
Train data MAE:  85324.42641101156
-------------------------------------------------------------------------------------
Test data RMSE:  269505.12664107146
Test data MSE:  72633013285.81996
Test data MAE:  148779.46088057515


In [40]:
print("Train data MGD: ", mean_gamma_deviance(original_ytrain, train_predict))
print("Test data MGD: ", mean_gamma_deviance(original_ytest, test_predict))
print("----------------------------------------------------------------------")
print("Train data MPD: ", mean_poisson_deviance(original_ytrain, train_predict))
print("Test data MPD: ", mean_poisson_deviance(original_ytest, test_predict))

Train data MGD:  0.2832681964175494
Test data MGD:  0.34927223248027145
----------------------------------------------------------------------
Train data MPD:  50773.90700092534
Test data MPD:  118167.06737340815


In [41]:
print("Train data explained variance regression score:", 
      explained_variance_score(original_ytrain, train_predict))
print("Test data explained variance regression score:", 
      explained_variance_score(original_ytest, test_predict))

Train data explained variance regression score: 0.6014517296215247
Test data explained variance regression score: 0.44623023456290545


In [42]:
look_back=time_step
trainPredictPlot = np.empty_like(features['volume'])
trainPredictPlot[:] = np.nan
trainPredictPlot[look_back:len(train_predict)+look_back] = train_predict.reshape(1,-1)
print("Train predicted data: ", trainPredictPlot.shape)

testPredictPlot = np.empty_like(features['volume'])
testPredictPlot[:] = np.nan
testPredictPlot[len(train_predict)+(look_back*2)+1:len(features)-1] = test_predict.reshape(1,-1)
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])


plotdf = pd.DataFrame({'date': close_stock['date'],
                       'original_close': close_stock['volume'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

plotdf['original_close'] = pd.to_numeric(plotdf['original_close'])

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (13492,)
Test predicted data:  (13492,)


# ADA data for volume

In [None]:
ada_ds_path = '/content/drive/MyDrive/NN_CW_DATASETS/ada_usdt_data.csv'
ada_df = pd.read_csv(ada_ds_path)
ada_df.drop('close_time', axis=1, inplace = True)
ada_df.drop('cryptoname', axis=1, inplace = True)

ada_df

In [46]:
print('Null Values:', ada_df.isnull().values.sum())
print('NA values:', ada_df.isnull().values.any())

Null Values: 0
NA values: False


In [52]:
features_ada = ada_df[['date', "volume", 'close', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume']]
print("Shape of close dataframe:", features_ada.shape)

Shape of close dataframe: (13045, 7)


In [53]:
features_ada = features_ada[(features_ada['date'] > '2019-11-27') & (features_ada['date'] < '2022-12-31')]
close_stock_ada = features_ada[["date", 'volume']]
print("Total data for prediction: ", features_ada.shape)
print("Total data for prediction: ", features_ada.shape)

Total data for prediction:  (13019, 7)
Total data for prediction:  (13019, 7)


In [54]:
features_ada = features_ada.drop('date', axis = 1)

scaler_ada = StandardScaler()

features_ada_scaled = scaler_ada.fit_transform(np.array(features_ada).reshape(-1,6))

print(features_ada.shape)

(13019, 6)


In [55]:
training_size = int(len(features_ada_scaled) * 0.6)
test_size = len(features_ada_scaled) - training_size

train_data_ada, test_data_ada = features_ada_scaled[0:training_size, :], features_ada_scaled[training_size:len(features_ada_scaled), :]

print("train_data: ", train_data_ada.shape)
print("test_data: ", test_data_ada.shape)

train_data:  (7811, 6)
test_data:  (5208, 6)


In [56]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), :]  
        dataX.append(a)
        dataY.append(dataset[i + time_step, :][0])  
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train_ada, y_train_ada = create_dataset(train_data_ada, time_step)
X_test_ada, y_test_ada = create_dataset(test_data_ada, time_step)

print("X_train: ", X_train_ada.shape)
print("y_train: ", y_train_ada.shape)
print("X_test: ", X_test_ada.shape)
print("y_test: ", y_test_ada.shape)

X_train:  (7795, 15, 6)
y_train:  (7795,)
X_test:  (5192, 15, 6)
y_test:  (5192,)


In [57]:
X_train_ada = X_train_ada.reshape(X_train_ada.shape[0],X_train_ada.shape[1] , 6)
X_test_ada = X_test_ada.reshape(X_test_ada.shape[0],X_test_ada.shape[1] , 6)

print("X_train: ", X_train_ada.shape)
print("X_test: ", X_test_ada.shape)

X_train:  (7795, 15, 6)
X_test:  (5192, 15, 6)


# ADA Results

In [58]:
train_predict_ada = model.predict(X_train_ada)
test_predict_ada = model.predict(X_test_ada)

train_predict_ada.shape, test_predict_ada.shape



((7795, 1), (5192, 1))

In [59]:
new_scaler_ada = StandardScaler()

new_scaler_ada.fit_transform(np.array(features_ada['volume']).reshape(-1,1))

array([[ 1.0090049 ],
       [ 1.80266436],
       [ 2.47723089],
       ...,
       [ 0.03761362],
       [-0.35629952],
       [-0.37458705]])

In [60]:
train_predict_ada = new_scaler_ada.inverse_transform(train_predict_ada.reshape(-1,1))
test_predict_ada = new_scaler_ada.inverse_transform(test_predict_ada.reshape(-1,1))

original_ytrain_ada = new_scaler_ada.inverse_transform(y_train_ada.reshape(-1,1)) 
original_ytest_ada = new_scaler_ada.inverse_transform(y_test_ada.reshape(-1,1)) 

In [61]:
print("Train data R2 score:", r2_score(original_ytrain_ada, train_predict_ada))
print("Test data R2 score:", r2_score(original_ytest_ada, test_predict_ada))

Train data R2 score: 0.48493254819865017
Test data R2 score: 0.29949468820689606


In [62]:
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain_ada,train_predict_ada)))
print("Train data MSE: ", mean_squared_error(original_ytrain_ada,train_predict_ada))
print("Train data MAE: ", mean_absolute_error(original_ytrain_ada,train_predict_ada))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest_ada,test_predict_ada)))
print("Test data MSE: ", mean_squared_error(original_ytest_ada,test_predict_ada))
print("Test data MAE: ", mean_absolute_error(original_ytest_ada,test_predict_ada))

Train data RMSE:  145041758.93770194
Train data MSE:  2.1037111835742444e+16
Train data MAE:  51469388.65644644
-------------------------------------------------------------------------------------
Test data RMSE:  45418302.30275886
Test data MSE:  2062822184064790.5
Test data MAE:  31298681.28659476


In [63]:
print("Train data MGD: ", mean_gamma_deviance(original_ytrain_ada, train_predict_ada))
print("Test data MGD: ", mean_gamma_deviance(original_ytest_ada, test_predict_ada))
print("----------------------------------------------------------------------")
print("Train data MPD: ", mean_poisson_deviance(original_ytrain_ada, train_predict_ada))
print("Test data MPD: ", mean_poisson_deviance(original_ytest_ada, test_predict_ada))

Train data MGD:  0.4047035295275728
Test data MGD:  0.30525757846825796
----------------------------------------------------------------------
Train data MPD:  48675799.2152881
Test data MPD:  20754118.163483758


In [64]:
print("Train data explained variance regression score:", 
      explained_variance_score(original_ytrain_ada, train_predict_ada))
print("Test data explained variance regression score:", 
      explained_variance_score(original_ytest_ada, test_predict_ada))

Train data explained variance regression score: 0.48509595169722874
Test data explained variance regression score: 0.3787759674139104


In [65]:
look_back=time_step
trainPredictPlot = np.empty_like(features_ada['volume'])
trainPredictPlot[:] = np.nan
trainPredictPlot[look_back:len(train_predict_ada)+look_back] = train_predict_ada.reshape(1,-1)
print("Train predicted data: ", trainPredictPlot.shape)

testPredictPlot = np.empty_like(features_ada['volume'])
testPredictPlot[:] = np.nan
testPredictPlot[len(train_predict_ada)+(look_back*2)+1:len(features_ada)-1] = test_predict_ada.reshape(1,-1)
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])


plotdf = pd.DataFrame({'date': close_stock_ada['date'],
                       'original_close': close_stock_ada['volume'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

plotdf['original_close'] = pd.to_numeric(plotdf['original_close'])

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (13019,)
Test predicted data:  (13019,)


# LINK data for volume

In [None]:
link_ds_path = '/content/drive/MyDrive/NN_CW_DATASETS/link_usdt_data.csv'
link_df = pd.read_csv(link_ds_path)
link_df.drop('close_time', axis=1, inplace = True)
link_df.drop('cryptoname', axis=1, inplace = True)

link_df

In [None]:
print('Null Values:', link_df.isnull().values.sum())
print('NA values:', link_df.isnull().values.any())

In [67]:
features_link = link_df[['date', "volume", 'close', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume']]
print("Shape of close dataframe:", features_link.shape)

Shape of close dataframe: (12658, 7)


In [68]:
features_link = features_link[(features_link['date'] > '2019-11-27') & (features_link['date'] < '2022-12-31')]
close_stock_link = features_link[["date", 'volume']]
print("Total data for prediction: ", features_link.shape)
print("Total data for prediction: ", features_link.shape)

Total data for prediction:  (12633, 7)
Total data for prediction:  (12633, 7)


In [69]:
features_link = features_link.drop('date', axis = 1)

scaler_link = StandardScaler()

features_link_scaled = scaler_link.fit_transform(np.array(features_link).reshape(-1,6))

print(features_link.shape)

(12633, 6)


In [70]:
training_size = int(len(features_link_scaled) * 0.6)
test_size = len(features_link_scaled) - training_size

train_data_link, test_data_link = features_link_scaled[0:training_size, :], features_link_scaled[training_size:len(features_link_scaled), :]

print("train_data: ", train_data_link.shape)
print("test_data: ", test_data_link.shape)

train_data:  (7579, 6)
test_data:  (5054, 6)


In [71]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), :]  
        dataX.append(a)
        dataY.append(dataset[i + time_step, :][0])  
    return np.array(dataX), np.array(dataY)

time_step = 15
X_train_link, y_train_link = create_dataset(train_data_link, time_step)
X_test_link, y_test_link = create_dataset(test_data_link, time_step)

print("X_train: ", X_train_link.shape)
print("y_train: ", y_train_link.shape)
print("X_test: ", X_test_link.shape)
print("y_test: ", y_test_link.shape)

X_train:  (7563, 15, 6)
y_train:  (7563,)
X_test:  (5038, 15, 6)
y_test:  (5038,)


In [72]:
X_train_link = X_train_link.reshape(X_train_link.shape[0],X_train_link.shape[1] , 6)
X_test_link = X_test_link.reshape(X_test_link.shape[0],X_test_link.shape[1] , 6)

print("X_train: ", X_train_link.shape)
print("X_test: ", X_test_link.shape)

X_train:  (7563, 15, 6)
X_test:  (5038, 15, 6)


# LINK Results

In [73]:
train_predict_link = model.predict(X_train_link)
test_predict_link = model.predict(X_test_link)

train_predict_link.shape, test_predict_link.shape



((7563, 1), (5038, 1))

In [74]:
new_scaler_link = StandardScaler()

new_scaler_link.fit_transform(np.array(features_link['volume']).reshape(-1,1))

array([[-0.17898569],
       [-0.61275161],
       [-0.60939354],
       ...,
       [ 1.53610277],
       [-0.17892038],
       [-0.24807197]])

In [75]:
train_predict_link = new_scaler_link.inverse_transform(train_predict_link.reshape(-1,1))
test_predict_link = new_scaler_link.inverse_transform(test_predict_link.reshape(-1,1))

original_ytrain_link = new_scaler_link.inverse_transform(y_train_link.reshape(-1,1)) 
original_ytest_link = new_scaler_link.inverse_transform(y_test_link.reshape(-1,1)) 

In [76]:
print("Train data R2 score:", r2_score(original_ytrain_link, train_predict_link))
print("Test data R2 score:", r2_score(original_ytest_link, test_predict_link))

Train data R2 score: 0.34359653029360204
Test data R2 score: 0.39369918922383074


In [77]:
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain_link,train_predict_link)))
print("Train data MSE: ", mean_squared_error(original_ytrain_link,train_predict_link))
print("Train data MAE: ", mean_absolute_error(original_ytrain_link,train_predict_link))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest_link,test_predict_link)))
print("Test data MSE: ", mean_squared_error(original_ytest_link,test_predict_link))
print("Test data MAE: ", mean_absolute_error(original_ytest_link,test_predict_link))

Train data RMSE:  1368785.4817007286
Train data MSE:  1873573694914.6958
Train data MAE:  878629.5952079201
-------------------------------------------------------------------------------------
Test data RMSE:  1814125.3554871324
Test data MSE:  3291050805421.315
Test data MAE:  1073300.169780667


In [78]:
print("Train data MGD: ", mean_gamma_deviance(original_ytrain_link, train_predict_link))
print("Test data MGD: ", mean_gamma_deviance(original_ytest_link, test_predict_link))
print("----------------------------------------------------------------------")
print("Train data MPD: ", mean_poisson_deviance(original_ytrain_link, train_predict_link))
print("Test data MPD: ", mean_poisson_deviance(original_ytest_link, test_predict_link))

Train data MGD:  0.2996065934732796
Test data MGD:  0.25669164307427533
----------------------------------------------------------------------
Train data MPD:  597564.3911808117
Test data MPD:  733057.4592961222


In [79]:
print("Train data explained variance regression score:", 
      explained_variance_score(original_ytrain_link, train_predict_link))
print("Test data explained variance regression score:", 
      explained_variance_score(original_ytest_link, test_predict_link))

Train data explained variance regression score: 0.3627352017872295
Test data explained variance regression score: 0.3978051875775602


In [80]:
look_back=time_step
trainPredictPlot = np.empty_like(features_link['volume'])
trainPredictPlot[:] = np.nan
trainPredictPlot[look_back:len(train_predict_link)+look_back] = train_predict_link.reshape(1,-1)
print("Train predicted data: ", trainPredictPlot.shape)

testPredictPlot = np.empty_like(features_link['volume'])
testPredictPlot[:] = np.nan
testPredictPlot[len(train_predict_link)+(look_back*2)+1:len(features_link)-1] = test_predict_link.reshape(1,-1)
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])


plotdf = pd.DataFrame({'date': close_stock_link['date'],
                       'original_close': close_stock_link['volume'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

plotdf['original_close'] = pd.to_numeric(plotdf['original_close'])

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (12633,)
Test predicted data:  (12633,)
