In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

# Create the Stacked LSTM model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

import math
import tensorflow as tf
from sklearn.metrics import mean_squared_error

import pickle


In [5]:
symbol = "ADAUSDT"

ADAData = pd.read_csv(f"./preparedData/{symbol}.csv", date_parser=True)
ADAData.head()


Unnamed: 0,open_time,open,high,low,close,volume,close_time
0,2018-05-01 05:30:00,0.34145,0.34199,0.3187,0.32173,32463784.08,2018-05-01 09:29:59
1,2018-05-01 09:30:00,0.32115,0.343,0.32,0.33787,14470581.61,2018-05-01 13:29:59
2,2018-05-01 13:30:00,0.33756,0.359,0.33421,0.34093,25321414.19,2018-05-01 17:29:59
3,2018-05-01 17:30:00,0.34003,0.3429,0.33166,0.33735,16675168.68,2018-05-01 21:29:59
4,2018-05-01 21:30:00,0.33776,0.3595,0.33714,0.35374,18237446.69,2018-05-02 01:29:59


In [6]:
# close column of the data
close = ADAData['close']

# number of rows in data
data_shape = close.shape
rows = data_shape[0]


In [7]:
# plot of close price movement over the time period
# plt.figure(figsize=(15, 7))
# plt.plot(close)
# plt.show()

fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(range(0, rows)), y=close,
                         mode='lines',
                         name='lines', line=dict(width=1.5)))
fig.update_layout(template="plotly_dark", title="Original Close Data")
fig.show()


In [8]:
# Normalization
# MinMaxScaler is used to normalize the data
# LSTM are sensitive to the scale of the data.

scaler = MinMaxScaler(feature_range=(0, 1))
normalized_data = scaler.fit_transform(np.array(close).reshape(-1, 1))
print(normalized_data)
print("Shape : ", normalized_data.shape)


[[0.0989652 ]
 [0.10429949]
 [0.10531082]
 ...
 [0.59612785]
 [0.60372937]
 [0.59546685]]
Shape :  (7770, 1)


In [9]:
# data train
data_train = normalized_data[:math.floor(rows*0.8), :]

# data test
data_test = normalized_data[math.floor(rows*0.8):, :]

print(data_train.shape)
print(data_test.shape)


(6216, 1)
(1554, 1)


In [10]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)


In [11]:
time_step = 100
X_train, Y_train = create_dataset(data_train, time_step)
X_test, Y_test = create_dataset(data_test, time_step)

print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)


(6115, 100) (6115,)
(1453, 100) (1453,)


In [12]:
# reshape input to be [samples, time steps, features]
# which is required for LSTM
# basically 3-D data is required as input to LSTM

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)


In [11]:
# added basic properties of stacked LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


2021-11-24 19:19:55.878917: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-24 19:19:55.879418: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-11-24 19:19:55.879507: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2021-11-24 19:19:55.879586: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2021-11-24 19:19:55.879658: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

In [12]:
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 50)           10400     
                                                                 
 lstm_1 (LSTM)               (None, 100, 50)           20200     
                                                                 
 lstm_2 (LSTM)               (None, 50)                20200     
                                                                 
 dense (Dense)               (None, 1)                 51        
                                                                 
Total params: 50,851
Trainable params: 50,851
Non-trainable params: 0
_________________________________________________________________


In [13]:
# fitting training and test data into the model.

# model.fit(X_train, Y_train, validation_data=(
#     X_test, Y_test), epochs=100, batch_size=64, verbose=1)

# saving model for future prdictions

# model_name = f"./models/LSTM_Model({time_step}).sav"
# pickle.dump(model, open(model_name, 'wb'))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f4aa1f94820>

In [16]:
# importing trained model
modal_file_path = f"./models/LSTM_Model({time_step}).sav"
loaded_model = pickle.load(open(modal_file_path, 'rb'))


2021-11-25 13:51:43.680011: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-25 13:51:43.680374: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-11-25 13:51:43.680485: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2021-11-25 13:51:43.680536: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2021-11-25 13:51:43.680582: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

In [17]:
model = loaded_model

# Now performing the prediction and check performance metrics

predict_train = model.predict(X_train)
predict_test = model.predict(X_test)


In [18]:
# Transformback the data to original form

predict_train = scaler.inverse_transform(predict_train)
predict_test = scaler.inverse_transform(predict_test)


In [16]:
# Calculating RMSE performance metrics
# for training data
print("RMSE for training data : ", math.sqrt(
    mean_squared_error(Y_train, predict_train)))

# for test data
print("RMSE for test data : ", math.sqrt(
    mean_squared_error(Y_test, predict_test)))


RMSE for training data :  0.13292786443047935
RMSE for test data :  1.1956490777860636


In [47]:
# Plotting
# shift train predictions for plotting
look_back = time_step
trainPredictPlot = np.empty_like(normalized_data)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(predict_train)+look_back, :] = predict_train

# shift test predictions for plotting
testPredictPlot = np.empty_like(normalized_data)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(predict_train)+(look_back*2) +
                1: len(normalized_data)-1, :] = predict_test

# plot baseline and predictions
# plt.figure(figsize=(15, 7))
# plt.plot(scaler.inverse_transform(normalized_data), label="Original Data")
# plt.plot(trainPredictPlot, label="Train Predicted Data")
# plt.plot(testPredictPlot, label="Test Predicted Data")
# plt.legend()
# plt.show()


fig = go.Figure()
fig.add_trace(go.Scatter(y=scaler.inverse_transform(normalized_data).flatten(), name='Original Close Values',
                         line=dict(color='royalblue', width=1.5)))
fig.add_trace(go.Scatter(y=trainPredictPlot.flatten(), name='Train Predicted Data',
                         line=dict(color='orange', width=1)))
fig.add_trace(go.Scatter(y=testPredictPlot.flatten(), name='Test Predicted Data',
                         line=dict(color='green', width=1)))
fig.update_layout(template="plotly_dark",
                  title="Train_Predict vs Test_Predict")
fig.show()


In [18]:
# now predicting for next records.
# for which we will be required the data of last time_step records
# which will we get from data_test(last 100 records)

X_input_2d = data_test[len(data_test)-time_step:].reshape(1, -1)

# converting the time_step data to a list
X_input_list = X_input_2d[0].tolist()


In [19]:
# Now finding the prediction for next records

n_steps = time_step
data_to_predict = 30
X_output_2d = []

i = 0
while(i < data_to_predict):
    if(len(X_input_list) > n_steps):

        # removing 1st data from list and reshaping array to 2-D
        X_input_2d = np.array(X_input_list[1:]).reshape(1, -1)

        # reshaping array to 3-D for model prediction
        X_input_3d = X_input_2d.reshape((1, n_steps, 1))

        X_output_3d = model.predict(X_input_3d)

        X_input_list.extend(X_output_3d[0].tolist())
        X_input_list = X_input_list[1:]

        X_output_2d.extend(X_output_3d.tolist())

    else:
        X_input_3d = X_input_2d.reshape((1, n_steps, 1))
        X_output_3d = model.predict(X_input_3d)

        X_input_list.extend(X_output_3d[0].tolist())
        X_output_2d.extend(X_output_3d.tolist())
    i = i+1

print(X_output_2d)


[[0.5917566418647766], [0.5896757245063782], [0.5875868201255798], [0.585837721824646], [0.5843107104301453], [0.5829699635505676], [0.581793487071991], [0.5807649493217468], [0.5798673033714294], [0.5790795683860779], [0.5783777236938477], [0.5777356028556824], [0.5771276950836182], [0.5765307545661926], [0.5759256482124329], [0.5752979516983032], [0.5746376514434814], [0.5739393830299377], [0.5732014179229736], [0.5724256634712219], [0.5716159343719482], [0.5707777738571167], [0.5699179172515869], [0.5690438747406006], [0.5681628584861755], [0.5672821998596191], [0.5664092302322388], [0.5655497908592224], [0.5647099614143372], [0.5638940930366516]]


In [52]:
# now plotting the next data with previous time_step data

data_last_time_step = np.arange(1, time_step + 1)
data_next = np.arange(time_step+1, time_step + 1 + data_to_predict)

# plt.figure(figsize=(10, 7))
# plt.plot(data_last_timeStep, scaler.inverse_transform(
#     data_test[len(data_test)-time_step:]), label=f"Last {timeStep} Data")
# plt.plot(data_next, scaler.inverse_transform(
#     X_output_2d), label=f"Next {data_to_predict} Data")
# plt.legend()
# plt.show()

fig = go.Figure()
fig.add_trace(go.Scatter(x=data_last_time_step, y=scaler.inverse_transform(
    data_test[len(data_test)-time_step:]).flatten(), name=f'Last {time_step} Data',
    line=dict(color='royalblue', width=1)))
fig.add_trace(go.Scatter(x=data_next, y=scaler.inverse_transform(
    X_output_2d).flatten(), name=f'Next {data_to_predict} Data',
    line=dict(color='orange', width=1)))
fig.update_layout(template="plotly_dark",
                  title=f"Next {data_to_predict} Prediction")
fig.show()


In [54]:
old_data = normalized_data.tolist()
old_data.extend(X_output_2d)
# plt.figure(figsize=(15, 7))
# plt.plot(scaler.inverse_transform(old_data))
# plt.show()

fig = go.Figure()
fig.add_trace(go.Scatter(y=scaler.inverse_transform(old_data).flatten(), name=f'Last {time_step} Data',
                         line=dict(color='royalblue', width=1)))
fig.update_layout(template="plotly_dark",
                  title="New Data with Prediction")
fig.show()
