<a href="https://colab.research.google.com/github/Sina-Akhavi/bitcoin-timeseries-Forecasting/blob/main/bitcoin_forecasting_using_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
# Switch the working directory so the relative CSV path used by the later
# read_csv call resolves inside ./sample_data.
# NOTE(review): presumably Colab's bundled sample_data folder — confirm the
# BTC-USD.csv file is actually uploaded there before running.
os.chdir('./sample_data')

In [None]:
import pandas as pd
import numpy as np
import math
import datetime as dt
import matplotlib.pyplot as plt
from itertools import cycle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
# ------------------------------------------------
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
# ------------------------------------------------
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM


# Load the BTC-USD price history into the notebook-wide frame `maindf`.
# The path is relative, so it is resolved against the current working
# directory (changed by the os.chdir call in the first cell).
# NOTE(review): the recorded output below is a FileNotFoundError, so the CSV
# was presumably missing from that directory when this cell last ran.
maindf = pd.read_csv('./BTC-USD.csv')


FileNotFoundError: ignored

# Check for null values

In [None]:
# Total number of missing cells across every column of the raw frame.
print("Null values: ", maindf.isna().to_numpy().sum())


In [None]:
# True when at least one cell anywhere in the raw frame is missing.
print("NA values: ", maindf.isna().to_numpy().any())

In [None]:
# Display the table dimensions as a (rows, columns) tuple.
maindf.shape

# Overview of the full 2014-2022 period

In [None]:
# Parse the date strings once so rows can be range-filtered and grouped by
# calendar month.
maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')

# Overview window, both bounds inclusive.
in_window = (maindf['Date'] >= '2014-9-17') & (maindf['Date'] <= '2022-02-19')

# DataFrame.drop returns a new frame instead of modifying in place; the
# result has to be assigned, otherwise the two columns are never removed.
y_overall = maindf.loc[in_window].drop(columns=['Adj Close', 'Volume'])

# Mean open/close price per month name, pooled over all years in the window.
monthvise = y_overall.groupby(y_overall['Date'].dt.strftime('%B'))[['Open', 'Close']].mean()

# groupby sorts the month names alphabetically; restore calendar order.
new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
monthvise = monthvise.reindex(new_order, axis=0)
monthvise

In [None]:
# Legend names, consumed in the same order as the y columns plotted below.
names = cycle(['Stock Open Price', 'Stock Close Price', 'Stock High Price', 'Stock Low Price'])

# Several y columns put plotly express into wide-form mode, where the plotted
# numbers live in an implicit column called 'value' (lower case). The label
# mapping must use that exact key or the y-axis title is silently left alone.
fig = px.line(y_overall, x='Date', y=['Open', 'Close', 'High', 'Low'],
              labels={'Date': 'Date', 'value': 'Stock Value'})

fig.update_layout(title_text='Stock Analysis chart', font_size=15, font_color='black', legend_title_text='Stock Parameters')
# Replace the default per-column trace names with the readable legend names.
fig.for_each_trace(lambda t: t.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

# Normalization

In [None]:
# Keep only the date and the closing price; the cells below filter, plot and
# scale this two-column frame.
closedf = maindf[['Date', 'Close']]
closedf

In [None]:
# Disabled alternative window, kept for reference:
# closedf = closedf[closedf['Date'] > '2021-02-19']

# Modelling window: strictly after 2014-09-17 and strictly before 2020-08-01.
closedf = closedf[(closedf['Date'] > '2014-09-17')
                  & (closedf['Date'] < '2020-08-01')]

# Independent snapshot with dates and unscaled prices; `closedf` itself is
# overwritten by the scaled array further down.
close_stock = closedf.copy()


In [None]:
# Display the filtered Date/Close snapshot (reused later as the "original"
# series in the prediction comparison plot).
close_stock

In [None]:
# Label keys must match the plotted column names exactly ('Date', 'Close');
# lower-case keys match nothing and the axis titles stay at their defaults.
fig = px.line(closedf, x='Date', y='Close', labels={'Date': 'Date', 'Close': 'Close Stock'})
# NOTE(review): these settings style the marker outline; confirm they have
# the intended visual effect on a lines-only trace.
fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(title_text='Considered Period to predict Bitcoin Close Price', plot_bgcolor='white',
                  font_size=15, font_color='black')
fig.show()

In [None]:
# Only the closing price is modelled, so discard the date column first.
closedf = closedf.drop(columns=['Date'])

# Min-max scale the prices as a single (n_rows, 1) feature column.
# NOTE(review): the scaler is fit on the whole window before the train/test
# split below, so test-period extremes influence the scaling.
scaler = MinMaxScaler()
closedf = scaler.fit_transform(closedf.to_numpy().reshape(-1, 1))

closedf

In [None]:
# Chronological 80/20 split: the earliest rows train, the latest rows test.
training_size = int(len(closedf) * 0.80)
test_size = len(closedf) - training_size

# closedf holds a single column, so plain row slices keep the 2-D shape.
train_data = closedf[:training_size]
test_data = closedf[training_size:]

print('train_data shape: ', train_data.shape)
print('test_data shape: ', test_data.shape)

# Getting familiar with the dataset

- 1715 records of training data, starting on 2014-09-18
- 429 records of test data, ending on 2020-07-31


In [None]:
def create_XtrainYtrain(dataset, time_step, *, include_last=False):
    """Slice a single-column series into sliding windows and next-step targets.

    Sample ``i`` uses ``dataset[i:i + time_step, 0]`` as its input window and
    ``dataset[i + time_step, 0]`` as its target.

    Args:
        dataset: 2-D NumPy array; only column 0 is read.
        time_step: Number of consecutive rows in each input window.
        include_last: Keyword-only. With the default ``False`` the final
            valid window is skipped, so ``len(dataset) - time_step - 1``
            samples are produced. This is the historical behaviour, and
            existing callers compute plot offsets from that count. Pass
            ``True`` to also emit the last window, giving
            ``len(dataset) - time_step`` samples.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays. ``X`` has shape
        ``(n_samples, time_step)`` and ``Y`` has shape ``(n_samples,)``.
        When the series is too short to form any sample, both arrays are
        empty with shape ``(0,)``.
    """
    # A non-positive count makes range() empty, so short inputs yield no
    # samples instead of raising.
    n_samples = len(dataset) - time_step - (0 if include_last else 1)

    data_X = [dataset[i: i + time_step, 0] for i in range(n_samples)]
    data_Y = [dataset[i + time_step, 0] for i in range(n_samples)]

    return np.array(data_X), np.array(data_Y)



In [None]:
# Each sample is a window of 15 consecutive scaled closes; the target is the
# close that immediately follows the window.
time_step = 15
# Window the train and test partitions separately so that no window spans
# the split boundary.
X_train, Y_train = create_XtrainYtrain(train_data, time_step)
X_test, Y_test = create_XtrainYtrain(test_data, time_step)

In [None]:
# Sanity check: X arrays are (samples, time_step) and Y arrays are (samples,),
# as returned by create_XtrainYtrain.
print('x_train shape:', X_train.shape)
print('y_train shape:', Y_train.shape)
print('x_test shape:', X_test.shape)
print('y_test shape:', Y_test.shape)

In [None]:
# Append a trailing feature axis of size 1 so the inputs become
# (samples, time steps, features), matching the LSTM's
# input_shape=(time_step, 1) declared in the model cell.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

print('X_train.shape: ', X_train.shape)
print('X_test.shape: ', X_test.shape)

# LSTM model construction

In [None]:
from tensorflow.keras.layers import Dropout

# Two stacked 31-unit LSTM layers (the first returns the full sequence so the
# second can consume it), 50% dropout, then a single linear output unit that
# predicts the next scaled close.
model = Sequential([
    LSTM(31, input_shape=(time_step, 1), activation='relu',
         return_sequences=True),
    LSTM(31, activation='relu'),
    Dropout(0.5),
    Dense(1),
])

model.compile(optimizer="adam", loss="mean_squared_error")

# The test windows double as validation data; they are only monitored during
# training and never used to update the weights.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=200,
    batch_size=32,
    verbose=1,
)

In [None]:
# Let's do the prediction

In [None]:
# Predict on both partitions and map the scaled outputs straight back to
# price units with the scaler fitted earlier.
train_predict = scaler.inverse_transform(model.predict(X_train))
test_predict = scaler.inverse_transform(model.predict(X_test))

# The targets are 1-D; the scaler expects a single feature column, so reshape
# them to (n, 1) before inverting.
original_ytrain = scaler.inverse_transform(Y_train.reshape(-1, 1))
original_ytest = scaler.inverse_transform(Y_test.reshape(-1, 1))

# Evaluation metrics


In [None]:
# Compute each MSE once; the RMSE is its square root. All metrics are in
# price units because both inputs were inverse-transformed beforehand.
train_mse = mean_squared_error(original_ytrain, train_predict)
test_mse = mean_squared_error(original_ytest, test_predict)

print('Train Data RMSE: ', math.sqrt(train_mse))
print('Train Data MSE: ', train_mse)
print('Train Data MAE: ', mean_absolute_error(original_ytrain, train_predict))
print('----------------------------------------------------------')
print('Test Data RMSE: ', math.sqrt(test_mse))
print('Test Data MSE: ', test_mse)
print('Test Data MAE: ', mean_absolute_error(original_ytest, test_predict))

In [None]:
# Align the predictions with the full series so all three curves share one
# date axis; rows without a prediction stay NaN and are not drawn.
look_back = time_step

# Train prediction i targets row (i + look_back) of the full series.
trainPredictPlot = np.full_like(closedf, np.nan)
trainPredictPlot[look_back: look_back + len(train_predict), :] = train_predict
print("Train Predicted data: ", trainPredictPlot.shape)

# Test prediction i targets row (training_size + look_back + i): the test
# partition starts at training_size and its first look_back rows only serve
# as input. Anchoring on the split point and on len(test_predict) keeps the
# slice the same length as the predictions, without relying on how many
# trailing samples the windowing step happened to drop.
test_start = training_size + look_back
testPredictPlot = np.full_like(closedf, np.nan)
testPredictPlot[test_start: test_start + len(test_predict), :] = test_predict
print("Test Predicted data: ", testPredictPlot.shape)

# Legend names, consumed in the same order as the y columns plotted below.
names = cycle(['Original close price', 'Train Predicted close price',  'Test Predicted close price'])

plotdf = pd.DataFrame({'date': close_stock['Date'],
                       'original_close': close_stock['Close'],
                       'train_predicted_close': trainPredictPlot.ravel(),
                       'test_predicted_close': testPredictPlot.ravel()})

# 'value' is the implicit wide-form column holding the plotted numbers; the
# x-axis label previously read 'Data', a typo for 'Date'.
fig = px.line(plotdf, x='date', y=['original_close', 'train_predicted_close', 'test_predicted_close'],
              labels={'value': 'stock price', 'date': 'Date'})

fig.update_layout(title_text='Comparison between original close price vs predicted close price', plot_bgcolor='white', font_size=15,
                  font_color='black', legend_title_text='Close price')

fig.for_each_trace(lambda t: t.update(name=next(names)))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
