> # Stock price Prediction using LSTM Deep learning model

> ## Importing Libraries

In [1]:
import os
import pandas as pd
import numpy as np

import datetime as dt
import math
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM

from itertools import cycle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

print('All Libraries imported.')

In [2]:
# Read the BTC-USD price history from the Kaggle input directory.
dataset_path = '../input/bitcoin-dataset/BTC-USD.csv'
maindf = pd.read_csv(dataset_path)
print('dataset loaded')

In [3]:
# Unpack the frame's shape once: rows are reported as days, columns as fields.
row_count, field_count = maindf.shape
print('Number of days present in the dataset: ', row_count)
print('Number of fields present in the dataset: ', field_count)

In [4]:
maindf.shape # displays the (number of rows, number of columns) tuple

In [5]:
maindf.head() # preview the first 5 rows of the dataset

In [6]:
maindf.tail()  # preview the last 5 rows of the dataset

In [7]:
maindf.info() # prints each column's name, non-null count and dtype

In [8]:
maindf.describe() # summary statistics (count, mean, std, min, quartiles, max) of the numeric columns

> ## Checking for the NULL values

In [9]:
# Count every missing cell across the whole frame.
null_total = maindf.isna().to_numpy().sum()
print('Null Values:', null_total)

In [10]:
# True when at least one cell in the frame is missing.
has_missing = maindf.isna().to_numpy().any()
print('NA values:', has_missing)

In [11]:
# Discard every row that contains at least one missing value.
maindf = maindf.dropna(axis=0, how='any')

> ## Confirming the removal of NULL values

In [12]:
# Re-count missing cells after the rows with nulls were dropped.
remaining_nulls = maindf.isna().to_numpy().sum()
print('Null Values:', remaining_nulls)

In [13]:
# Confirm that no missing cell is left in the frame.
still_missing = maindf.isna().to_numpy().any()
print('NA values:', still_missing)

In [14]:
# Re-check the frame's shape to see how many rows remain after dropna().
maindf.shape

In [15]:
# Checking the starting and ending date.
# Read the first column of the first/last row with a single 2-D .iloc lookup.
# Chaining `[0]` onto the row Series (`iloc[0][0]`) depends on the deprecated
# integer-position fallback of Series.__getitem__ on a label-indexed Series.
# NOTE(review): like the original, this assumes the date is the first column.
sd = maindf.iloc[0, 0]
ed = maindf.iloc[-1, 0]

print('Starting Date',sd)
print('Ending Date',ed)

> ## Year 2020 stock analysis

In [16]:
# Parse the date strings so the range comparison below is datetime-based.
maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')

# Half-open interval [2020-01-01, 2021-01-01): every day of 2020.
in_2020 = (maindf['Date'] >= '2020-01-01') & (maindf['Date'] < '2021-01-01')
y_2020 = maindf.loc[in_2020]

# Display the slice without 'Adj Close' and 'Volume'; y_2020 itself is not modified.
y_2020.drop(columns=['Adj Close', 'Volume'])

In [17]:
# Mean Open/Close per calendar month of 2020, listed January through December.
new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August',
             'September', 'October', 'November', 'December']
month_label = y_2020['Date'].dt.strftime('%B')
monthly_mean = y_2020.groupby(month_label)[['Open', 'Close']].mean()
# groupby sorts month names alphabetically; reindex restores calendar order.
month_wise = monthly_mean.reindex(new_order, axis=0)
month_wise

In [18]:
# Grouped bar chart: mean monthly open price next to mean monthly close price.
open_bars = go.Bar(
    x=month_wise.index,
    y=month_wise['Open'],
    name='Stock Open Price',
    marker_color='lightgreen',
)
close_bars = go.Bar(
    x=month_wise.index,
    y=month_wise['Close'],
    name='Stock Close Price',
    marker_color='tomato',
)

fig = go.Figure(data=[open_bars, close_bars])
fig.update_layout(
    barmode='group',
    xaxis_tickangle=-45,
    title='Monthwise comparision between Stock open and close price',
)
fig.show()

In [19]:
# Monthly extremes for 2020: highest 'High' and lowest 'Low' per calendar month.
# Both aggregations use y_2020's own dates as the grouping key, so the key
# always has exactly the rows being aggregated (the original grouped 'High'
# by the full maindf date column and also ran one groupby whose result was
# thrown away).
month_label = y_2020['Date'].dt.strftime('%B')
grouped_by_month = y_2020.groupby(month_label)

# reindex puts the alphabetically sorted month names back into calendar order.
month_wise_high = grouped_by_month['High'].max().reindex(new_order, axis=0)
month_wise_low = grouped_by_month['Low'].min().reindex(new_order, axis=0)

fig = go.Figure()
fig.add_trace(go.Bar(
    x=month_wise_high.index,
    y=month_wise_high,
    name='Stock high Price',
    marker_color='rgb(0, 153, 204)'
))
fig.add_trace(go.Bar(
    x=month_wise_low.index,
    y=month_wise_low,
    name='Stock low Price',
    marker_color='rgb(255, 128, 0)'
))

fig.update_layout(barmode='group',
                  title=' Monthwise High and Low stock price')
fig.show()

In [20]:
# Daily Open/Close/High/Low for 2020 on one line chart.
names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])

price_series = [y_2020[column] for column in ('Open', 'Close', 'High', 'Low')]
fig = px.line(
    y_2020,
    x=y_2020.Date,
    y=price_series,
    labels={'Date': 'Date','value':'Stock value'},
)
fig.update_layout(
    title_text='Stock analysis chart',
    font_size=15,
    font_color='black',
    legend_title_text='Stock Parameters',
)

# Rename the traces, in plotting order, to the friendlier legend labels.
for trace in fig.data:
    trace.update(name=next(names))

grid_style = dict(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.update_xaxes(**grid_style)
fig.update_yaxes(**grid_style)

fig.show()

> ## Year 2021 stock analysis

In [21]:
# Parse the date strings so the range comparison below is datetime-based.
maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')

# Half-open interval [2021-01-01, 2022-01-01) covers every day of 2021.
# The previous upper bound (< '2021-12-31') silently excluded 31 December and
# was inconsistent with how the 2020 slice is bounded.
y_2021 = maindf.loc[(maindf['Date'] >= '2021-01-01')
                     & (maindf['Date'] < '2022-01-01')]

# Display the slice without 'Adj Close' and 'Volume'; y_2021 itself is not modified.
y_2021.drop(columns=['Adj Close', 'Volume'])

In [22]:
# Mean Open/Close per calendar month of 2021, listed January through December.
new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August',
             'September', 'October', 'November', 'December']
month_label = y_2021['Date'].dt.strftime('%B')
monthly_mean = y_2021.groupby(month_label)[['Open', 'Close']].mean()
# Months with no rows in the slice come out of reindex as NaN rows.
month_wise = monthly_mean.reindex(new_order, axis=0)
month_wise

In [23]:
# Grouped bar chart of the 2021 monthly mean open and close prices.
bar_specs = [
    ('Open', 'Stock Open Price', 'lightgreen'),
    ('Close', 'Stock Close Price', 'tomato'),
]

fig = go.Figure()
for column, trace_name, color in bar_specs:
    fig.add_trace(go.Bar(
        x=month_wise.index,
        y=month_wise[column],
        name=trace_name,
        marker_color=color,
    ))

fig.update_layout(
    barmode='group',
    xaxis_tickangle=-45,
    title='Monthwise comparision between Stock open and close price',
)
fig.show()

In [24]:
# Monthly extremes for 2021: highest 'High' and lowest 'Low' per calendar month.
# Both aggregations use y_2021's own dates as the grouping key, so the key
# always has exactly the rows being aggregated (the original grouped 'High'
# by the full maindf date column and also ran one groupby whose result was
# thrown away).
month_label = y_2021['Date'].dt.strftime('%B')
grouped_by_month = y_2021.groupby(month_label)

# reindex puts the alphabetically sorted month names back into calendar order.
month_wise_high = grouped_by_month['High'].max().reindex(new_order, axis=0)
month_wise_low = grouped_by_month['Low'].min().reindex(new_order, axis=0)

fig = go.Figure()
fig.add_trace(go.Bar(
    x=month_wise_high.index,
    y=month_wise_high,
    name='Stock high Price',
    marker_color='rgb(0, 153, 204)'
))
fig.add_trace(go.Bar(
    x=month_wise_low.index,
    y=month_wise_low,
    name='Stock low Price',
    marker_color='rgb(255, 128, 0)'
))

fig.update_layout(barmode='group',
                  title=' Monthwise High and Low stock price')
fig.show()

In [25]:
# Daily Open/Close/High/Low for 2021 on one line chart.
names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])

price_series = [y_2021[column] for column in ('Open', 'Close', 'High', 'Low')]
fig = px.line(
    y_2021,
    x=y_2021.Date,
    y=price_series,
    labels={'Date': 'Date','value':'Stock value'},
)
fig.update_layout(
    title_text='Stock analysis chart',
    font_size=15,
    font_color='black',
    legend_title_text='Stock Parameters',
)

# Rename the traces, in plotting order, to the friendlier legend labels.
for trace in fig.data:
    trace.update(name=next(names))

grid_style = dict(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.update_xaxes(**grid_style)
fig.update_yaxes(**grid_style)

fig.show()

> ## Analysis from 2014-2021

In [26]:
# Parse the date strings so the range comparison below is datetime-based.
maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')

# Closed interval: both 2014-09-17 and 2021-08-24 are included.
in_full_range = (maindf['Date'] >= '2014-09-17') & (maindf['Date'] <= '2021-08-24')
y_overall = maindf.loc[in_full_range]

# Display the slice without 'Adj Close' and 'Volume'; y_overall itself is not modified.
y_overall.drop(columns=['Adj Close', 'Volume'])

In [27]:
# Mean Open/Close per calendar month, pooled over every year in y_overall.
new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August',
             'September', 'October', 'November', 'December']
month_label = y_overall['Date'].dt.strftime('%B')
monthly_mean = y_overall.groupby(month_label)[['Open', 'Close']].mean()
# groupby sorts month names alphabetically; reindex restores calendar order.
month_wise = monthly_mean.reindex(new_order, axis=0)
month_wise

In [28]:
# Daily Open/Close/High/Low over the whole selected period on one line chart.
names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])

price_series = [y_overall[column] for column in ('Open', 'Close', 'High', 'Low')]
fig = px.line(
    y_overall,
    x=y_overall.Date,
    y=price_series,
    labels={'Date': 'Date','value':'Stock value'},
)
fig.update_layout(
    title_text='Stock analysis chart',
    font_size=15,
    font_color='black',
    legend_title_text='Stock Parameters',
)

# Rename the traces, in plotting order, to the friendlier legend labels.
for trace in fig.data:
    trace.update(name=next(names))

grid_style = dict(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.update_xaxes(**grid_style)
fig.update_yaxes(**grid_style)
fig.show()

> ## Initializing LSTM model building

In [29]:
# Keep only the date and the closing price; the model is trained on Close alone.
close_columns = ['Date', 'Close']
closedf = maindf[close_columns]
print("Shape of close dataframe:", closedf.shape)

In [30]:
# Closing price over the whole dataset.
# The `labels` keys must match the column names exactly ('Date', 'Close');
# the previous lowercase keys matched nothing, so the labels were never applied.
fig = px.line(closedf, x=closedf.Date, y=closedf.Close,
              labels={'Date': 'Date', 'Close': 'Close Stock'})
fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(title_text='Considered period to predict Stock close price', plot_bgcolor='white', font_size=15, font_color='black')
fig.update_xaxes(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.update_yaxes(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.show()

> ## Taking data of just 1 Year

In [31]:
# Keep only rows dated strictly after 2020-08-21.
is_recent = closedf['Date'] > '2020-08-21'
closedf = closedf[is_recent]
# Independent copy that keeps 'Date' and the unscaled 'Close' for later plots.
close_stock = closedf.copy()
print("Total data for prediction: ", closedf.shape[0])

In [32]:
# Closing price over the one-year window used for modelling.
# The `labels` keys must match the column names exactly ('Date', 'Close');
# the previous lowercase keys matched nothing, so the labels were never applied.
fig = px.line(closedf, x=closedf.Date, y=closedf.Close,
              labels={'Date': 'Date', 'Close': 'Close Stock'})
fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(title_text='Considered period to predict Stock close price(1 Year)', plot_bgcolor='white', font_size=15, font_color='black')
fig.update_xaxes(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.update_yaxes(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.show()

In [33]:
# Scale the closing prices into [0, 1]; from here on `closedf` is a 2-D
# NumPy array with one column, no longer a DataFrame.
# NOTE(review): the scaler is fitted on the full series before the train/test
# split below, so test-period min/max influence the training inputs.
close_only = closedf.drop(columns=['Date'])
scaler = MinMaxScaler(feature_range=(0, 1))
closedf = scaler.fit_transform(close_only.to_numpy().reshape(-1, 1))
print(closedf.shape)

In [34]:
# Chronological split: the first 80% of rows train the model, the rest test it.
total_rows = len(closedf)
training_size = int(total_rows * 0.80)
test_size = total_rows - training_size
train_data = closedf[:training_size, :]
test_data = closedf[training_size:total_rows, :1]
print("train_data: ", train_data.shape)
print("test_data: ", test_data.shape)

> ### Transform the close price into the sliding-window format required for time-series forecasting

In [35]:
# Turn a single-column series into (input window, next value) training pairs.
def create_dataset(dataset, time_step=1):
    """Slice `dataset` into sliding windows and the value that follows each.

    Args:
        dataset: 2-D array; only column 0 is read.
        time_step: number of consecutive values in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X[k]`` holds
        ``dataset[k : k + time_step, 0]`` and ``y[k]`` holds
        ``dataset[k + time_step, 0]``.

    Note:
        ``len(dataset) - time_step - 1`` pairs are produced, so the final
        element of `dataset` is never used as a target.
    """
    pair_count = len(dataset) - time_step - 1
    windows = [dataset[start:start + time_step, 0] for start in range(pair_count)]
    targets = [dataset[start + time_step, 0] for start in range(pair_count)]
    return np.array(windows), np.array(targets)

In [36]:
# Each sample is `time_step` consecutive scaled closes; the target is the next close.
time_step = 10
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

for caption, array_ in (("X_train: ", X_train), ("y_train: ", y_train),
                        ("X_test: ", X_test), ("y_test", y_test)):
    print(caption, array_.shape)

In [37]:
# The LSTM layer expects [samples, time steps, features]; append a trailing
# feature axis of size 1 to the 2-D window matrices.
X_train = np.expand_dims(X_train, axis=2)
X_test = np.expand_dims(X_test, axis=2)

print("X_train: ", X_train.shape)
print("X_test: ", X_test.shape)

> ## Initializing model building after dataset split

In [38]:
# One LSTM layer (10 units, ReLU) feeding a single-output dense layer,
# trained with Adam on mean squared error.
model = Sequential([
    LSTM(10, input_shape=(None, 1), activation="relu"),
    Dense(1),
])

model.compile(optimizer="adam", loss="mean_squared_error")

print('Model loaded')

In [39]:
# Train for 500 epochs; the test windows double as the validation set, so
# `history` records both the training and the validation loss per epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=500,
    batch_size=32,
    verbose=1,
)

> ## Plotting loss vs Validation loss

In [40]:
# Plot training loss against validation loss, one point per epoch.
# `plt` comes from the notebook's import cell. The previous version called
# plt.figure() after plotting, which opened a second, empty figure that was
# then shown alongside the real one.
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(loss))

loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, loss, 'r', label='Training loss')
loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
loss_ax.set_title('Training and validation loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc=0)

plt.show()

In [41]:
# Run the trained model over the training windows, then the test windows.
train_predict, test_predict = [model.predict(windows) for windows in (X_train, X_test)]
(train_predict.shape, test_predict.shape)

In [42]:
# Map scaled values back to the original price scale with the fitted scaler.
to_price = scaler.inverse_transform

train_predict = to_price(train_predict)
test_predict = to_price(test_predict)
# The targets are 1-D; the scaler needs a single-column 2-D array.
original_ytrain = to_price(y_train.reshape(-1, 1))
original_ytest = to_price(y_test.reshape(-1, 1))

> ## Evaluation metrics: RMSE, MSE and MAE

In [43]:
# Error metrics on the original price scale; RMSE is the square root of MSE.
train_mse = mean_squared_error(original_ytrain, train_predict)
test_mse = mean_squared_error(original_ytest, test_predict)

print("Train data RMSE: ", math.sqrt(train_mse))
print("Train data MSE: ", train_mse)
print("Train data MAE: ", mean_absolute_error(original_ytrain, train_predict))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(test_mse))
print("Test data MSE: ", test_mse)
print("Test data MAE: ", mean_absolute_error(original_ytest, test_predict))

> ## Explained Variance Regression Score

In [44]:
# Explained variance of the predictions on each split.
train_evs = explained_variance_score(original_ytrain, train_predict)
test_evs = explained_variance_score(original_ytest, test_predict)

print("Train data explained variance regression score:", train_evs)
print("Test data explained variance regression score:", test_evs)

In [45]:
# Align the predictions with the original series for plotting. Each array is
# NaN everywhere except where predictions for that position exist.
look_back = time_step

# Train predictions start after the first window of `look_back` inputs.
train_start = look_back
train_stop = len(train_predict) + look_back
trainPredictPlot = np.full_like(closedf, np.nan)
trainPredictPlot[train_start:train_stop, :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# Test predictions: offset and end index as computed here line up with the
# number of windows create_dataset produced for each split.
test_start = len(train_predict) + (look_back * 2) + 1
test_stop = len(closedf) - 1
testPredictPlot = np.full_like(closedf, np.nan)
testPredictPlot[test_start:test_stop, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

plotdf = pd.DataFrame({
    'date': close_stock['Date'],
    'original_close': close_stock['Close'],
    'train_predicted_close': trainPredictPlot.ravel().tolist(),
    'test_predicted_close': testPredictPlot.ravel().tolist(),
})

plotted_columns = [plotdf[column] for column in
                   ('original_close', 'train_predicted_close', 'test_predicted_close')]
fig = px.line(plotdf, x=plotdf['date'], y=plotted_columns,
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(
    title_text='Comparision between original close price vs predicted close price',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Close Price',
)

# Rename the traces, in plotting order, to the friendlier legend labels.
for trace in fig.data:
    trace.update(name=next(names))

grid_style = dict(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.update_xaxes(**grid_style)
fig.update_yaxes(**grid_style)
fig.show()

> ## Plotting the last `time_step` days of the dataset and the next 30 predicted days

In [46]:
# Recursive multi-step forecast: predict one day ahead, append the prediction
# to the rolling window, and repeat `pred_days` times. All values stay on the
# scaler's [0, 1] scale.
pred_days = 30

# Seed the window with the last `time_step` scaled closes of the test split.
temp_input = test_data[len(test_data)-time_step:].reshape(1,-1)[0].tolist()

# One [value] entry per forecast day, in the shape model.predict returns.
lst_output = []
for _ in range(pred_days):
    # Only the newest `time_step` values feed the model: [samples, steps, features].
    x_input = np.array(temp_input[-time_step:]).reshape((1, time_step, 1))
    yhat = model.predict(x_input, verbose=0)

    temp_input.extend(yhat[0].tolist())
    # Drop the oldest value so the window does not grow with every step.
    temp_input = temp_input[-time_step:]

    lst_output.extend(yhat.tolist())

print("Output of predicted next days: ", len(lst_output))

In [47]:
# 1-based day numbers: 1..time_step for the observed days, then the next
# `pred_days` numbers for the forecast days.
first_pred_day = time_step + 1
last_days = np.arange(1, first_pred_day)
day_pred = np.arange(first_pred_day, first_pred_day + pred_days)
print(last_days, day_pred, sep='\n')

In [59]:
# Plot the last `time_step` observed closes next to the `pred_days` forecast.
# Each column is its own list: observed values are NaN-padded on the right,
# forecast values on the left. The previous version bound both names to ONE
# shared list, so both traces showed identical data.
observed_close = scaler.inverse_transform(closedf[len(closedf)-time_step:]).ravel().tolist()
forecast_close = scaler.inverse_transform(np.array(lst_output).reshape(-1,1)).ravel().tolist()

last_original_days_value = observed_close + [np.nan] * len(forecast_close)
next_predicted_days_value = [np.nan] * len(observed_close) + forecast_close

new_pred_plot = pd.DataFrame({
    'last_original_days_value':last_original_days_value,
    'next_predicted_days_value':next_predicted_days_value
})

# Labels are built from time_step/pred_days so they always describe the data
# (the old text said "15 days" while the window was time_step days long).
names = cycle(['Last {} days close price'.format(time_step),
               'Predicted next {} days close price'.format(pred_days)])

fig = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['last_original_days_value'],
                                                      new_pred_plot['next_predicted_days_value']],
              labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(title_text='Compare last {} days vs next {} days'.format(time_step, pred_days),
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')

fig.for_each_trace(lambda t:  t.update(name = next(names)))
fig.update_xaxes(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.update_yaxes(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.show()



> ## Plotting entire Closing Stock Price with next 30 days period of prediction

In [49]:
# Append the scaled forecast to the scaled history, then convert the combined
# series back to prices in one inverse_transform call.
scaled_history = closedf.tolist()
scaled_forecast = np.array(lst_output).reshape(-1, 1).tolist()
lstmdf = scaler.inverse_transform(scaled_history + scaled_forecast).ravel().tolist()

names = cycle(['Close price'])

fig = px.line(lstmdf, labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(
    title_text='Plotting whole closing stock price with prediction',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Stock',
)

# Give the single trace a readable legend name.
for trace in fig.data:
    trace.update(name=next(names))

grid_style = dict(showgrid=True, gridwidth=0.01, gridcolor='LightPink')
fig.update_xaxes(**grid_style)
fig.update_yaxes(**grid_style)
fig.show()