**Task 2 - Stock Market Prediction And Forecasting Using Stacked LSTM**

**Link for the dataset:** https://raw.githubusercontent.com/mwitiderrick/stockprice/master/NSE-TATAGLOBAL.csv

**Importing libraries**

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf

**Load the dataset**

In [5]:
# Remote CSV with the NSE-TATAGLOBAL price history used throughout this notebook.
DATA_URL = "https://raw.githubusercontent.com/mwitiderrick/stockprice/master/NSE-TATAGLOBAL.csv"
df = pd.read_csv(DATA_URL)
df

In [6]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [7]:
# Column labels of the loaded frame.
df.columns

In [8]:
# Per-column dtype and non-null count, plus overall memory usage.
df.info()

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

**Checking for null values**

In [10]:
# Count missing values per column; nothing is dropped or imputed here.
df.isnull().sum()

**Data Visualization**

In [11]:
import seaborn as sns
plt.figure(figsize = (8,8))
# Correlate numeric columns only: 'Date' is still a plain string column at this
# point (it is parsed in a later cell) and pandas >= 2.0 raises when corr() is
# handed non-numeric data. select_dtypes works on old and new pandas alike.
sns.heatmap(df.select_dtypes(include='number').corr(), annot = True)
plt.title("Correlation between numeric columns")

In [12]:
# Line plot of the 'High' column against the frame's current row index
# ('Date' has not been made the index yet).
plt.plot(df['High'])

In [13]:
# Parse 'Date' and promote it to the index.
# Guarded so the cell can be re-run: after the first run 'Date' is the index and
# no longer a column, so an unguarded version would fail on df['Date'].
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
    df = df.set_index('Date')  # set_index drops the column it promotes
df

LSTMs are sensitive to the scale of the input data, so we apply a MinMax scaler to the `High` series before training.

In [14]:
# Pull out the 'High' series on a plain 0..n-1 integer index (the Date index is discarded).
df_high = df['High'].reset_index(drop=True)

In [15]:
# Display the extracted 'High' series.
df_high

In [17]:
# Same 'High' series as plotted earlier; df is now indexed by Date, so the x-axis shows dates.
plt.plot(df['High'])

In [18]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler(feature_range=(0,1))
# Scale from the untouched df['High'] column instead of from df_high itself.
# The previous `df_high = fit_transform(df_high)` form re-fitted the scaler on
# already-scaled data when the cell was run twice, which silently broke every
# later scaler.inverse_transform() call.
# MinMaxScaler expects a 2-D array, hence the reshape to a single column.
# NOTE(review): the scaler is fitted on the full series, including the rows that
# later become the test split, so test-set min/max leak into training.
df_high=scaler.fit_transform(np.array(df['High']).reshape(-1,1))

In [19]:
# Print the scaled single-column array; values lie in [0, 1] after MinMax scaling.
print(df_high)

**Data Splitting**

In [20]:
# Sequential (unshuffled) split: the first 75% of rows train, the remainder tests.
TRAIN_FRACTION = 0.75
n_rows = len(df_high)
training_size = int(n_rows * TRAIN_FRACTION)
test_size = n_rows - training_size
# df_high has a single column, so plain row slices are all that is needed.
train_data = df_high[:training_size, :]
test_data = df_high[training_size:, :1]

In [21]:
# Row counts of the train and test splits.
training_size,test_size

In [22]:
def create_dataset(dataset, time_step=1):
    """Turn a single-column 2-D array into sliding-window samples.

    Each sample is `time_step` consecutive values taken from column 0, and its
    target is the value immediately after that window.

    Note: the number of windows is ``len(dataset) - time_step - 1``, so the
    window whose target would be the very last row of `dataset` is not emitted.
    Downstream plotting offsets in this notebook rely on that count.

    Args:
        dataset: 2-D array indexed as ``dataset[row, 0]``.
        time_step: number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of numpy arrays: X holds one window per row, y the
        matching next-step targets.
    """
    n_windows = len(dataset) - time_step - 1
    windows = [dataset[start:start + time_step, 0] for start in range(n_windows)]
    targets = [dataset[start + time_step, 0] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [23]:
# Build supervised samples: each X row holds `time_step` consecutive scaled
# values and the matching y is the value that immediately follows the window.
time_step = 100
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

In [24]:
# Shapes of the training windows and targets.
# A bare tuple is displayed directly; the earlier `print(a), print(b)` form
# also echoed a stray `(None, None)` as the cell's value.
X_train.shape, y_train.shape

In [25]:
# Shapes of the test windows and targets.
# A bare tuple is displayed directly; the earlier `print(a), print(b)` form
# also echoed a stray `(None, None)` as the cell's value.
X_test.shape, y_test.shape

In [26]:
# Keras LSTM layers take 3-D input [samples, time steps, features];
# append a trailing feature axis of size 1 to each window matrix.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

**Creating an LSTM model**

In [27]:
# Create the Stacked LSTM model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

In [28]:
# Three stacked LSTM layers (50 units each) feeding a single-unit regression head.
model=Sequential()
# The input window length follows `time_step` instead of a literal 100, so the
# model stays in sync with the windows produced by create_dataset().
model.add(LSTM(50,return_sequences=True,input_shape=(time_step,1)))
# return_sequences=True hands the full sequence to the next LSTM layer.
model.add(LSTM(50,return_sequences=True))
# The last LSTM layer returns only its final output.
model.add(LSTM(50))
model.add(Dense(1))
model.compile(loss='mean_squared_error',optimizer='adam')

In [29]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()


In [30]:
# Train for 100 epochs in mini-batches of 64.
# The test windows double as validation data, so the validation loss reported
# during training is computed on the same rows that are evaluated afterwards.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=64,
    verbose=1,
)

**Predictions**

In [31]:
# Run the trained model on the train and test windows.
# Predictions are still in the scaler's 0-1 range at this point.
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

In [32]:
# Transform the predictions back to the original price scale.
# NOTE: this overwrites train_predict/test_predict in place, so re-running this
# cell without first re-running the prediction cell applies the inverse
# transform twice and yields wrong values.
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

**Model Evaluation**

In [33]:
# Train RMSE, expressed in original price units.
import math
from sklearn.metrics import mean_squared_error
# y_train is still in the scaler's 0-1 range while train_predict has already
# been inverse-transformed to prices; comparing them directly mixes two scales.
# Bring the targets back to price units first (the scaler needs a 2-D column).
y_train_price = scaler.inverse_transform(y_train.reshape(-1, 1))
math.sqrt(mean_squared_error(y_train_price, train_predict))

In [34]:
# Test RMSE, expressed in original price units.
# y_test is still in the scaler's 0-1 range while test_predict has already been
# inverse-transformed to prices; bring the targets back to price units first
# (the scaler needs a 2-D column) so both sides share one scale.
y_test_price = scaler.inverse_transform(y_test.reshape(-1, 1))
math.sqrt(mean_squared_error(y_test_price, test_predict))

**Plot the results**

In [36]:
# Overlay train and test predictions on the actual series.
# Each prediction is placed at the row of the value it predicts.
look_back = time_step  # window length used when the samples were built

# Train predictions: the first window covers rows [0, look_back), so its
# prediction belongs to row `look_back`.
trainPredictPlot = np.empty_like(df_high)
trainPredictPlot[:, :] = np.nan
train_start = look_back
trainPredictPlot[train_start:train_start + len(train_predict), :] = train_predict

# Test predictions: test windows start at row `training_size`, so the first
# prediction belongs to row `training_size + look_back`. Deriving the slice from
# these quantities (rather than from len(train_predict) plus fixed offsets)
# keeps it correct regardless of how many windows create_dataset() emits.
testPredictPlot = np.empty_like(df_high)
testPredictPlot[:, :] = np.nan
test_start = training_size + look_back
testPredictPlot[test_start:test_start + len(test_predict), :] = test_predict

# Plot baseline and predictions
plt.figure(figsize=(15,5))
plt.plot(scaler.inverse_transform(df_high), label="Actual High")
plt.plot(trainPredictPlot, label="Train prediction")
plt.plot(testPredictPlot, label="Test prediction")
plt.xlabel("Row index")
plt.ylabel("High price")
plt.title("Stacked LSTM: actual vs. predicted High")
plt.legend()
plt.show()

**Making predictions for the next 30 days**

In [38]:
# Number of rows in the test split; the forecast seed window is taken from its tail.
len(test_data)

In [39]:
# Seed window for forecasting: the last `time_step` scaled values of the test
# split, flattened to one row. Slicing from the end replaces the hard-coded
# offset 409, which was only right for one specific dataset length.
X_input = test_data[-time_step:].reshape(1,-1)
X_input.shape

In [40]:
# Flatten the single-row seed window into a plain Python list of floats.
temp_input = X_input[0].tolist()

In [42]:
from numpy import array

# Recursive multi-step forecast: predict one step, append the prediction to the
# window, drop the oldest value, repeat.
n_steps = 100    # window length the model was trained on
n_future = 30    # number of future steps to forecast

# Work on a copy of the seed window so `temp_input` is left untouched and
# re-running this cell always restarts from the same state.
window = list(temp_input[-n_steps:])
lst_output = []  # scaled predictions, one single-element list per step

for i in range(n_future):
    # Model input must be [samples, time steps, features].
    X_input = np.array(window).reshape((1, n_steps, 1))
    yhat = model.predict(X_input, verbose=0)
    # Slide the window forward by one step using the new prediction.
    window.extend(yhat[0].tolist())
    window = window[1:]
    lst_output.extend(yhat.tolist())

In [43]:
# X-axis positions: 1..n_steps for the most recent observed values, followed by
# one position per forecast step. Derived from n_steps and lst_output so the
# axes cannot drift out of sync with the forecast cell.
day_new=np.arange(1, n_steps + 1)
day_pred=np.arange(n_steps + 1, n_steps + 1 + len(lst_output))

In [44]:
# Scaled history followed by the scaled forecast, as one list of single-element rows.
df_new = df_high.tolist() + lst_output

In [47]:
# Most recent observed values next to the forecast, both back in price units.
# The history slice is taken from the end of df_high and sized by day_new,
# replacing the hard-coded start index 1935.
plt.plot(day_new,scaler.inverse_transform(df_high[-len(day_new):]), label="Recent actual High")
plt.plot(day_pred,scaler.inverse_transform(lst_output), label="Forecast")
plt.xlabel("Step")
plt.ylabel("High price")
plt.legend()
plt.show()

In [48]:
# Rebuild the scaled history-plus-forecast list and plot it from row 2000 onward.
df_new = df_high.tolist() + lst_output
plt.plot(df_new[2000:])

In [49]:
# History plus forecast in original price units.
# Rebuilt from df_high and lst_output rather than from df_new itself, so
# re-running the cell cannot apply the inverse transform twice.
df_new = scaler.inverse_transform(df_high.tolist() + lst_output).tolist()

In [50]:
# Plot the full history with the forecast appended at the end.
plt.plot(df_new)