In this notebook we'll implement a deep-learning-based LSTM model for predicting a stock's closing prices.



In [None]:
!pip install yfinance 

Collecting yfinance
  Downloading yfinance-0.1.70-py2.py3-none-any.whl (26 kB)
Collecting requests>=2.26
  Downloading requests-2.27.1-py2.py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 1.7 MB/s 
Collecting lxml>=4.5.1
  Downloading lxml-4.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.4 MB)
[K     |████████████████████████████████| 6.4 MB 17.0 MB/s 
Installing collected packages: requests, lxml, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests

In [None]:
#Importing libraries
import numpy as np
import pandas as pd
import yfinance as yf
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
import math
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

# For reading stock data from yahoo
from pandas_datareader.data import DataReader

# For time stamps
from datetime import datetime
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
#Data Collection::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
def collectData(ticker, start='2015-01-01', end='2021-12-31'):
  """Download price history for ``ticker`` through ``yf.download``.

  Args:
    ticker: Symbol (or symbols) forwarded unchanged to ``yf.download``.
    start: Start of the requested date range. Defaults to the window that
      was previously hard-coded here.
    end: End of the requested date range. Same default as before.

  Returns:
    The object returned by ``yf.download`` (multi-threaded download, progress
    bar disabled).
  """
  dataset = yf.download(ticker, start=start, end=end, threads=True, progress=False)
  return dataset

In [None]:
def DataPreprocessing(dataset):
  """Derive model features from an OHLCV frame and remove IQR outliers.

  Args:
    dataset: DataFrame with at least the columns 'Open', 'High', 'Low',
      'Close' and 'Volume'. An 'Adj Close' column is discarded when present.
      Columns are looked up by name, so their order does not matter. The
      caller's frame is not modified.

  Returns:
    A new DataFrame with exactly these columns, in this order:
    'Open', 'Volume', 'Range', 'a/d', '15MA', '15EMA', 'Close'
    (six features followed by the target). The first 15 rows are dropped and
    so is every row that fails the outlier filter for any column.
  """
  # `drop` returns a new frame, so the assignments below never touch the
  # caller's data. 'Adj Close' is unused; tolerate inputs that lack it.
  dataset = dataset.drop(columns='Adj Close', errors='ignore')

  # Derived features.
  high, low, close = dataset['High'], dataset['Low'], dataset['Close']
  dataset['Range'] = high - low
  # When High == Low the denominator is zero and the result is not finite;
  # such rows fail the comparisons in the filter below and are removed there.
  dataset['a/d'] = ((close - low) * (high - close)) / (high - low)
  dataset['15MA'] = close.rolling(window=15).mean()
  dataset['15EMA'] = close.ewm(span=15, adjust=False).mean()

  # Keep only the model columns, selected by name, with the target 'Close'
  # last. 'High' and 'Low' are left out because they are highly correlated
  # with the remaining price columns. The first 15 rows are skipped so the
  # rolling-mean warm-up rows are excluded.
  columns = ['Open', 'Volume', 'Range', 'a/d', '15MA', '15EMA', 'Close']
  dataset = dataset.iloc[15:].loc[:, columns].copy()

  # Outlier removal: columns are processed one after another, and each pass
  # computes its quartiles on the rows that survived the previous passes.
  # Bounds are exclusive on both sides.
  for i in tqdm(columns):
    Q1 = dataset[i].quantile(0.25)
    Q3 = dataset[i].quantile(0.75)
    IQR = Q3 - Q1
    u = Q3 + (1.5 * IQR)
    l = Q1 - (1.5 * IQR)
    dataset = dataset.loc[(dataset[i] > l) & (dataset[i] < u), :]
  return dataset

In [None]:
def DataNorm(dataset, normalizer_x, normalizer_y, train_frac=0.65):
  """Split chronologically into train/test and scale features and target.

  The first six columns are treated as features and the remaining column(s)
  as the target. Rows are split in their existing order (no shuffling): the
  leading ``train_frac`` share becomes the training set and the rest the test
  set. Each scaler is fitted on the training rows only and then applied to
  the test rows, so test data never influences the scaling parameters.

  Args:
    dataset: DataFrame whose columns 0-5 are features and 6+ the target.
    normalizer_x: Object exposing ``fit_transform`` and ``transform``; it is
      fitted in place on the training features.
    normalizer_y: Same, for the target. It is fitted in place so the caller
      can later invert predictions with it.
    train_frac: Share of rows used for training, strictly between 0 and 1.
      Defaults to 0.65, the split that was previously hard-coded.

  Returns:
    ``(X_train, X_test, Y_train, Y_test)`` as DataFrames that keep the
    original row index. They are built from the scalers' output without
    passing column labels.

  Raises:
    ValueError: If ``train_frac`` is not strictly between 0 and 1.
  """
  if not 0 < train_frac < 1:
    raise ValueError("train_frac must be strictly between 0 and 1, got %r" % (train_frac,))

  X = dataset.iloc[:, 0:6]
  Y = dataset.iloc[:, 6:]

  train_sz = int(X.shape[0] * train_frac)
  X_train, X_test = X.iloc[:train_sz, :], X.iloc[train_sz:, :]
  Y_train, Y_test = Y.iloc[:train_sz], Y.iloc[train_sz:]

  # Fit on the training rows only, then reuse the fitted scaler on the test rows.
  X_train = pd.DataFrame(normalizer_x.fit_transform(X_train), index=X_train.index)
  X_test = pd.DataFrame(normalizer_x.transform(X_test), index=X_test.index)

  Y_train = pd.DataFrame(normalizer_y.fit_transform(Y_train), index=Y_train.index)
  Y_test = pd.DataFrame(normalizer_y.transform(Y_test), index=Y_test.index)
  return X_train, X_test, Y_train, Y_test

In [None]:
#Creating Sequential Data ::
def create_sequential_feed(X, y, seq_len):
  """Build sliding windows over ``X`` paired with the aligned ``y`` rows.

  One window is produced for every start position from 0 up to
  ``len(X) - seq_len``. Each window holds ``seq_len`` consecutive rows of
  ``X``; its target is the ``y`` row at the window's last position.

  Args:
    X: Feature frame, indexed positionally via ``.iloc``.
    y: Target frame/series, indexed positionally via ``.iloc``.
    seq_len: Number of consecutive rows per window.

  Returns:
    ``(windows, targets)`` as NumPy arrays. Both are empty when ``X`` has
    fewer than ``seq_len`` rows.
  """
  n_rows = X.shape[0]
  n_targets = y.shape[0]
  starts = range(n_rows - seq_len + 1)

  # Window end positions are clamped to the available length.
  windows = [X.iloc[s:min(n_rows, s + seq_len)].values for s in starts]
  targets = [y.iloc[min(n_targets, s + seq_len) - 1] for s in starts]
  return np.array(windows), np.array(targets)

In [None]:
def ModelTrain(X_train, X_test, Y_train, Y_test, epochs=20, batch_size=64):
  """Build, fit and evaluate a two-layer stacked LSTM regressor.

  Args:
    X_train: 3-D array of training windows, (samples, timesteps, features).
    X_test: 3-D array of test windows with the same trailing dimensions.
    Y_train: Training targets, one row per training window.
    Y_test: Test targets, one row per test window.
    epochs: Number of training epochs (default 20, the former fixed value).
    batch_size: Mini-batch size (default 64, the former fixed value).

  Returns:
    ``(train_predict, test_predict)``: the model's predictions for
    ``X_train`` and ``X_test``, in the same scale as the targets passed in.

  Raises:
    ValueError: If ``X_train`` is not 3-dimensional (for example when too few
      rows were available to form a single window).
  """
  train_shape = np.shape(X_train)
  if len(train_shape) != 3:
    raise ValueError(
        "X_train must have shape (samples, timesteps, features), got %r" % (train_shape,))
  # Take the window length and feature count from the data instead of fixing
  # them here, so the model always matches what the caller built.
  timesteps, n_features = train_shape[1], train_shape[2]

  # Stacked LSTM: the first layer returns the full sequence so the second
  # LSTM can consume it; the second returns only its final state.
  model = Sequential()
  model.add(LSTM(250, activation='tanh', return_sequences=True,
                 input_shape=(timesteps, n_features)))
  model.add(LSTM(250, activation='tanh', return_sequences=False, dropout=0.3))
  model.add(Dense(1))
  model.compile(loss='mean_squared_error', optimizer='adam')
  model.summary()

  # The test split is passed as validation data purely to report its loss
  # after each epoch; no callback here acts on it.
  model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
            epochs=epochs, batch_size=batch_size, verbose=1)
  train_predict = model.predict(X_train)
  test_predict = model.predict(X_test)
  return train_predict, test_predict

**Main Function for Training & Testing Data**

In [None]:
def train(ticker):
  """Run the full pipeline for one ticker: download, preprocess, scale,
  window, fit the LSTM, and score it on the held-out rows.

  Args:
    ticker: Symbol passed to ``collectData``.

  Returns:
    A 3-tuple ``(Y_train, test_predict, rmse_test)``:
      * the *training* targets, mapped back to the original price scale;
      * the model's predictions for the test windows, in the price scale;
      * the RMSE between the test targets and those predictions, in the
        price scale.
  """
  window = 10  # rows per input sequence, used for both splits

  prices = DataPreprocessing(collectData(ticker))

  # Separate scalers for features and target so predictions can be inverted.
  scaler_x = MinMaxScaler(feature_range=(0, 1))
  scaler_y = MinMaxScaler(feature_range=(0, 1))
  X_tr, X_te, y_tr, y_te = DataNorm(prices, scaler_x, scaler_y)

  # Turn the flat rows into overlapping windows for the LSTM.
  X_tr, y_tr = create_sequential_feed(X_tr, y_tr, window)
  X_te, y_te = create_sequential_feed(X_te, y_te, window)

  # Only the test predictions are used below.
  _, pred_te = ModelTrain(X_tr, X_te, y_tr, y_te)

  # Undo the target scaling before computing the error.
  y_tr = scaler_y.inverse_transform(y_tr)
  pred_te = scaler_y.inverse_transform(pred_te)
  y_te = scaler_y.inverse_transform(y_te)
  rmse = math.sqrt(mean_squared_error(y_te, pred_te))
  return y_tr, pred_te, rmse
  