# Share price prediction using the RSI indicator with a bidirectional RNN architecture

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from datetime import datetime, time, date
import matplotlib.pyplot as plt
from rsi import rsi

## Importing data

In [2]:
# Load the daily price history; the "Date" column is parsed to datetimes and used as the index.
# The relative path uses a forward slash so it resolves on Windows, Linux and macOS alike
# (a backslash is only a path separator on Windows).
df=pd.read_csv("Share prices/Asian_paints_15Y.csv",index_col="Date",parse_dates=True)

In [3]:
# Preview the raw frame (pandas shows only the first and last rows of a long frame).
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-01-17,31.500000,31.910000,31.299999,31.844999,7.543048,64970.0
2005-01-18,31.500000,32.000000,31.500000,31.930000,7.563182,63840.0
2005-01-19,32.299999,32.500000,31.770000,32.259998,7.641345,623920.0
2005-01-20,32.900002,33.200001,32.000000,32.404999,7.675693,161410.0
2005-01-24,32.400002,33.740002,32.099998,32.985001,7.813076,1160320.0
...,...,...,...,...,...,...
2020-10-09,2090.000000,2107.149902,2046.449951,2050.399902,2050.399902,45926.0
2020-10-12,2077.000000,2085.949951,2041.449951,2076.899902,2076.899902,41379.0
2020-10-13,2072.000000,2112.899902,2054.399902,2064.750000,2064.750000,48983.0
2020-10-14,2066.050049,2084.350098,2055.149902,2071.300049,2071.300049,53116.0


## Cleaning Data

In [4]:
# Per column: does it contain at least one missing value?
df.isna().any()

Open         True
High         True
Low          True
Close        True
Adj Close    True
Volume       True
dtype: bool

In [5]:
# Discard every row in which at least one column is missing.
df = df.dropna(axis=0, how="any")

In [6]:
# Re-run the missing-value check after dropna(); every column should now report False.
df.isna().any()

Open         False
High         False
Low          False
Close        False
Adj Close    False
Volume       False
dtype: bool

In [7]:
# Summarise the cleaned frame: index range, non-null counts and dtypes per column.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3878 entries, 2005-01-17 to 2020-10-15
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       3878 non-null   float64
 1   High       3878 non-null   float64
 2   Low        3878 non-null   float64
 3   Close      3878 non-null   float64
 4   Adj Close  3878 non-null   float64
 5   Volume     3878 non-null   float64
dtypes: float64(6)
memory usage: 212.1 KB


## Getting RSI values

In [8]:
# Work on an independent frame that holds only the closing price ...
df1 = df.loc[:, ["Close"]].copy()
# ... and attach the RSI values returned by rsi() (imported from the local `rsi` module),
# which is given the full cleaned frame.
df1["rsi"] = rsi(df)

In [9]:
# Preview close price next to its RSI; the first rows shown have rsi == 0.0
# (presumably the indicator's warm-up period -- confirm against the rsi module).
df1

Unnamed: 0_level_0,Close,rsi
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-01-17,31.844999,0.000000
2005-01-18,31.930000,0.000000
2005-01-19,32.259998,0.000000
2005-01-20,32.404999,0.000000
2005-01-24,32.985001,0.000000
...,...,...
2020-10-09,2050.399902,58.160477
2020-10-12,2076.899902,61.077012
2020-10-13,2064.750000,59.044779
2020-10-14,2071.300049,59.820940


## Plotting Price and RSI

In [None]:
# `dates` was never defined in this notebook, so this cell raised a NameError on a
# fresh kernel. The x-axis values are simply the DatetimeIndex of the frame.
dates = df1.index

# Two stacked panels: close price on top, RSI below.
fig, (ax_price, ax_rsi) = plt.subplots(2, 1, figsize=(16, 12))

# The first 14 rows are skipped in both panels (the RSI column is 0.0 at the start
# of the frame -- presumably the indicator's warm-up window).
ax_price.plot(dates[14:], df1.iloc[14:, 0].values, linewidth=3)
ax_price.set_xlabel('Date')
ax_price.set_ylabel('Close Price')
ax_price.set_title("Price vs Time")
ax_price.grid()

ax_rsi.plot(dates[14:], df1.iloc[14:, 1].values, linewidth=3)
ax_rsi.set_xlabel('Date')
ax_rsi.set_ylabel('RSI')
# Dashed guide lines at RSI 70 and 30, drawn across the full date range.
ax_rsi.hlines(70, dates[0], dates[-1], color="y", linestyles="dashed", linewidth=5)
ax_rsi.hlines(30, dates[0], dates[-1], color="y", linestyles="dashed", linewidth=5)
ax_rsi.set_title("RSI vs Time")
ax_rsi.grid()

# LSTM

## scaling the data in range (0,1)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Skip the first 14 rows (RSI is 0.0 at the start of the frame -- presumably its
# warm-up window) and keep the two columns as a plain (n_rows, 2) array: [Close, rsi].
features=np.asarray(df1.iloc[14:,:]).reshape(-1,2)

# Fit the scaler on the chronologically first 70% of rows only -- the same fraction
# the notebook uses for training. Fitting on every row would leak the test period's
# min/max into the training inputs (look-ahead bias).
fit_rows=int(len(features)*0.7)
sc=MinMaxScaler(feature_range=(0,1))
sc.fit(features[:fit_rows])

# Transform all rows with the training-period statistics. Values from the later
# rows may fall outside (0,1) when they exceed the range seen during fitting.
df2=sc.transform(features)

## Splitting into training and test data

In [None]:
# Chronological split: the first 70% of the scaled rows train the model,
# the remaining 30% are held out for testing.

training_size = int(0.7 * (len(df1) - 14))       # the first 14 rows of df1 were dropped before scaling
test_size = len(df1) - 14 - training_size
training_data = df2[:training_size]
test_data = df2[training_size:]
# test_data therefore runs from row `training_size` to the end of df2

In [None]:
# Inspect the scaled training rows (columns: Close, rsi).
training_data

In [None]:
# Inspect the scaled test rows (columns: Close, rsi).
test_data

In [None]:
# Number of rows assigned to the training and test splits.
training_size, test_size

## Creating the training data set

In [None]:
# Convert an array of values into a dataset matrix
def create_dataset(dataset, timestep):
    """Slice a 2-D series into overlapping look-back windows and next-step targets.

    Sample ``i`` pairs the window ``dataset[i : i + timestep, :]`` with the
    target ``dataset[i + timestep, 0]``, i.e. column 0 of the row that
    immediately follows the window.

    Parameters
    ----------
    dataset : array-like, shape (n_rows, n_features)
        Rows in time order. Column 0 is the value to predict.
    timestep : int
        Number of consecutive rows in each input window.

    Returns
    -------
    dataX : numpy.ndarray, shape (n_samples, timestep, n_features)
        The input windows.
    dataY : numpy.ndarray, shape (n_samples,)
        The target for each window, with
        ``n_samples = max(n_rows - timestep, 0)``.
    """
    dataset = np.asarray(dataset)

    # Every start index i with i + timestep <= n_rows - 1 has a target row, so
    # there are n_rows - timestep samples. The earlier bound
    # (n_rows - timestep - 1) silently dropped the last valid one.
    n_samples = max(len(dataset) - timestep, 0)

    if n_samples == 0:
        # Too few rows for even one window: return correctly shaped empty
        # arrays so that callers inspecting .shape still see the expected rank.
        empty_x = np.empty((0, timestep) + dataset.shape[1:], dtype=dataset.dtype)
        empty_y = np.empty((0,), dtype=dataset.dtype)
        return empty_x, empty_y

    dataX = [dataset[i:i + timestep, :] for i in range(n_samples)]   # e.g. rows 0..49
    dataY = [dataset[i + timestep, 0] for i in range(n_samples)]     # e.g. row 50, column 0
    return np.array(dataX), np.array(dataY)

In [None]:
# Build look-back windows and next-step targets for both splits.
timestep = 100      # rows per input window
X_train, Y_train = create_dataset(dataset=training_data, timestep=timestep)
print(f"X_train will start from {timestep} to {len(training_data) - timestep}")
X_test, Y_test = create_dataset(dataset=test_data, timestep=timestep)

# X_test / Y_test are windowed from test_data, i.e. the tail of the scaled series.

In [None]:
# Shape of the training windows: (samples, timestep, features).
X_train.shape

In [None]:
# Shape of the test windows: (samples, timestep, features).
X_test.shape

## Creating the stacked bidirectional LSTM model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Bidirectional

In [None]:
"""
TO UNDERSTAND THE LSTM INPUT/OUTPUT SHAPE ---
https://medium.com/@shivajbd/understanding-input-and-output-shape-in-lstm-keras-c501ee95c65e
"""

"""
TO UNDERSTAND RETURN SEQUENCES ---
https://www.dlology.com/blog/how-to-use-return_state-or-return_sequences-in-keras/

return_sequences=True returns the output for every time step;
return_sequences=False returns only the output of the last time step -- the RNN layer returns just the last hidden state a<T>
"""

"""
STACKED LSTM ---
https://machinelearningmastery.com/stacked-long-short-term-memory-networks/
"""

# Read the window length and the feature count off the training tensor instead of
# hard-coding 2 features, so the input layer always matches X_train.
n_timesteps, n_features = X_train.shape[1:3]

# Four stacked bidirectional LSTM layers with 50 units per direction.
model =Sequential()
model.add(Bidirectional(LSTM(50, return_sequences=True), input_shape=(n_timesteps, n_features)))
# The intermediate layers return the full sequence so the next recurrent layer
# receives one vector per time step.
model.add(Bidirectional(LSTM(50, return_sequences=True)))
model.add(Bidirectional(LSTM(50, return_sequences=True)))
# The last recurrent layer returns only its final output.
model.add(Bidirectional(LSTM(50)))

# Single linear unit: the predicted (scaled) value of column 0 for the next step.
model.add(Dense(1))
model.compile(loss="mean_squared_error", optimizer="adam")

In [None]:
model.summary()

## Creating checkpoint to store weights

In [None]:
import os

# Raw string: as a plain literal this Windows path contained "\P", "\S", "\T" and "\B",
# which are invalid escape sequences (a warning today, an error in a future Python).
# The runtime value of the path is unchanged.
# NOTE(review): this is a machine-specific absolute path -- adjust it before running elsewhere.
# NOTE(review): newer Keras releases require the filepath to end in ".weights.h5" when
# save_weights_only=True -- confirm against the installed TensorFlow/Keras version.
checkpoint_path = r"D:\Python codes\Share price prediction\Trained weights\Bidirectional RNN with RSI/cp.ckpt"      #Run and save on computer
#checkpoint_path = "/content/drive/My Drive/Colab Notebooks/Share Price prediction/Weights/cp.ckpt"                                                                 #Save on drive
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights (weights only, not the full model)
# and logs a message each time it does so.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

In [None]:
# Train for 100 epochs in mini-batches of 32, reporting the loss on the test
# windows after each epoch and writing the weights through the checkpoint callback.
model.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=100,
    verbose=1,
    callbacks=[cp_callback],
    validation_data=(X_test, Y_test),
)