In [53]:
# Imports

import math

import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler

from tools.csv_tools import save_df_to_csv
from tools.historical_data import retrieve_historical_data

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import finplot as fplt

In [54]:
# Setting model parameters

training_data_percentage = 0.8
lookback_period = 60

In [55]:
# Setting historical data parameters

stock_name = "NIFTY 50"
from_date_string = "27-Aug-2021 09:15"
to_date_string = "27-Jun-2023 15:15"
interval = "60minute"

In [56]:
# Retrieving historical data

stock_data = retrieve_historical_data(stock_name=stock_name, from_date_string=from_date_string, to_date_string=to_date_string, interval=interval, indicator_dates=True)

save_df_to_csv(stock_data, "source_data.csv")

In [57]:
# Using only close prices for the LSTM model

close_prices = stock_data['close']
values = close_prices.values
len(values)

3178

In [58]:
# Calculating length of training data

training_data_len = math.ceil(len(values) * training_data_percentage)
print(training_data_len)
values

2543


array([16636.75, 16667.25, 16689.9 , ..., 18775.4 , 18824.8 , 18818.6 ])

In [59]:
# Scaling data

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(values.reshape(-1, 1))

train_data = scaled_data[0: training_data_len, :]
train_data

array([[0.3805144 ],
       [0.38897675],
       [0.39526108],
       ...,
       [0.69342711],
       [0.71354253],
       [0.71802342]])

In [60]:
# Constructing training data

x_train = []
y_train = []

for i in range(lookback_period, len(train_data)):
    x_train.append(train_data[i - lookback_period:i, 0])
    y_train.append(train_data[i, 0])

In [61]:
len(x_train)

2483

In [62]:
y_train

[0.571486044059708,
 0.5787553409910657,
 0.5841518228733147,
 0.5713056989068308,
 0.578214305532434,
 0.5777426335941396,
 0.5697103379390711,
 0.5767715443094161,
 0.5824316075689469,
 0.5806559014483099,
 0.5612618611619773,
 0.5689750846234949,
 0.5729426779867932,
 0.5811691914988071,
 0.5820847899672605,
 0.5805449198157708,
 0.5786027412463239,
 0.5969424560235277,
 0.5990788524499191,
 0.5951112590866208,
 0.5897425226125073,
 0.5899367404694518,
 0.5855945840963326,
 0.5872870539925641,
 0.6053215692802842,
 0.6068336940236394,
 0.6111619776926913,
 0.6184035292159153,
 0.6256034626269349,
 0.6236751567615562,
 0.6251040452805059,
 0.6365074080239719,
 0.6300982187447977,
 0.6417929082736809,
 0.6422923256201098,
 0.6499500582653566,
 0.6548471228011765,
 0.6554159036679428,
 0.6897092281227462,
 0.6910687531213577,
 0.6532101437212132,
 0.6624632373342214,
 0.6567199378502853,
 0.6424310526607844,
 0.6483824427057323,
 0.6334692858331943,
 0.6505188391321237,
 0.623037012374

In [63]:
len(y_train)

2483

In [64]:
x_train, y_train = np.array(x_train), np.array(y_train)

In [65]:
y_train

array([0.57148604, 0.57875534, 0.58415182, ..., 0.69342711, 0.71354253,
       0.71802342])

In [66]:
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

In [67]:
x_train

array([[[0.3805144 ],
        [0.38897675],
        [0.39526108],
        ...,
        [0.58322235],
        [0.58240386],
        [0.55449198]],

       [[0.38897675],
        [0.39526108],
        [0.39798013],
        ...,
        [0.58240386],
        [0.55449198],
        [0.57148604]],

       [[0.39526108],
        [0.39798013],
        [0.39703679],
        ...,
        [0.55449198],
        [0.57148604],
        [0.57875534]],

       ...,

       [[0.69020865],
        [0.73532268],
        [0.67229898],
        ...,
        [0.69475889],
        [0.69457855],
        [0.69356584]],

       [[0.73532268],
        [0.67229898],
        [0.65737195],
        ...,
        [0.69457855],
        [0.69356584],
        [0.69342711]],

       [[0.67229898],
        [0.65737195],
        [0.6492703 ],
        ...,
        [0.69356584],
        [0.69342711],
        [0.71354253]]])

In [68]:
x_train.shape

(2483, 60, 1)

In [69]:
y_train

array([0.57148604, 0.57875534, 0.58415182, ..., 0.69342711, 0.71354253,
       0.71802342])

In [70]:
test_data = scaled_data[training_data_len - lookback_period:, :]

x_test = []
y_test = []

for i in range(lookback_period, len(test_data)):
    x_test.append(test_data[i - lookback_period:i, 0])
    y_test.append(test_data[i, 0])

len(test_data)

695

In [71]:
x_test

[array([0.65737195, 0.6492703 , 0.64033627, 0.64687032, 0.65316853,
        0.63480107, 0.63216525, 0.65078242, 0.65103213, 0.66887243,
        0.65515232, 0.66916375, 0.67754287, 0.68877976, 0.71832862,
        0.71180845, 0.69609067, 0.68073359, 0.68339715, 0.68155208,
        0.69584096, 0.69265024, 0.69249764, 0.68961212, 0.69423173,
        0.68216248, 0.66293491, 0.68356362, 0.68106653, 0.68117752,
        0.72038178, 0.71455524, 0.71762111, 0.71763498, 0.72699906,
        0.72301759, 0.72060374, 0.7187448 , 0.7214361 , 0.71258532,
        0.7119333 , 0.73096665, 0.72921869, 0.73020365, 0.71139226,
        0.72176905, 0.70985239, 0.70956107, 0.71728816, 0.72040952,
        0.71529049, 0.6883497 , 0.70052994, 0.68851618, 0.69475889,
        0.69457855, 0.69356584, 0.69342711, 0.71354253, 0.71802342]),
 array([0.6492703 , 0.64033627, 0.64687032, 0.65316853, 0.63480107,
        0.63216525, 0.65078242, 0.65103213, 0.66887243, 0.65515232,
        0.66916375, 0.67754287, 0.68877976, 0.

In [72]:
len(x_test)

635

In [73]:
y_test

[0.7302730148160474,
 0.7356140058820255,
 0.7426335941401696,
 0.7379030020531596,
 0.7370706398091107,
 0.7375978025636751,
 0.7423145219466178,
 0.7352255701681374,
 0.7376116752677424,
 0.7409827423561399,
 0.7629432328949557,
 0.7651906109538871,
 0.7888990622052052,
 0.7904666777648295,
 0.7835580711392263,
 0.7820320736918038,
 0.7816991287941848,
 0.7682148604405965,
 0.7629848510071584,
 0.7522196326507959,
 0.7545918650463346,
 0.7514288885189497,
 0.7368486765440316,
 0.7338244270573222,
 0.7447283724543592,
 0.7428278119971141,
 0.7473919316353141,
 0.7489734198990066,
 0.735586260473891,
 0.7319793574163471,
 0.7149298041174186,
 0.7154292214638476,
 0.7164974196770437,
 0.7321319571610898,
 0.7332833915986905,
 0.7207147217135565,
 0.7181898895732752,
 0.7228927362521498,
 0.7085761056545143,
 0.7149436768214859,
 0.6708839687031789,
 0.6672077021252978,
 0.6621025470284669,
 0.6502275123467056,
 0.6306253814993621,
 0.6360357360856774,
 0.6345929748626604,
 0.64281948837

In [74]:
x_test, y_test = np.array(x_test), np.array(y_test)

In [75]:
x_test

array([[0.65737195, 0.6492703 , 0.64033627, ..., 0.69342711, 0.71354253,
        0.71802342],
       [0.6492703 , 0.64033627, 0.64687032, ..., 0.71354253, 0.71802342,
        0.73027301],
       [0.64033627, 0.64687032, 0.65316853, ..., 0.71802342, 0.73027301,
        0.73561401],
       ...,
       [0.97416903, 0.97025692, 0.96049054, ..., 0.95915876, 0.96701071,
        0.96488819],
       [0.97025692, 0.96049054, 0.95943621, ..., 0.96701071, 0.96488819,
        0.97389157],
       [0.96049054, 0.95943621, 0.95458077, ..., 0.96488819, 0.97389157,
        0.9875978 ]])

In [76]:
y_test

array([0.73027301, 0.73561401, 0.74263359, 0.737903  , 0.73707064,
       0.7375978 , 0.74231452, 0.73522557, 0.73761168, 0.74098274,
       0.76294323, 0.76519061, 0.78889906, 0.79046668, 0.78355807,
       0.78203207, 0.78169913, 0.76821486, 0.76298485, 0.75221963,
       0.75459187, 0.75142889, 0.73684868, 0.73382443, 0.74472837,
       0.74282781, 0.74739193, 0.74897342, 0.73558626, 0.73197936,
       0.7149298 , 0.71542922, 0.71649742, 0.73213196, 0.73328339,
       0.72071472, 0.71818989, 0.72289274, 0.70857611, 0.71494368,
       0.67088397, 0.6672077 , 0.66210255, 0.65022751, 0.63062538,
       0.63603574, 0.63459297, 0.64281949, 0.64237556, 0.63452361,
       0.64112702, 0.63919871, 0.62094223, 0.62663004, 0.63360801,
       0.62542312, 0.61725209, 0.6010349 , 0.60471117, 0.61285445,
       0.6104406 , 0.56631153, 0.58580267, 0.57259586, 0.56689418,
       0.57957383, 0.59297486, 0.59404306, 0.586857  , 0.58535875,
       0.58811942, 0.57802009, 0.55378447, 0.56829532, 0.56930

In [77]:
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

In [78]:
x_test

array([[[0.65737195],
        [0.6492703 ],
        [0.64033627],
        ...,
        [0.69342711],
        [0.71354253],
        [0.71802342]],

       [[0.6492703 ],
        [0.64033627],
        [0.64687032],
        ...,
        [0.71354253],
        [0.71802342],
        [0.73027301]],

       [[0.64033627],
        [0.64687032],
        [0.65316853],
        ...,
        [0.71802342],
        [0.73027301],
        [0.73561401]],

       ...,

       [[0.97416903],
        [0.97025692],
        [0.96049054],
        ...,
        [0.95915876],
        [0.96701071],
        [0.96488819]],

       [[0.97025692],
        [0.96049054],
        [0.95943621],
        ...,
        [0.96701071],
        [0.96488819],
        [0.97389157]],

       [[0.96049054],
        [0.95943621],
        [0.95458077],
        ...,
        [0.96488819],
        [0.97389157],
        [0.9875978 ]]])

In [79]:
y_test

array([0.73027301, 0.73561401, 0.74263359, 0.737903  , 0.73707064,
       0.7375978 , 0.74231452, 0.73522557, 0.73761168, 0.74098274,
       0.76294323, 0.76519061, 0.78889906, 0.79046668, 0.78355807,
       0.78203207, 0.78169913, 0.76821486, 0.76298485, 0.75221963,
       0.75459187, 0.75142889, 0.73684868, 0.73382443, 0.74472837,
       0.74282781, 0.74739193, 0.74897342, 0.73558626, 0.73197936,
       0.7149298 , 0.71542922, 0.71649742, 0.73213196, 0.73328339,
       0.72071472, 0.71818989, 0.72289274, 0.70857611, 0.71494368,
       0.67088397, 0.6672077 , 0.66210255, 0.65022751, 0.63062538,
       0.63603574, 0.63459297, 0.64281949, 0.64237556, 0.63452361,
       0.64112702, 0.63919871, 0.62094223, 0.62663004, 0.63360801,
       0.62542312, 0.61725209, 0.6010349 , 0.60471117, 0.61285445,
       0.6104406 , 0.56631153, 0.58580267, 0.57259586, 0.56689418,
       0.57957383, 0.59297486, 0.59404306, 0.586857  , 0.58535875,
       0.58811942, 0.57802009, 0.55378447, 0.56829532, 0.56930

In [80]:
# Defining the LSTM model
model = keras.Sequential()
model.add(layers.LSTM(100, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(layers.LSTM(100, return_sequences=False))
model.add(layers.Dense(25))
model.add(layers.Dense(1))
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 60, 100)           40800     
                                                                 
 lstm_3 (LSTM)               (None, 100)               80400     
                                                                 
 dense_2 (Dense)             (None, 25)                2525      
                                                                 
 dense_3 (Dense)             (None, 1)                 26        
                                                                 
Total params: 123,751
Trainable params: 123,751
Non-trainable params: 0
_________________________________________________________________
None


In [81]:
# Compiling the model

model.compile(optimizer='adam', loss='mean_squared_error')

In [82]:
# Training the model

model.fit(x_train, y_train, batch_size=1, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x2cfcd0d47c0>

In [83]:
# Generating predictions for the test data

predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)



In [84]:
predictions

array([[17781.326],
       [17828.229],
       [17842.713],
       [17867.854],
       [17844.748],
       [17843.865],
       [17845.758],
       [17863.957],
       [17832.797],
       [17846.271],
       [17857.943],
       [17944.094],
       [17940.25 ],
       [18035.293],
       [18026.305],
       [17998.465],
       [17996.13 ],
       [17994.922],
       [17940.518],
       [17927.682],
       [17886.9  ],
       [17902.799],
       [17888.486],
       [17832.596],
       [17829.027],
       [17874.1  ],
       [17860.723],
       [17880.912],
       [17884.668],
       [17830.65 ],
       [17823.932],
       [17757.646],
       [17768.844],
       [17772.494],
       [17834.578],
       [17830.969],
       [17781.059],
       [17777.72 ],
       [17797.299],
       [17738.082],
       [17771.   ],
       [17593.736],
       [17600.64 ],
       [17581.71 ],
       [17538.371],
       [17467.764],
       [17496.998],
       [17489.959],
       [17523.316],
       [17518.883],


In [85]:
predictions = predictions.reshape(-1)
predictions

array([17781.326, 17828.229, 17842.713, 17867.854, 17844.748, 17843.865,
       17845.758, 17863.957, 17832.797, 17846.271, 17857.943, 17944.094,
       17940.25 , 18035.293, 18026.305, 17998.465, 17996.13 , 17994.922,
       17940.518, 17927.682, 17886.9  , 17902.799, 17888.486, 17832.596,
       17829.027, 17874.1  , 17860.723, 17880.912, 17884.668, 17830.65 ,
       17823.932, 17757.646, 17768.844, 17772.494, 17834.578, 17830.969,
       17781.059, 17777.72 , 17797.299, 17738.082, 17771.   , 17593.736,
       17600.64 , 17581.71 , 17538.371, 17467.764, 17496.998, 17489.959,
       17523.316, 17518.883, 17489.04 , 17517.854, 17507.898, 17438.2  ,
       17466.871, 17491.81 , 17457.775, 17429.29 , 17369.777, 17389.434,
       17419.828, 17408.143, 17240.77 , 17328.072, 17272.865, 17255.54 ,
       17305.703, 17353.797, 17354.098, 17326.244, 17322.498, 17333.396,
       17294.09 , 17205.115, 17266.4  , 17266.986, 17358.85 , 17379.828,
       17353.236, 17361.6  , 17391.188, 17392.914, 

In [86]:
predictions = list(predictions)
predictions

[17781.326,
 17828.229,
 17842.713,
 17867.854,
 17844.748,
 17843.865,
 17845.758,
 17863.957,
 17832.797,
 17846.271,
 17857.943,
 17944.094,
 17940.25,
 18035.293,
 18026.305,
 17998.465,
 17996.13,
 17994.922,
 17940.518,
 17927.682,
 17886.9,
 17902.799,
 17888.486,
 17832.596,
 17829.027,
 17874.1,
 17860.723,
 17880.912,
 17884.668,
 17830.65,
 17823.932,
 17757.646,
 17768.844,
 17772.494,
 17834.578,
 17830.969,
 17781.059,
 17777.72,
 17797.299,
 17738.082,
 17771.0,
 17593.736,
 17600.64,
 17581.71,
 17538.371,
 17467.764,
 17496.998,
 17489.959,
 17523.316,
 17518.883,
 17489.04,
 17517.854,
 17507.898,
 17438.2,
 17466.871,
 17491.81,
 17457.775,
 17429.29,
 17369.777,
 17389.434,
 17419.828,
 17408.143,
 17240.77,
 17328.072,
 17272.865,
 17255.54,
 17305.703,
 17353.797,
 17354.098,
 17326.244,
 17322.498,
 17333.396,
 17294.09,
 17205.115,
 17266.4,
 17266.986,
 17358.85,
 17379.828,
 17353.236,
 17361.6,
 17391.188,
 17392.914,
 17403.7,
 17292.135,
 17324.168,
 17333.

In [87]:
actual_test_data = stock_data.iloc[training_data_len:, :][["date", "close"]]
actual_test_data
save_df_to_csv(actual_test_data, "actual_test_data.csv")

In [88]:
predicted_test_data = actual_test_data.copy()
predicted_test_data["close"] = predictions
predicted_test_data
save_df_to_csv(predicted_test_data, "predicted_test_data.csv")

In [89]:
actual_test_data_list = list(actual_test_data["close"])

In [90]:
predicted_test_data_list = list(predicted_test_data["close"])

In [95]:
# Calculating the number of data points that were predicted correctly within a threshold percentage

threshold_percentage = 0.0005

correct_predictions = 0
for actual_data, predicted_data in zip(actual_test_data_list, predicted_test_data_list):
    if (abs(predicted_data-actual_data)/actual_data) <= threshold_percentage:
        correct_predictions += 1

print(round(correct_predictions/len(predictions) * 100, 2))

2.36
