In [1]:
# Imports

import math

import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler

from tools.csv_tools import save_df_to_csv
from tools.historical_data import retrieve_historical_data

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import finplot as fplt

In [2]:
# Setting model parameters

training_data_percentage = 0.8
lookback_period = 80

In [3]:
# Setting historical data parameters

stock_name = "NIFTY 50"
from_date_string = "27-Aug-2021 09:15"
to_date_string = "27-Jun-2023 15:15"
interval = "10minute"

In [4]:
# Retrieving historical data

stock_data = retrieve_historical_data(stock_name=stock_name, from_date_string=from_date_string, to_date_string=to_date_string, interval=interval, indicator_dates=True)

save_df_to_csv(stock_data, "source_data.csv")

In [5]:
# Using only close prices for the LSTM model

close_prices = stock_data['close']
values = close_prices.values
len(values)

17251

In [6]:
# Calculating length of training data

training_data_len = math.ceil(len(values) * training_data_percentage)
print(training_data_len)
values

13801


array([16628.35, 16581.35, 16598.5 , ..., 18816.65, 18824.8 , 18817.2 ])

In [7]:
# Scaling data

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(values.reshape(-1, 1))

train_data = scaled_data[0: training_data_len, :]
train_data

array([[0.38810865],
       [0.3753164 ],
       [0.37998421],
       ...,
       [0.71700553],
       [0.71708718],
       [0.7192918 ]])

In [8]:
# Constructing training data

x_train = []
y_train = []

for i in range(lookback_period, len(train_data)):
    x_train.append(train_data[i - lookback_period:i, 0])
    y_train.append(train_data[i, 0])

In [9]:
len(x_train)

13721

In [10]:
y_train

[0.4736261941700004,
 0.4771100405541482,
 0.47739582482784915,
 0.48524808796712104,
 0.48595574426390087,
 0.48192754688222905,
 0.4810701940611306,
 0.48108380283606866,
 0.4781579162243812,
 0.48081162733730665,
 0.48229498380555746,
 0.4837783402738074,
 0.483438120900356,
 0.4891401975994114,
 0.4880923219291802,
 0.4941618355515631,
 0.4937263547535453,
 0.4960942815927698,
 0.49918347350371484,
 0.5014561389183738,
 0.4892082414741017,
 0.495930976293514,
 0.49902016820445727,
 0.5051305081516553,
 0.5004354807980178,
 0.499727824501238,
 0.5055523801747359,
 0.5082469176124764,
 0.5084238316866712,
 0.5096077951062838,
 0.5195013744862678,
 0.5279252061729398,
 0.5258974987071658,
 0.5250129283361904,
 0.5384583979750142,
 0.538921096322909,
 0.539914536893388,
 0.5441332571241926,
 0.5497672899485586,
 0.5437794289758031,
 0.5323888843526303,
 0.5296399118151376,
 0.5350425954655558,
 0.5290683432677383,
 0.5209030783048902,
 0.5199640728341626,
 0.5190522849133119,
 0.518984

In [11]:
len(y_train)

13721

In [12]:
x_train, y_train = np.array(x_train), np.array(y_train)

In [13]:
y_train

array([0.47362619, 0.47711004, 0.47739582, ..., 0.71700553, 0.71708718,
       0.7192918 ])

In [14]:
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

In [15]:
x_train

array([[[0.38810865],
        [0.3753164 ],
        [0.37998421],
        ...,
        [0.46982935],
        [0.46895838],
        [0.47577638]],

       [[0.3753164 ],
        [0.37998421],
        [0.38876187],
        ...,
        [0.46895838],
        [0.47577638],
        [0.47362619]],

       [[0.37998421],
        [0.38876187],
        [0.38868022],
        ...,
        [0.47577638],
        [0.47362619],
        [0.47711004]],

       ...,

       [[0.71997224],
        [0.7112218 ],
        [0.71018753],
        ...,
        [0.72065268],
        [0.71069105],
        [0.71595765]],

       [[0.7112218 ],
        [0.71018753],
        [0.71387551],
        ...,
        [0.71069105],
        [0.71595765],
        [0.71700553]],

       [[0.71018753],
        [0.71387551],
        [0.7194551 ],
        ...,
        [0.71595765],
        [0.71700553],
        [0.71708718]]])

In [16]:
x_train.shape

(13721, 80, 1)

In [17]:
y_train

array([0.47362619, 0.47711004, 0.47739582, ..., 0.71700553, 0.71708718,
       0.7192918 ])

In [18]:
test_data = scaled_data[training_data_len - lookback_period:, :]

x_test = []
y_test = []

for i in range(lookback_period, len(test_data)):
    x_test.append(test_data[i - lookback_period:i, 0])
    y_test.append(test_data[i, 0])

len(test_data)

3530

In [19]:
x_test

[array([0.71387551, 0.7194551 , 0.71497782, 0.71496421, 0.71451512,
        0.71915571, 0.72255791, 0.72295256, 0.72515718, 0.72723932,
        0.72255791, 0.72073433, 0.72039411, 0.71579434, 0.71346724,
        0.71676057, 0.71642035, 0.71599848, 0.71652922, 0.71360333,
        0.71318146, 0.71271876, 0.7132495 , 0.71797175, 0.71749544,
        0.72159168, 0.72076155, 0.72151003, 0.71734574, 0.71669252,
        0.71832558, 0.72170055, 0.72382352, 0.72319752, 0.71880188,
        0.72189108, 0.7109224 , 0.71516834, 0.70594159, 0.69811655,
        0.69237364, 0.69302686, 0.70062056, 0.70143709, 0.70309736,
        0.70345119, 0.70432215, 0.70165483, 0.69531314, 0.69787159,
        0.69915081, 0.69611606, 0.69253695, 0.68661713, 0.69139381,
        0.69012819, 0.69056368, 0.69403391, 0.6986609 , 0.69938216,
        0.69906916, 0.6978852 , 0.69706867, 0.69900112, 0.69848398,
        0.69949103, 0.70459432, 0.70628181, 0.70413162, 0.70077026,
        0.69749054, 0.69765385, 0.69735445, 0.71

In [20]:
len(x_test)

3450

In [21]:
y_test

[0.7188835361040802,
 0.7202444135978876,
 0.7160937372417733,
 0.7195231485261688,
 0.7214828121172534,
 0.7219727280150234,
 0.7236466073324079,
 0.7262867096703944,
 0.7275387169646983,
 0.7286138101848074,
 0.7334993603875768,
 0.7344247570833673,
 0.7340709289349769,
 0.7392830897362614,
 0.7371601208459211,
 0.7381399526414629,
 0.7387387387387383,
 0.7389428703628091,
 0.7358264609019898,
 0.7355951117280419,
 0.7416101902506735,
 0.7449443401105036,
 0.7456247788574073,
 0.7440869872894034,
 0.7454614735581497,
 0.7459649982308596,
 0.7448626874608744,
 0.7446041207370513,
 0.7409841866035212,
 0.7415149288261063,
 0.740167660107236,
 0.7289676383331969,
 0.7242317846547444,
 0.7237146512070982,
 0.7272529326909991,
 0.7378133420429487,
 0.7406847935548839,
 0.7443183364633503,
 0.7401404425573608,
 0.7390517405623145,
 0.7419231920742488,
 0.7413652323017876,
 0.7453117770338311,
 0.7484145777197133,
 0.7458561280313543,
 0.7456383876323454,
 0.7451756892844505,
 0.74083449007

In [22]:
x_test, y_test = np.array(x_test), np.array(y_test)

In [23]:
x_test

array([[0.71387551, 0.7194551 , 0.71497782, ..., 0.71700553, 0.71708718,
        0.7192918 ],
       [0.7194551 , 0.71497782, 0.71496421, ..., 0.71708718, 0.7192918 ,
        0.71888354],
       [0.71497782, 0.71496421, 0.71451512, ..., 0.7192918 , 0.71888354,
        0.72024441],
       ...,
       [0.94608203, 0.94755178, 0.94957949, ..., 0.97578999, 0.9716257 ,
        0.97847092],
       [0.94755178, 0.94957949, 0.95113089, ..., 0.9716257 , 0.97847092,
        0.9837103 ],
       [0.94957949, 0.95113089, 0.94597316, ..., 0.97847092, 0.9837103 ,
        0.98592853]])

In [24]:
y_test

array([0.71888354, 0.72024441, 0.71609374, ..., 0.9837103 , 0.98592853,
       0.98385999])

In [25]:
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

In [26]:
x_test

array([[[0.71387551],
        [0.7194551 ],
        [0.71497782],
        ...,
        [0.71700553],
        [0.71708718],
        [0.7192918 ]],

       [[0.7194551 ],
        [0.71497782],
        [0.71496421],
        ...,
        [0.71708718],
        [0.7192918 ],
        [0.71888354]],

       [[0.71497782],
        [0.71496421],
        [0.71451512],
        ...,
        [0.7192918 ],
        [0.71888354],
        [0.72024441]],

       ...,

       [[0.94608203],
        [0.94755178],
        [0.94957949],
        ...,
        [0.97578999],
        [0.9716257 ],
        [0.97847092]],

       [[0.94755178],
        [0.94957949],
        [0.95113089],
        ...,
        [0.9716257 ],
        [0.97847092],
        [0.9837103 ]],

       [[0.94957949],
        [0.95113089],
        [0.94597316],
        ...,
        [0.97847092],
        [0.9837103 ],
        [0.98592853]]])

In [27]:
y_test

array([0.71888354, 0.72024441, 0.71609374, ..., 0.9837103 , 0.98592853,
       0.98385999])

In [28]:
# Defining the LSTM model
model = keras.Sequential()
model.add(layers.LSTM(100, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(layers.LSTM(100, return_sequences=False))
model.add(layers.Dense(25))
model.add(layers.Dense(1))
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 80, 100)           40800     
                                                                 
 lstm_1 (LSTM)               (None, 100)               80400     
                                                                 
 dense (Dense)               (None, 25)                2525      
                                                                 
 dense_1 (Dense)             (None, 1)                 26        
                                                                 
Total params: 123,751
Trainable params: 123,751
Non-trainable params: 0
_________________________________________________________________
None


In [29]:
# Compiling the model

model.compile(optimizer='adam', loss='mean_squared_error')

In [30]:
# Training the model

model.fit(x_train, y_train, batch_size=1, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x2df52811be0>

In [31]:
# Generating predictions for the test data

predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)



In [32]:
predictions

array([[17873.729],
       [17871.367],
       [17877.021],
       ...,
       [18825.252],
       [18842.28 ],
       [18850.018]], dtype=float32)

In [33]:
predictions = predictions.reshape(-1)
predictions

array([17873.729, 17871.367, 17877.021, ..., 18825.252, 18842.28 ,
       18850.018], dtype=float32)

In [34]:
predictions = list(predictions)
predictions

[17873.729,
 17871.367,
 17877.021,
 17860.215,
 17875.096,
 17881.242,
 17882.969,
 17889.635,
 17899.6,
 17903.924,
 17908.07,
 17927.041,
 17929.268,
 17928.139,
 17948.734,
 17938.738,
 17943.768,
 17945.504,
 17946.197,
 17933.945,
 17933.986,
 17957.252,
 17968.434,
 17970.824,
 17965.072,
 17971.057,
 17972.41,
 17968.049,
 17967.37,
 17953.13,
 17956.244,
 17950.377,
 17907.018,
 17891.639,
 17889.822,
 17903.371,
 17943.73,
 17952.285,
 17967.113,
 17950.164,
 17947.643,
 17958.594,
 17955.217,
 17970.975,
 17981.732,
 17971.217,
 17971.475,
 17969.412,
 17952.5,
 17943.986,
 17946.76,
 17958.898,
 17960.79,
 17963.441,
 17949.553,
 17953.486,
 17953.459,
 17946.71,
 17941.416,
 17957.5,
 17965.418,
 17964.97,
 17962.639,
 17982.145,
 17986.658,
 17993.0,
 18036.736,
 18044.523,
 18062.877,
 18052.51,
 18157.143,
 18156.848,
 18162.586,
 18148.477,
 18156.268,
 18138.07,
 18135.043,
 18123.592,
 18140.338,
 18146.373,
 18147.473,
 18144.258,
 18141.938,
 18143.514,
 18133.496,

In [35]:
actual_test_data = stock_data.iloc[training_data_len:, :][["date", "close"]]
actual_test_data
save_df_to_csv(actual_test_data, "actual_test_data.csv")

In [36]:
predicted_test_data = actual_test_data.copy()
predicted_test_data["close"] = predictions
predicted_test_data
save_df_to_csv(predicted_test_data, "predicted_test_data.csv")

In [37]:
actual_test_data_list = list(actual_test_data["close"])

In [38]:
predicted_test_data_list = list(predicted_test_data["close"])

In [43]:
# Calculating the number of data points that were predicted correctly within a threshold percentage

threshold_percentage = 0.001

correct_predictions = 0
for actual_data, predicted_data in zip(actual_test_data_list, predicted_test_data_list):
    if (abs(predicted_data-actual_data)/actual_data) <= threshold_percentage:
        correct_predictions += 1

print(round(correct_predictions/len(predictions) * 100, 2))

24.14
