In [118]:
# Imports

import math

import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler

from tools.csv_tools import save_df_to_csv
from tools.historical_data import retrieve_historical_data

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import finplot as fplt

In [119]:
# Setting model parameters

training_data_percentage = 0.8
lookback_period = 60

In [120]:
# Setting historical data parameters

stock_name = "NIFTY 50"
from_date_string = "29-Jun-2020 09:15"
to_date_string = "27-Jun-2023 15:15"
interval = "60minute"

In [121]:
# Retrieving historical data

stock_data = retrieve_historical_data(stock_name=stock_name, from_date_string=from_date_string, to_date_string=to_date_string, interval=interval, indicator_dates=True)

save_df_to_csv(stock_data, "source_data.csv")

In [122]:
# Using only close prices for the LSTM model

close_prices = stock_data['close']
values = close_prices.values
len(values)

5215

In [123]:
# Calculating length of training data

training_data_len = math.ceil(len(values) * training_data_percentage)
print(training_data_len)
values

4172


array([10242.2 , 10255.95, 10254.05, ..., 18775.4 , 18824.8 , 18818.6 ])

In [124]:
# Scaling data

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(values.reshape(-1, 1))

train_data = scaled_data[0: training_data_len, :]
train_data

array([[0.        ],
       [0.00159378],
       [0.00137355],
       ...,
       [0.92342911],
       [0.92812931],
       [0.92836693]])

In [125]:
# Constructing training data

x_train = []
y_train = []

for i in range(lookback_period, len(train_data)):
    x_train.append(train_data[i - lookback_period:i, 0])
    y_train.append(train_data[i, 0])

In [126]:
len(x_train)

4112

In [127]:
y_train

[0.06444658236064593,
 0.06598240469208227,
 0.06853244931786318,
 0.0656520580019242,
 0.05909148864650593,
 0.05868000417280039,
 0.0596594531313388,
 0.058992964195055064,
 0.06023900872810728,
 0.061316982138096554,
 0.07131431618235151,
 0.06880484044834412,
 0.06557092021837652,
 0.06599399580401744,
 0.06301508003662804,
 0.06429010234951837,
 0.06636491138594924,
 0.05510994169670691,
 0.04475907873842333,
 0.04570954991712339,
 0.0379261182525239,
 0.04514158543229052,
 0.042603131918445,
 0.04264949636618631,
 0.05953195090004981,
 0.06455090236806416,
 0.0628470089135651,
 0.06385543565194207,
 0.06312519560001384,
 0.04162947851587395,
 0.04586602992825095,
 0.049528821299827275,
 0.04739026114775169,
 0.04729753225226885,
 0.04666002109582368,
 0.048381301218225925,
 0.05841340859828681,
 0.056315417337985174,
 0.0657216046735365,
 0.06599979135998524,
 0.06671264474401029,
 0.0650203424014466,
 0.06876427155657039,
 0.07624633431085037,
 0.07981639678694363,
 0.0879069929

In [128]:
len(y_train)

4112

In [129]:
x_train, y_train = np.array(x_train), np.array(y_train)

In [130]:
y_train

array([0.06444658, 0.0659824 , 0.06853245, ..., 0.92342911, 0.92812931,
       0.92836693])

In [131]:
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

In [132]:
x_train

array([[[0.        ],
        [0.00159378],
        [0.00137355],
        ...,
        [0.06190233],
        [0.06178642],
        [0.06069686]],

       [[0.00159378],
        [0.00137355],
        [0.00412644],
        ...,
        [0.06178642],
        [0.06069686],
        [0.06444658]],

       [[0.00137355],
        [0.00412644],
        [0.00392939],
        ...,
        [0.06069686],
        [0.06444658],
        [0.0659824 ]],

       ...,

       [[0.90213045],
        [0.90330115],
        [0.90077429],
        ...,
        [0.92317411],
        [0.92438538],
        [0.92308138]],

       [[0.90330115],
        [0.90077429],
        [0.89858357],
        ...,
        [0.92438538],
        [0.92308138],
        [0.92342911]],

       [[0.90077429],
        [0.89858357],
        [0.89944131],
        ...,
        [0.92308138],
        [0.92342911],
        [0.92812931]]])

In [133]:
x_train.shape

(4112, 60, 1)

In [134]:
y_train

array([0.06444658, 0.0659824 , 0.06853245, ..., 0.92342911, 0.92812931,
       0.92836693])

In [135]:
test_data = scaled_data[training_data_len - lookback_period:, :]

x_test = []
y_test = []

for i in range(lookback_period, len(test_data)):
    x_test.append(test_data[i - lookback_period:i, 0])
    y_test.append(test_data[i, 0])

len(test_data)

1103

In [136]:
x_test

[array([0.89858357, 0.89944131, 0.90239125, 0.90348081, 0.93521148,
        0.93514773, 0.936359  , 0.9368748 , 0.93951758, 0.93971463,
        0.9403985 , 0.93992327, 0.93930314, 0.93823676, 0.93777891,
        0.94246172, 0.93751811, 0.93633582, 0.93485795, 0.93224995,
        0.93329895, 0.93751811, 0.9363532 , 0.94634474, 0.94807761,
        0.94534211, 0.94803705, 0.94599701, 0.94673305, 0.94393959,
        0.94697646, 0.94573621, 0.94350492, 0.94097806, 0.94132   ,
        0.9445771 , 0.9461419 , 0.93667196, 0.9379238 , 0.93247598,
        0.92548654, 0.92764828, 0.92874364, 0.92442015, 0.93660241,
        0.93449283, 0.91996917, 0.91981269, 0.91804504, 0.91659036,
        0.91723946, 0.91907086, 0.91752344, 0.92323207, 0.92317411,
        0.92438538, 0.92308138, 0.92342911, 0.92812931, 0.92836693]),
 array([0.89944131, 0.90239125, 0.90348081, 0.93521148, 0.93514773,
        0.936359  , 0.9368748 , 0.93951758, 0.93971463, 0.9403985 ,
        0.93992327, 0.93930314, 0.93823676, 0.

In [137]:
len(x_test)

1043

In [138]:
y_test

[0.9305228750594041,
 0.9304533283877923,
 0.9316935773648767,
 0.9309575417569806,
 0.9327773463308333,
 0.9306851506264995,
 0.9286682971497455,
 0.9399986090665677,
 0.9415228402860685,
 0.9437657204455621,
 0.9433484404158892,
 0.9457593916984457,
 0.9551713745899646,
 0.9584284770438025,
 0.9545106812096484,
 0.952708263303699,
 0.9528879255386971,
 0.9554031968286718,
 0.9556582012912498,
 0.9600280504908836,
 0.9569853836078497,
 0.9617783083931244,
 0.9647282463806752,
 0.9680085310583844,
 0.9695559445017559,
 0.9666929398537205,
 0.9646702908209985,
 0.9633604951723023,
 0.9722972424744707,
 0.9754384338089552,
 0.9765222027749123,
 0.9758267360587902,
 0.9753688871373429,
 0.9704426645648117,
 0.9691792333638565,
 0.9738214736939719,
 0.9728594114033358,
 0.9739026114775189,
 0.976840958353135,
 0.9764236783234612,
 0.9879104702514112,
 0.989272425903817,
 0.9969283553371275,
 0.9966153953148724,
 0.9959489063785887,
 0.9970152886766426,
 0.994778204073117,
 0.99288885282765

In [139]:
x_test, y_test = np.array(x_test), np.array(y_test)

In [140]:
x_test

array([[0.89858357, 0.89944131, 0.90239125, ..., 0.92342911, 0.92812931,
        0.92836693],
       [0.89944131, 0.90239125, 0.90348081, ..., 0.92812931, 0.92836693,
        0.93052288],
       [0.90239125, 0.90348081, 0.93521148, ..., 0.92836693, 0.93052288,
        0.93045333],
       ...,
       [0.98920867, 0.98757433, 0.98349426, ..., 0.98293788, 0.98621817,
        0.98533145],
       [0.98757433, 0.98349426, 0.98305379, ..., 0.98621817, 0.98533145,
        0.98909276],
       [0.98349426, 0.98305379, 0.98102535, ..., 0.98533145, 0.98909276,
        0.99481877]])

In [141]:
y_test

array([0.93052288, 0.93045333, 0.93169358, ..., 0.98909276, 0.99481877,
       0.99410012])

In [142]:
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

In [143]:
x_test

array([[[0.89858357],
        [0.89944131],
        [0.90239125],
        ...,
        [0.92342911],
        [0.92812931],
        [0.92836693]],

       [[0.89944131],
        [0.90239125],
        [0.90348081],
        ...,
        [0.92812931],
        [0.92836693],
        [0.93052288]],

       [[0.90239125],
        [0.90348081],
        [0.93521148],
        ...,
        [0.92836693],
        [0.93052288],
        [0.93045333]],

       ...,

       [[0.98920867],
        [0.98757433],
        [0.98349426],
        ...,
        [0.98293788],
        [0.98621817],
        [0.98533145]],

       [[0.98757433],
        [0.98349426],
        [0.98305379],
        ...,
        [0.98621817],
        [0.98533145],
        [0.98909276]],

       [[0.98349426],
        [0.98305379],
        [0.98102535],
        ...,
        [0.98533145],
        [0.98909276],
        [0.99481877]]])

In [144]:
y_test

array([0.93052288, 0.93045333, 0.93169358, ..., 0.98909276, 0.99481877,
       0.99410012])

In [145]:
# Defining the LSTM model
model = keras.Sequential()
model.add(layers.LSTM(100, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(layers.LSTM(100, return_sequences=False))
model.add(layers.Dense(25))
model.add(layers.Dense(1))
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 60, 100)           40800     
                                                                 
 lstm_3 (LSTM)               (None, 100)               80400     
                                                                 
 dense_2 (Dense)             (None, 25)                2525      
                                                                 
 dense_3 (Dense)             (None, 1)                 26        
                                                                 
Total params: 123,751
Trainable params: 123,751
Non-trainable params: 0
_________________________________________________________________
None


In [146]:
# Compiling the model

model.compile(optimizer='adam', loss='mean_squared_error')

In [147]:
# Training the model

model.fit(x_train, y_train, batch_size=1, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1e3b45cb7f0>

In [161]:
# Generating predictions for the test data

predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)



In [162]:
predictions

array([[18247.05 ],
       [18264.87 ],
       [18262.852],
       ...,
       [18729.705],
       [18762.52 ],
       [18812.982]], dtype=float32)

In [163]:
predictions = predictions.reshape(-1)
predictions

array([18247.05 , 18264.87 , 18262.852, ..., 18729.705, 18762.52 ,
       18812.982], dtype=float32)

In [164]:
predictions = list(predictions)
predictions

[18247.05,
 18264.87,
 18262.852,
 18272.568,
 18264.775,
 18280.03,
 18260.418,
 18241.373,
 18343.174,
 18357.143,
 18375.709,
 18370.352,
 18390.748,
 18474.723,
 18502.63,
 18464.469,
 18445.725,
 18446.172,
 18468.295,
 18469.975,
 18508.723,
 18480.607,
 18523.328,
 18549.879,
 18578.922,
 18591.896,
 18564.828,
 18545.727,
 18533.762,
 18614.922,
 18643.102,
 18651.55,
 18643.855,
 18638.836,
 18593.78,
 18582.436,
 18625.13,
 18616.785,
 18626.178,
 18652.902,
 18649.121,
 18753.123,
 18764.281,
 18831.744,
 18826.133,
 18817.498,
 18825.117,
 18803.057,
 18784.654,
 18776.457,
 18674.215,
 18670.975,
 18618.102,
 18657.182,
 18666.45,
 18675.326,
 18688.258,
 18575.836,
 18621.47,
 18654.518,
 18666.205,
 18636.283,
 18706.404,
 18682.844,
 18620.754,
 18590.523,
 18588.543,
 18600.53,
 18602.623,
 18639.307,
 18635.248,
 18649.475,
 18568.793,
 18578.71,
 18582.457,
 18555.154,
 18537.771,
 18535.576,
 18590.07,
 18564.16,
 18567.154,
 18572.232,
 18576.596,
 18603.46,
 18603

In [165]:
actual_test_data = stock_data.iloc[training_data_len:, :][["date", "close"]]
actual_test_data
save_df_to_csv(actual_test_data, "actual_test_data.csv")

In [166]:
predicted_test_data = actual_test_data.copy()
predicted_test_data["close"] = predictions
predicted_test_data
save_df_to_csv(predicted_test_data, "predicted_test_data.csv")

In [168]:
actual_test_data_list = list(actual_test_data["close"])

In [169]:
predicted_test_data_list = list(predicted_test_data["close"])

In [185]:
# Calculating the number of data points that were predicted correctly within a threshold percentage

threshold_percentage = 0.0015

correct_predictions = 0
for actual_data, predicted_data in zip(actual_test_data_list, predicted_test_data_list):
    if (abs(predicted_data-actual_data)/actual_data) <= threshold_percentage:
        correct_predictions += 1

print(round(correct_predictions/len(predictions) * 100, 2))

56.66
