In [5]:
# Import required libraries
import numpy as np
import pandas as pd
import pickle
import datetime
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from scikeras.wrappers import KerasRegressor

In [6]:
np.random.seed(1001)

In [7]:
df = pd.read_csv('Nifty_50_final_data.csv')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,%Change
0,03-11-1995,994.2,1000.91,992.69,1000.0,0.58%
1,06-11-1995,1001.53,1001.53,988.92,988.92,-1.28%
2,07-11-1995,987.17,987.17,977.05,978.22,-0.91%
3,08-11-1995,976.28,976.28,962.98,964.01,-1.27%
4,09-11-1995,960.32,960.32,952.13,953.07,-0.76%


In [8]:
new_df = df.drop(['%Change'], axis=1)
new_df['%Change'] = ((df["Close"] - df["Open"]) / df["Open"]) * 100
new_df.head()

Unnamed: 0,Date,Open,High,Low,Close,%Change
0,03-11-1995,994.2,1000.91,992.69,1000.0,0.583384
1,06-11-1995,1001.53,1001.53,988.92,988.92,-1.259074
2,07-11-1995,987.17,987.17,977.05,978.22,-0.906632
3,08-11-1995,976.28,976.28,962.98,964.01,-1.256812
4,09-11-1995,960.32,960.32,952.13,953.07,-0.754957


In [9]:
new_df['Date'] = pd.to_datetime(new_df['Date'], format='mixed')
new_df.head()

Unnamed: 0,Date,Open,High,Low,Close,%Change
0,1995-03-11,994.2,1000.91,992.69,1000.0,0.583384
1,1995-06-11,1001.53,1001.53,988.92,988.92,-1.259074
2,1995-07-11,987.17,987.17,977.05,978.22,-0.906632
3,1995-08-11,976.28,976.28,962.98,964.01,-1.256812
4,1995-09-11,960.32,960.32,952.13,953.07,-0.754957


In [10]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7236 entries, 0 to 7235
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     7236 non-null   datetime64[ns]
 1   Open     7235 non-null   float64       
 2   High     7235 non-null   float64       
 3   Low      7235 non-null   float64       
 4   Close    7236 non-null   float64       
 5   %Change  7235 non-null   float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 339.3 KB


In [11]:
print(new_df.isnull().sum())
print(new_df.shape)
new_df.dropna(inplace=True)
print(new_df.isnull().sum())
print(new_df.shape)

Date       0
Open       1
High       1
Low        1
Close      0
%Change    1
dtype: int64
(7236, 6)
Date       0
Open       0
High       0
Low        0
Close      0
%Change    0
dtype: int64
(7235, 6)


In [12]:
# Select features for the model
cols = ['Open', 'High', 'Low', 'Close', '%Change']
data = new_df[cols]

In [13]:
data['%Change'].describe()

count    7235.000000
mean       -0.022360
std         1.353578
min       -12.243286
25%        -0.637740
50%        -0.011388
75%         0.619093
max        17.695983
Name: %Change, dtype: float64

In [14]:
# Scale the data
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)

In [15]:
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [16]:

# Prepare data for LSTM
X, y = [], []
lookback = 60  # Use 60 days of historical data to predict next day
for i in range(lookback, len(data_scaled)):
    X.append(data_scaled[i-lookback:i, :])
    y.append(data_scaled[i, :2])  # Predict Open and Close prices

X, y = np.array(X), np.array(y)


In [17]:
# Split into training and validation sets
train_size = int(0.8 * len(X))
X_train, X_val = X[:train_size], X[train_size:]
y_train, y_val = y[:train_size], y[train_size:]


In [18]:
# Define the RNN model
def create_rnn_model(input_shape, units=50, **kwargs):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(LSTM(units=units, return_sequences=False))
    model.add(Dense(y_train.shape[1]))  # Match the number of target columns
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [19]:
# Initialize KerasRegressor
rnn_model = KerasRegressor(
    model=create_rnn_model,
    input_shape=(X_train.shape[1], X_train.shape[2]),
    units=50,
    epochs=20,
    batch_size=32
)

log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)


early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


In [21]:

history = rnn_model.fit(X_train, y_train,validation_data=(X_val,y_val), epochs=100, batch_size=32, validation_split=0.2, callbacks=[tensorflow_callback, early_stopping_callback])
# # Train the model
# rnn_model.fit(X_train, y_train)

Epoch 1/100
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 31ms/step - loss: 0.0035 - val_loss: 0.0039
Epoch 2/100
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - loss: 2.4873e-05 - val_loss: 0.0040
Epoch 3/100
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - loss: 2.3264e-05 - val_loss: 0.0030
Epoch 4/100
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - loss: 1.9606e-05 - val_loss: 0.0026
Epoch 5/100
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - loss: 1.5648e-05 - val_loss: 0.0018
Epoch 6/100
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 31ms/step - loss: 1.2948e-05 - val_loss: 0.0013
Epoch 7/100
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - loss: 1.0476e-05 - val_loss: 9.1428e-04
Epoch 8/100
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - loss: 8.4149e-06 - val_loss:

In [24]:
from tensorflow.keras.models import save_model

# Access the Keras model
keras_model = rnn_model.model_

# Save the model in .h5 or .keras format
save_model(keras_model, 'single_pred_rnn_model.keras')


In [25]:
%load_ext tensorboard

In [26]:
%tensorboard --logdir logs/fit

In [27]:
# Predicting multiple future days
def create_multi_step_dataset(data, lookback, steps_ahead):
    X, y = [], []
    for i in range(lookback, len(data) - steps_ahead + 1):
        X.append(data[i - lookback:i, :])
        y.append(data[i:i + steps_ahead, :2])  # Predict Open and Close for multiple days
    return np.array(X), np.array(y)

steps_ahead = 5  # Predict next 5 days
X, y = create_multi_step_dataset(data_scaled, lookback, steps_ahead)

In [28]:
# Split into training and validation sets
X_train, X_val = X[:train_size], X[train_size:]
y_train, y_val = y[:train_size], y[train_size:]


In [29]:
# Define multi-step RNN model
def create_multi_step_rnn_model(input_shape, units=50, steps_ahead=5, **kwargs):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(LSTM(units=units, return_sequences=False))
    model.add(Dense(steps_ahead * 2))  # Predict Open and Close for multiple days
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [30]:
# Initialize and train multi-step RNN model
multi_step_rnn_model = KerasRegressor(
    model=create_multi_step_rnn_model,
    input_shape=(X_train.shape[1], X_train.shape[2]),
    units=50,
    steps_ahead=steps_ahead,
    epochs=20,
    batch_size=32
)

log_dir="logs/multi_step/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)


early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)



In [35]:
history = multi_step_rnn_model.fit(
    X_train, 
    y_train, 
    validation_data=(X_val, y_val), 
    epochs=100, 
    batch_size=32, 
    validation_split=0.2, 
    callbacks=[tensorflow_callback, early_stopping_callback]
)



# multi_step_rnn_model.fit(X_train, y_train.reshape(y_train.shape[0], -1))

ValueError: Found array with dim 3. None expected <= 2.

In [None]:
from tensorflow.keras.models import save_model

# Access the Keras model
keras_model_multi = multi_step_rnn_model.model_

# Save the model in .h5 or .keras format
save_model(keras_model_multi, 'multi_step_rnn_model.keras')


In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir logs/multi_step