In [1]:
#importing the required libraries.
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
import tensorflow as tf
import keras
from keras import layers
from tensorflow.keras.layers import LSTM
from keras.layers import Activation, Dense
from tensorflow.keras.models import *
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import datetime as dt
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the price history, using the 'Date ' column as the index.
# NOTE(review): the column name is written with a trailing space; presumably
# that matches the CSV header exactly — confirm against the file.
# Rows containing any missing value are dropped, and the cleaned frame is bound
# to a fresh object instead of being mutated in place.
stock_data = pd.read_csv('Nifty50.csv', index_col='Date ').dropna()

# A preview is only rendered when it is the last expression of the cell.
stock_data.head(3)

In [None]:
# Parse the index labels into timestamps so they can drive a date axis.
stock_data.index = pd.to_datetime(stock_data.index)

# One 15x10 figure with a single axes; ticks are formatted as YYYY-MM-DD and
# placed every 60 days.
fig, ax = plt.subplots(figsize=(15, 10))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=60))

# Plain `date` objects taken from the DatetimeIndex are used as x values.
x_dates = stock_data.index.date
for column in ('High', 'Low'):
    ax.plot(x_dates, stock_data[column], label=column)

ax.set_xlabel('Time Scale')
# NOTE(review): the label says 'scaled USD', but the series plotted here are
# the raw columns of `stock_data` — confirm the intended unit/label.
ax.set_ylabel('scaled USD')
ax.legend()
fig.autofmt_xdate()  # rotate/align the date labels so they do not overlap
plt.show()

In [None]:
# Keep only the final 60 rows of the frame for a zoomed-in view.
stock_data_last_2_months = stock_data.tail(60)

# Same layout as the full-history chart, but with a tick every 10 days.
fig, ax = plt.subplots(figsize=(15, 10))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=10))

x_dates = stock_data_last_2_months.index.date
for column in ('High', 'Low'):
    ax.plot(x_dates, stock_data_last_2_months[column], label=column)

ax.set_xlabel('Time Scale')
ax.set_ylabel('scaled USD')
ax.legend()
fig.autofmt_xdate()  # rotate/align the date labels so they do not overlap
plt.show()

In [5]:
# Features: the first three columns of the frame, selected by position.
X_feat = stock_data.iloc[:, :3]
# Target: the 'Close' column.
target_y = stock_data['Close']

In [6]:
# Standardise each feature column to zero mean and unit variance.
# The scaler is fitted on all rows of `X_feat`.
sc = StandardScaler()
scaled_values = sc.fit_transform(X_feat.values)

# Re-wrap the scaled array so it keeps the original column names and index.
X_ft = pd.DataFrame(scaled_values, index=X_feat.index, columns=X_feat.columns)

In [7]:
def lstm_split(data, n_steps):
    """Cut a 2-D table into overlapping windows for sequence models.

    Every column except the last is treated as a feature; the last column is
    the target. Window ``i`` covers rows ``i .. i + n_steps - 1`` and is paired
    with the target value found in the *final* row of that window.

    Args:
        data: 2-D array-like of shape ``(n_rows, n_columns)``. It is converted
            with ``np.asarray``, so nested lists are accepted as well as
            ndarrays.
        n_steps: Number of consecutive rows per window; must be >= 1.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(n_rows - n_steps + 1, n_steps, n_columns - 1)`` and ``y`` has shape
        ``(n_rows - n_steps + 1,)``. When ``data`` has fewer than ``n_steps``
        rows, both arrays are empty but ``X`` still carries the window and
        feature dimensions, so ``X.shape[1]`` / ``X.shape[2]`` remain usable.

    Raises:
        ValueError: If ``data`` is not 2-D or ``n_steps`` is smaller than 1.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("data must be 2-D (rows x columns), got ndim=%d" % data.ndim)
    if n_steps < 1:
        raise ValueError("n_steps must be >= 1, got %r" % (n_steps,))

    n_windows = len(data) - n_steps + 1
    if n_windows <= 0:
        # Not enough rows for a single window: return correctly shaped empties.
        n_features = max(data.shape[1] - 1, 0)
        empty_X = np.empty((0, n_steps, n_features), dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + n_steps, :-1] for start in range(n_windows)])
    # The target of window `start` is row `start + n_steps - 1`, i.e. the tail
    # of the last column; copy so the result does not alias `data`.
    y = data[n_steps - 1:, -1].copy()
    return X, y

In [8]:
N_STEPS = 2  # rows per window handed to lstm_split

# Build (window, target) pairs from the raw frame: lstm_split treats the last
# column as the target and every other column as a feature.
# NOTE(review): the scaled `X_ft` and the `target_y` ('Close') prepared in the
# earlier cells are not used here — confirm that training on the raw
# `stock_data.values` is intended.
X1, y1 = lstm_split(stock_data.values, n_steps=N_STEPS)

train_split = 0.8  # fraction of samples (in chronological order) used for training
split_idx = int(np.ceil(len(X1) * train_split))

# Sample i takes its target from row i + N_STEPS - 1, so the date that belongs
# to sample i is the date of that row. Dropping the first N_STEPS - 1 dates
# yields exactly one date per sample.
date_index = stock_data.index
sample_dates = date_index[N_STEPS - 1:]
assert len(sample_dates) == len(X1), "expected one date per sample"

# Chronological split: no shuffling, the tail of the series is the test set.
X_train, X_test = X1[:split_idx], X1[split_idx:]
y_train, y_test = y1[:split_idx], y1[split_idx:]
X_train_date, X_test_date = sample_dates[:split_idx], sample_dates[split_idx:]

print(X1.shape, X_train.shape, X_test.shape, y_test.shape)  # sanity-check the split sizes

(2052, 2, 5) (1642, 2, 5) (410, 2, 5) (410,)


In [None]:
# Fix the random seeds so weight initialisation is repeatable across runs.
SEED = 42
keras.utils.set_random_seed(SEED)

# Input windows have shape (time steps, features), taken from the training set.
window_len, feature_count = X_train.shape[1], X_train.shape[2]

lstm = Sequential()
# NOTE(review): with return_sequences=True the LSTM emits one vector per time
# step, so the Dense stack below is applied per step and the model output is
# (batch, time steps, 1) rather than one value per sample — confirm this is
# what the training targets and the evaluation code expect.
lstm.add(LSTM(32, input_shape=(window_len, feature_count),
              activation='linear', return_sequences=True))

# Stack of fully connected layers, all with linear activation.
for units in (40, 80, 50, 20, 80, 10):
    lstm.add(Dense(units, activation='linear'))

lstm.add(Dense(1))  # single regression output (per time step)

# Mean absolute error loss, Adam optimiser.
lstm.compile(loss='mean_absolute_error', optimizer='adam')
lstm.summary()  # print layer structure and parameter counts

In [10]:
# The network is an LSTM with return_sequences=True followed by Dense layers
# ending in Dense(1), so it outputs one value per time step:
# (samples, time steps, 1). `y_train` holds a single value per sample, so it is
# expanded to the same shape — every time step of a window is trained towards
# that window's target — instead of relying on implicit broadcasting between
# mismatched shapes inside the loss.
y_train_seq = np.repeat(y_train.reshape(-1, 1, 1), X_train.shape[1], axis=1)

# epochs     : full passes over the training set
# batch_size : samples per gradient update
# verbose=2  : one log line per epoch
# shuffle    : False keeps the samples in their original (chronological) order
history = lstm.fit(X_train, y_train_seq, epochs=300, batch_size=10,
                   verbose=2, shuffle=False)

165/165 - 1s - loss: 4755.5249 - 593ms/epoch - 4ms/step
Epoch 282/300
165/165 - 1s - loss: 4704.5200 - 593ms/epoch - 4ms/step
Epoch 283/300
165/165 - 1s - loss: 4734.5093 - 611ms/epoch - 4ms/step
Epoch 284/300
165/165 - 1s - loss: 4824.4214 - 592ms/epoch - 4ms/step
Epoch 285/300
165/165 - 1s - loss: 4815.5698 - 585ms/epoch - 4ms/step
Epoch 286/300
165/165 - 1s - loss: 4831.0366 - 629ms/epoch - 4ms/step
Epoch 287/300
165/165 - 1s - loss: 4766.6523 - 606ms/epoch - 4ms/step
Epoch 288/300
165/165 - 1s - loss: 4815.0117 - 633ms/epoch - 4ms/step
Epoch 289/300
165/165 - 1s - loss: 4843.5537 - 617ms/epoch - 4ms/step
Epoch 290/300
165/165 - 1s - loss: 4767.4395 - 616ms/epoch - 4ms/step
Epoch 291/300
165/165 - 1s - loss: 4857.9321 - 606ms/epoch - 4ms/step
Epoch 292/300
165/165 - 1s - loss: 4814.3882 - 641ms/epoch - 4ms/step
Epoch 293/300
165/165 - 1s - loss: 4736.7139 - 651ms/epoch - 4ms/step
Epoch 294/300
165/165 - 1s - loss: 4816.4043 - 647ms/epoch - 4ms/step
Epoch 295/300
165/165 - 1s - loss:

In [None]:
# Training loss per epoch, as recorded by Keras in `history.history['loss']`.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.set_title('Model Loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
plt.show()

In [None]:
# Run the trained network on the held-out windows; the predictions come back
# as a NumPy array.
y_pred = lstm.predict(x=X_test)

In [None]:
# Flatten the 3-D prediction array to one value per sample by taking index 0
# on the second and third axes.
# NOTE(review): index 0 on the second axis is the *first* time step of each
# window — confirm that this, rather than the last step, is the intended
# prediction.
y_pred_r = y_pred[:, 0, 0]

# Plot only as many points as all three series have in common, instead of a
# hard-coded length tied to one particular dataset size.
n_points = min(len(X_test_date), len(y_test), len(y_pred_r))

fig, ax = plt.subplots(figsize=(15, 10))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))  # YYYY-MM-DD tick labels
ax.xaxis.set_major_locator(mdates.DayLocator(interval=60))      # one tick every 60 days

x_dates = X_test_date  # dates associated with the test samples
ax.plot(x_dates[:n_points], y_pred_r[:n_points], label='Predicted')
ax.plot(x_dates[:n_points], y_test[:n_points], label='Real')

ax.set_xlabel('Time Scale')
ax.set_ylabel('USD')
ax.legend()
fig.autofmt_xdate()  # rotate/align the date labels so they do not overlap
plt.show()

In [None]:
# Regression error metrics comparing the held-out targets with the predictions.
mae = mean_absolute_error(y_test, y_pred_r)
mape = mean_absolute_percentage_error(y_test, y_pred_r)
mse = mean_squared_error(y_test, y_pred_r)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE

# Report every metric on its own line.
for label, value in (
    ("Mean Absolute Error (MAE):", mae),
    ("Mean Absolute Percentage Error (MAPE):", mape),
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
):
    print(label, value)

In [None]:
# Headline "accuracy" figure: the complement of the MAPE, scaled to a
# percentage, i.e. 100 * (1 - mape). It summarises the average relative error;
# it is not a count or share of predictions within some tolerance.
accurate = 100 * (1 - mape)
print("Model accurate:", accurate)