LSTM CODE

In [10]:
import pandas as pd
import numpy as np
import yfinance as yf
import os
# import math
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
# from itertools import cycle
import warnings
warnings.filterwarnings("ignore")


In [11]:
### Create new dataset according to requirement of time-series prediction
# convert an array of values into a dataset matrix
def create_dataset(dataset, time_step=1):
    """Turn a single-column series into supervised (window, next value) pairs.

    Each sample in ``X`` is ``time_step`` consecutive values taken from column 0
    of ``dataset``; the matching entry of ``y`` is the value that immediately
    follows that window.

    Parameters
    ----------
    dataset : array-like, 2-D
        Series values; only column 0 is read.
    time_step : int, default 1
        Number of consecutive values in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape ``(n_samples, time_step)`` and ``y`` with shape
        ``(n_samples,)``, where ``n_samples = len(dataset) - time_step``.
        When the series is too short to form a single pair, empty arrays of
        shape ``(0, time_step)`` and ``(0,)`` are returned so that callers can
        still read ``X.shape[1]``.
    """
    values = np.asarray(dataset)
    # The last valid window starts at len - time_step - 1, so there are
    # len - time_step samples. (The previous bound of len - time_step - 1
    # silently discarded the final window/target pair.)
    n_samples = len(values) - time_step
    if n_samples <= 0:
        return np.empty((0, time_step)), np.empty((0,))

    dataX = [values[i:i + time_step, 0] for i in range(n_samples)]
    dataY = [values[i + time_step, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)

def getpredictedvalues(selectedscript_1, time_step=13, pred_days=5, epochs=10, batch_size=32):
    """Train an LSTM on the 'Close' column and forecast the next ``pred_days`` values.

    The input frame is cleaned (rows with nulls dropped, 'Date' parsed and set
    as the index), the close prices are min-max scaled, split 80:20 in
    chronological order, windowed with ``create_dataset`` and used to fit a
    stacked LSTM. The fitted model is scored with R2 on both partitions and
    then rolled forward ``pred_days`` steps, feeding each prediction back in
    as the newest input value.

    Parameters
    ----------
    selectedscript_1 : pandas.DataFrame
        Must contain a 'Date' column in ``%Y-%m-%d`` format and a 'Close'
        column. The frame itself is not modified.
    time_step : int, default 13
        Length of the look-back window fed to the LSTM.
    pred_days : int, default 5
        Number of future steps to forecast.
    epochs : int, default 10
        Training epochs passed to ``model.fit``.
    batch_size : int, default 32
        Batch size passed to ``model.fit``.

    Returns
    -------
    df : pandas.DataFrame
        One row with columns 'Model', 'Train R2 Score', 'Test R2 Score'.
    finaldf : pandas.DataFrame
        Single column 'lstm': every historical close value followed by the
        ``pred_days`` forecast values, all in original price units.
    selectedscript : pandas.DataFrame
        The cleaned input, indexed by 'Date'.

    Raises
    ------
    ValueError
        If the cleaned data is too short to build at least one training and
        one test window of length ``time_step``.
    """
    # Drop incomplete rows and index the remaining ones by parsed date.
    selectedscript = selectedscript_1.dropna().reset_index(drop=True)
    selectedscript['Date'] = pd.to_datetime(selectedscript['Date'], format='%Y-%m-%d')
    selectedscript = selectedscript.set_index('Date')

    # Close prices as an (n, 1) column, which is the layout the scaler expects.
    close_values = selectedscript['Close'].to_numpy().reshape(-1, 1)

    # Chronological 80:20 split.
    training_size = int(len(close_values) * 0.80)
    test_size = len(close_values) - training_size
    if min(training_size, test_size) <= time_step:
        raise ValueError(
            f"Need more than {time_step} rows in both the train and test partitions; "
            f"got {training_size} train and {test_size} test rows after dropping nulls."
        )

    # Fit the scaler on the training partition only, so the min/max of the
    # held-out rows cannot leak into training or inflate the test score.
    # Test values may therefore fall slightly outside [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(close_values[:training_size])
    closedf = scaler.transform(close_values)
    train_data, test_data = closedf[:training_size, :], closedf[training_size:, :1]

    # Supervised windows: X = time_step consecutive values, y = the next value.
    X_train, y_train = create_dataset(train_data, time_step)
    X_test, y_test = create_dataset(test_data, time_step)
    if len(X_train) == 0 or len(X_test) == 0:
        raise ValueError(
            f"Not enough rows to build at least one train and one test window of length {time_step}."
        )

    # LSTM layers expect input shaped [samples, time steps, features].
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    # Reset Keras global state so repeated calls do not accumulate old graphs.
    tf.keras.backend.clear_session()
    model = _build_lstm_model(time_step)
    model.summary()

    # The test partition is used for per-epoch monitoring only.
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=epochs, batch_size=batch_size, verbose=1)

    # Score in original price units. R2: 1 is best, <= 0 is no better than the mean.
    train_predict = scaler.inverse_transform(model.predict(X_train))
    test_predict = scaler.inverse_transform(model.predict(X_test))
    original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
    original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))
    train_r2_lstm = r2_score(original_ytrain, train_predict)
    test_r2_lstm = r2_score(original_ytest, test_predict)

    # Seed the forecast with the most recent time_step scaled values.
    seed_window = test_data[-time_step:, 0].tolist()
    lst_output = _forecast_next(model, seed_window, time_step, pred_days)

    # History followed by the forecast, converted back to price units.
    forecast_scaled = np.array(lst_output).reshape(-1, 1)
    lstmdf = scaler.inverse_transform(np.vstack([closedf, forecast_scaled])).ravel().tolist()
    finaldf = pd.DataFrame({'lstm': lstmdf})

    data = {"Model": ["LSTM"], "Train R2 Score": [train_r2_lstm], "Test R2 Score": [test_r2_lstm]}
    df = pd.DataFrame(data)

    return df, finaldf, selectedscript


def _build_lstm_model(time_step):
    """Build and compile the three-layer stacked LSTM regressor (MSE loss, Adam)."""
    model = Sequential()
    model.add(LSTM(32, return_sequences=True, input_shape=(time_step, 1)))
    model.add(LSTM(32, return_sequences=True))
    model.add(LSTM(32))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


def _forecast_next(model, seed_window, time_step, pred_days):
    """Predict ``pred_days`` steps ahead, sliding each prediction into the input window."""
    window = list(seed_window)
    forecasts = []
    for _ in range(pred_days):
        x_input = np.array(window).reshape((1, time_step, 1))
        yhat = model.predict(x_input, verbose=0)
        forecasts.extend(yhat.tolist())
        # Drop the oldest value and append the new prediction.
        window = window[1:] + yhat[0].tolist()
    return forecasts


Data for Index Code: ^CNXMETAL, Index Name: JSWSTEEL.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  906.799988  906.799988  914.400024  900.099976  910.000000   
1250  2025-01-17  908.599976  908.599976  917.500000  905.250000  906.799988   
1251  2025-01-20  919.200012  919.200012  925.650024  900.500000  908.599976   
1252  2025-01-21  926.849976  926.849976  933.000000  919.549988  925.000000   
1253  2025-01-22  919.400024  919.400024  932.000000  907.150024  932.000000   

       Volume  
1249  1946590  
1250   911260  
1251  1299378  
1252  1474919  
1253  1080075  

Data for Index Code: ^CNXMETAL, Index Name: HINDALCO.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  602.599976  602.599976  607.000000  590.549988  596.000000   
1250  2025-01-17  617.000000  617.000000  619.000000  603.700012  603.849976   
1251  2025-01-20  618


Data for Index Code: ^CNXMETAL, Index Name: JSWSTEEL.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  906.799988  906.799988  914.400024  900.099976  910.000000   
1250  2025-01-17  908.599976  908.599976  917.500000  905.250000  906.799988   
1251  2025-01-20  919.200012  919.200012  925.650024  900.500000  908.599976   
1252  2025-01-21  926.849976  926.849976  933.000000  919.549988  925.000000   
1253  2025-01-22  919.400024  919.400024  932.000000  907.150024  932.000000   

       Volume  
1249  1946590  
1250   911260  
1251  1299378  
1252  1474919  
1253  1080075  

Data for Index Code: ^CNXMETAL, Index Name: HINDALCO.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  602.599976  602.599976  607.000000  590.549988  596.000000   
1250  2025-01-17  617.000000  617.000000  619.000000  603.700012  603.849976   
1251  2025-01-20  618


Data for Index Code: ^CNXMETAL, Index Name: JSWSTEEL.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  906.799988  906.799988  914.400024  900.099976  910.000000   
1250  2025-01-17  908.599976  908.599976  917.500000  905.250000  906.799988   
1251  2025-01-20  919.200012  919.200012  925.650024  900.500000  908.599976   
1252  2025-01-21  926.849976  926.849976  933.000000  919.549988  925.000000   
1253  2025-01-22  919.400024  919.400024  932.000000  907.150024  932.000000   

       Volume  
1249  1946590  
1250   911260  
1251  1299378  
1252  1474919  
1253  1080075  

Data for Index Code: ^CNXMETAL, Index Name: HINDALCO.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  602.599976  602.599976  607.000000  590.549988  596.000000   
1250  2025-01-17  617.000000  617.000000  619.000000  603.700012  603.849976   
1251  2025-01-20  618


Data for Index Code: ^CNXMETAL, Index Name: JSWSTEEL.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  906.799988  906.799988  914.400024  900.099976  910.000000   
1250  2025-01-17  908.599976  908.599976  917.500000  905.250000  906.799988   
1251  2025-01-20  919.200012  919.200012  925.650024  900.500000  908.599976   
1252  2025-01-21  926.849976  926.849976  933.000000  919.549988  925.000000   
1253  2025-01-22  919.400024  919.400024  932.000000  907.150024  932.000000   

       Volume  
1249  1946590  
1250   911260  
1251  1299378  
1252  1474919  
1253  1080075  

Data for Index Code: ^CNXMETAL, Index Name: HINDALCO.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  602.599976  602.599976  607.000000  590.549988  596.000000   
1250  2025-01-17  617.000000  617.000000  619.000000  603.700012  603.849976   
1251  2025-01-20  618


Data for Index Code: ^CNXMETAL, Index Name: JSWSTEEL.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  906.799988  906.799988  914.400024  900.099976  910.000000   
1250  2025-01-17  908.599976  908.599976  917.500000  905.250000  906.799988   
1251  2025-01-20  919.200012  919.200012  925.650024  900.500000  908.599976   
1252  2025-01-21  926.849976  926.849976  933.000000  919.549988  925.000000   
1253  2025-01-22  919.400024  919.400024  932.000000  907.150024  932.000000   

       Volume  
1249  1946590  
1250   911260  
1251  1299378  
1252  1474919  
1253  1080075  

Data for Index Code: ^CNXMETAL, Index Name: HINDALCO.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  602.599976  602.599976  607.000000  590.549988  596.000000   
1250  2025-01-17  617.000000  617.000000  619.000000  603.700012  603.849976   
1251  2025-01-20  618


Data for Index Code: ^CNXMETAL, Index Name: JSWSTEEL.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  906.799988  906.799988  914.400024  900.099976  910.000000   
1250  2025-01-17  908.599976  908.599976  917.500000  905.250000  906.799988   
1251  2025-01-20  919.200012  919.200012  925.650024  900.500000  908.599976   
1252  2025-01-21  926.849976  926.849976  933.000000  919.549988  925.000000   
1253  2025-01-22  919.400024  919.400024  932.000000  907.150024  932.000000   

       Volume  
1249  1946590  
1250   911260  
1251  1299378  
1252  1474919  
1253  1080075  

Data for Index Code: ^CNXMETAL, Index Name: HINDALCO.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  602.599976  602.599976  607.000000  590.549988  596.000000   
1250  2025-01-17  617.000000  617.000000  619.000000  603.700012  603.849976   
1251  2025-01-20  618


Data for Index Code: ^CNXMETAL, Index Name: JSWSTEEL.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  906.799988  906.799988  914.400024  900.099976  910.000000   
1250  2025-01-17  908.599976  908.599976  917.500000  905.250000  906.799988   
1251  2025-01-20  919.200012  919.200012  925.650024  900.500000  908.599976   
1252  2025-01-21  926.849976  926.849976  933.000000  919.549988  925.000000   
1253  2025-01-22  919.400024  919.400024  932.000000  907.150024  932.000000   

       Volume  
1249  1946590  
1250   911260  
1251  1299378  
1252  1474919  
1253  1080075  

Data for Index Code: ^CNXMETAL, Index Name: HINDALCO.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  602.599976  602.599976  607.000000  590.549988  596.000000   
1250  2025-01-17  617.000000  617.000000  619.000000  603.700012  603.849976   
1251  2025-01-20  618


Data for Index Code: ^CNXMETAL, Index Name: JSWSTEEL.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  906.799988  906.799988  914.400024  900.099976  910.000000   
1250  2025-01-17  908.599976  908.599976  917.500000  905.250000  906.799988   
1251  2025-01-20  919.200012  919.200012  925.650024  900.500000  908.599976   
1252  2025-01-21  926.849976  926.849976  933.000000  919.549988  925.000000   
1253  2025-01-22  919.400024  919.400024  932.000000  907.150024  932.000000   

       Volume  
1249  1946590  
1250   911260  
1251  1299378  
1252  1474919  
1253  1080075  

Data for Index Code: ^CNXMETAL, Index Name: HINDALCO.NS
   accuracy
0      0.95
            Date   Adj Close       Close        High         Low        Open  \
1249  2025-01-16  602.599976  602.599976  607.000000  590.549988  596.000000   
1250  2025-01-17  617.000000  617.000000  619.000000  603.700012  603.849976   
1251  2025-01-20  618

In [12]:
import pandas as pd
import os
from tabulate import tabulate

# Locations of the index list and of the per-script daily price CSVs.
# NOTE(review): these are absolute, machine-specific paths; consider moving
# them to a single configurable data directory.
file_path = 'C://Users//manoj//Downloads//Major project data//Major pro source codes//DATASETS//filtered_indices_output.csv'
daily_data_path = 'C://Users//manoj//Downloads//Major project data//Major pro source codes//DATASETS//Daily_data'

# Load the list of (indexcode, indexname) pairs. Nothing below can run without
# it, so stop here with an explicit message instead of exiting silently.
try:
    selected_indices = pd.read_csv(file_path)
except FileNotFoundError:
    raise SystemExit(f"Error: File not found at {file_path}")
except Exception as e:
    raise SystemExit(f"An error occurred: {e}")

# Iterate through each unique index code
unique_index_codes = selected_indices['indexcode'].unique()
for index_code in unique_index_codes:
    # Rows of the index list that belong to the current index code
    filtered_indices = selected_indices[selected_indices['indexcode'] == index_code]

    # The row label is not needed; it is named row_label (not `id`) so the
    # builtin id() is not shadowed in the notebook namespace.
    for row_label, row in filtered_indices.iterrows():
        index_name = row['indexname']

        # Daily data lives in "<indexname with '.' replaced by '_'>.csv"
        daily_file_name = f"{index_name.replace('.', '_')}.csv"
        daily_file_path = os.path.join(daily_data_path, daily_file_name)

        # Loading and model fitting are guarded separately so that a training
        # failure is not reported as a file-loading problem.
        try:
            daily_data = pd.read_csv(daily_file_path)
        except FileNotFoundError:
            print(f"Error: File not found at {daily_file_path} for {index_name}")
            continue
        except Exception as e:
            print(f"An error occurred while loading data for {index_name}: {e}")
            continue

        # df             = train/test R2 scores of the fitted model
        # finaldf        = historical close values followed by the forecast
        # selectedscript = cleaned original data indexed by date
        try:
            df, finaldf, selectedscript = getpredictedvalues(daily_data)
        except Exception as e:
            print(f"An error occurred while predicting values for {index_name}: {e}")
            continue

        # The last 5 rows of finaldf are the forecast values.
        predectedvalues = finaldf.tail(5)

        print(f"\nData for Index Code: {index_code}, Index Name: {index_name}")
        print(df)
        print(predectedvalues)

# #print(tabulate(daily_data.head(), headers='keys', tablefmt='fancy_grid', showindex=False))

Epoch 1/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 53ms/step - loss: 0.1258 - val_loss: 0.0324
Epoch 2/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.0062 - val_loss: 0.0018
Epoch 3/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.0014 - val_loss: 0.0011
Epoch 4/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0011 - val_loss: 0.0014
Epoch 5/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 6/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 0.0012 - val_loss: 0.0015
Epoch 7/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0011 - val_loss: 0.0023
Epoch 8/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0012 - val_loss: 0.0016
Epoch 9/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━

Epoch 1/10


KeyboardInterrupt: 