LSTM CODE

In [34]:
import pandas as pd
import numpy as np
import yfinance as yf
import os
from tabulate import tabulate
from sklearn.metrics import r2_score 
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
# from itertools import cycle
import warnings
warnings.filterwarnings("ignore")

In [35]:
# Function to create dataset for time-series prediction
def create_dataset(dataset, time_step=1):
    """Slice a single-feature series into supervised (window, next value) pairs.

    Parameters
    ----------
    dataset : array-like of shape (n_rows, n_columns)
        Only column 0 is read.
    time_step : int, default 1
        Number of consecutive rows that make up one input window.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        ``X`` with shape ``(n_samples, time_step)`` and ``y`` with shape
        ``(n_samples,)``, where ``y[i]`` is the row that immediately follows
        window ``X[i]``. ``n_samples`` is ``len(dataset) - time_step``; when
        that is not positive, empty arrays of shape ``(0, time_step)`` and
        ``(0,)`` are returned so callers can still read ``X.shape[1]``.
    """
    dataset = np.asarray(dataset)
    # The last valid target index is len(dataset) - 1, so the last window
    # starts at len(dataset) - time_step - 1. range() must therefore stop at
    # len(dataset) - time_step; subtracting one more drops the final sample.
    n_samples = len(dataset) - time_step
    if n_samples <= 0:
        return np.empty((0, time_step)), np.empty((0,))
    dataX = [dataset[i:i + time_step, 0] for i in range(n_samples)]
    dataY = [dataset[i + time_step, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)

# Main function to get predicted values
def getpredictedvalues(selectedscript_1, start_date='2021-01-01', end_date='2025-01-01',
                       time_step=13, pred_days=5, epochs=100):
    """Train a stacked LSTM on the 'Close' column and forecast the next values.

    Parameters
    ----------
    selectedscript_1 : pandas.DataFrame
        Must contain a 'Date' column parseable with the ``%Y-%m-%d`` format and
        a 'Close' column. Rows containing any NaN are dropped first.
    start_date, end_date : str or None
        Inclusive bounds applied to 'Date'. Each bound is applied on its own,
        so passing only one of them still filters the data.
    time_step : int, default 13
        Length of the look-back window fed to the network.
    pred_days : int, default 5
        Number of future steps to forecast recursively.
    epochs : int, default 100
        Number of training epochs.

    Returns
    -------
    tuple
        ``(df, finaldf, selectedscript)`` where ``df`` is a one-row frame with
        the columns 'Model', 'Train R2 Score' and 'Test R2 Score'; ``finaldf``
        has a single 'Close' column holding the filtered history followed by
        the ``pred_days`` forecast values (original price scale); and
        ``selectedscript`` is the filtered input frame indexed by 'Date'.

    Raises
    ------
    ValueError
        If ``time_step`` is smaller than 1 or the filtered data is too short to
        build at least one training and one test window.
    """
    if time_step < 1:
        raise ValueError("time_step must be at least 1")

    selectedscript = selectedscript_1.dropna().reset_index(drop=True).copy()
    selectedscript['Date'] = pd.to_datetime(selectedscript['Date'], format='%Y-%m-%d')
    # Apply each bound independently so a single bound is not silently ignored.
    if start_date:
        selectedscript = selectedscript[selectedscript['Date'] >= start_date]
    if end_date:
        selectedscript = selectedscript[selectedscript['Date'] <= end_date]
    selectedscript = selectedscript.set_index('Date')

    close_values = selectedscript[['Close']].to_numpy().reshape(-1, 1)

    # Chronological 80/20 split (no shuffling).
    training_size = int(len(close_values) * 0.80)
    train_raw = close_values[:training_size]
    test_raw = close_values[training_size:]
    if len(train_raw) <= time_step or len(test_raw) <= time_step:
        raise ValueError(
            f"Not enough rows ({len(close_values)}) to build train/test windows "
            f"with time_step={time_step}"
        )

    # Fit the scaler on the training slice only: fitting on the full series
    # would leak the test set's min/max into training and inflate the test R2.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_data = scaler.fit_transform(train_raw)
    test_data = scaler.transform(test_raw)

    X_train, y_train = create_dataset(train_data, time_step)
    X_test, y_test = create_dataset(test_data, time_step)
    if len(X_train) == 0 or len(X_test) == 0:
        raise ValueError(
            f"Not enough rows ({len(close_values)}) to build train/test windows "
            f"with time_step={time_step}"
        )
    # Keras LSTM layers expect (samples, time steps, features).
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    model = Sequential([
        LSTM(32, return_sequences=True, input_shape=(time_step, 1)),
        LSTM(32, return_sequences=True),
        LSTM(32),
        Dense(1)
    ])
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=epochs, batch_size=32, verbose=1)

    # Score in the original price scale.
    train_predict = scaler.inverse_transform(model.predict(X_train))
    test_predict = scaler.inverse_transform(model.predict(X_test))
    original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
    original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))
    train_r2_lstm = r2_score(original_ytrain, train_predict)
    test_r2_lstm = r2_score(original_ytest, test_predict)

    # Recursive forecast: seed with the last `time_step` scaled values, then
    # feed every prediction back in while sliding the window forward by one.
    window = test_data[-time_step:, 0].tolist()
    forecast_scaled = []
    for _ in range(pred_days):
        x_input = np.array(window).reshape(1, time_step, 1)
        yhat = float(model.predict(x_input, verbose=0)[0, 0])
        forecast_scaled.append(yhat)
        window = window[1:] + [yhat]

    # History followed by the forecast, mapped back to the price scale.
    scaled_series = np.concatenate([
        train_data.ravel(),
        test_data.ravel(),
        np.asarray(forecast_scaled, dtype=float),
    ])
    lstmdf = scaler.inverse_transform(scaled_series.reshape(-1, 1)).flatten().tolist()
    finaldf = pd.DataFrame({'Close': lstmdf})

    data = {"Model": ["LSTM"], "Train R2 Score": [train_r2_lstm], "Test R2 Score": [test_r2_lstm]}
    df = pd.DataFrame(data)
    return df, finaldf, selectedscript

In [36]:
import pandas as pd
import os
import datetime as dt  # Import the datetime module
from tabulate import tabulate  # Import tabulate for better table formatting

# Paths and configurations
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a single configurable base directory before sharing the notebook.
file_path = 'C://Users//manoj//Downloads//Major project data//Major pro source codes//DATASETS//filtered_indices_output.csv'
daily_data_path = 'C://Users//manoj//Downloads//Major project data//Major pro source codes//DATASETS//Daily_data'
output_csv_file = 'C://Users//manoj//Downloads//Major project data//Major pro source codes//DATASETS//lstm_prediction_output.csv'

try:
    # Check if the file exists
    if os.path.exists(file_path):
        print(f"File found: {file_path}")
        selected_indices = pd.read_csv(file_path, on_bad_lines='skip')  # malformed rows are skipped
    else:
        print(f"File not found: {file_path}")
        raise FileNotFoundError(f"The specified file does not exist: {file_path}")

    all_output_data = []
    unique_index_codes = selected_indices['indexcode'].unique()
    current_date = dt.datetime.now().strftime("%Y-%m-%d")

    # Train one model per listed index and collect its scores and 5-step forecast.
    for index_code in unique_index_codes:
        filtered_indices = selected_indices[selected_indices['indexcode'] == index_code]
        for _, row in filtered_indices.iterrows():
            index_name = row['indexname']
            try:
                # Built inside the try block so a missing or non-string index
                # name only skips this row instead of aborting the whole run.
                daily_file_name = f"{index_name.replace('.', '_')}.csv"
                daily_file_path = os.path.join(daily_data_path, daily_file_name)
                daily_data = pd.read_csv(daily_file_path)
                df, finaldf, selectedscript = getpredictedvalues(daily_data)
                # The forecast values are the last five rows of finaldf.
                predicted_values = finaldf['Close'].tail(5).values.tolist()
                output_data = {
                    'Run Date': current_date,
                    'Index Name': index_name,
                    'Model': df['Model'].iloc[0],
                    'Train R2 Score': df['Train R2 Score'].iloc[0],
                    'Test R2 Score': df['Test R2 Score'].iloc[0],
                    'Day 1': predicted_values[0],
                    'Day 2': predicted_values[1],
                    'Day 3': predicted_values[2],
                    'Day 4': predicted_values[3],
                    'Day 5': predicted_values[4]
                }
                all_output_data.append(output_data)
            except Exception as e:
                # Best effort: report the failure and continue with the next index.
                print(f"Error processing {index_name}: {str(e)}")

    # Define columns to maintain order
    columns = ['Run Date', 'Index Name', 'Model', 'Train R2 Score', 'Test R2 Score',
               'Day 1', 'Day 2', 'Day 3', 'Day 4', 'Day 5']
    output_df = pd.DataFrame(all_output_data, columns=columns)

    # Update existing CSV or create new
    if os.path.exists(output_csv_file):
        existing_df = pd.read_csv(output_csv_file)
        # reindex tolerates an older file that lacks some columns (they become
        # NaN) instead of raising KeyError and discarding the new predictions.
        existing_df = existing_df.reindex(columns=columns)
        combined_df = pd.concat([existing_df, output_df], ignore_index=True)
        # A re-run on the same day replaces that day's earlier row per index.
        combined_df = combined_df.drop_duplicates(subset=['Run Date', 'Index Name'], keep='last')
    else:
        combined_df = output_df

    combined_df.to_csv(output_csv_file, index=False)
    print(f"\nPredictions saved to {output_csv_file}")

    # Print the output DataFrame in a tabular format
    print("\nFinal Output:")
    print(tabulate(combined_df, headers='keys', tablefmt='fancy_grid', showindex=False))

except Exception as e:
    print(f"An error occurred: {str(e)}")

File found: C://Users//manoj//Downloads//Major project data//Major pro source codes//DATASETS//filtered_indices_output.csv
Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 43ms/step - loss: 0.1284 - val_loss: 0.0592
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 0.0083 - val_loss: 0.0256
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.0041 - val_loss: 0.0148
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.0030 - val_loss: 0.0030
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0022 - val_loss: 0.0061
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0027 - val_loss: 0.0027
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.0022 - val_loss: 0.0027
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━