In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Disable pandas' row/column truncation so displayed frames are shown in full.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Stacked LSTM Implementation

In [2]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load the raw price history.
data = pd.read_csv("/kaggle/input/nsetataglobal/NSE-TATAGLOBAL11.csv")

# Data preprocessing: parse dates, order chronologically, index by date.
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values('Date').set_index('Date')

# Extract the 'Close' prices for prediction as an (n_rows, 1) array.
dataset = data[['Close']].values

# Get user input for the data split
train_percent = float(input("Enter the percentage of data to be used for training (e.g., 80 for 80%): "))
if not 0 < train_percent < 100:
    raise ValueError(
        f"Training percentage must be strictly between 0 and 100, got {train_percent}."
    )
train_size = int(len(dataset) * train_percent / 100)
test_size = len(dataset) - train_size
if train_size < 1 or test_size < 1:
    raise ValueError(
        f"Split of {train_percent}% leaves {train_size} training and {test_size} test rows; "
        "both must be non-empty."
    )

# Normalize the data.
# The scaler learns its min/max from the training rows only, so no information
# about the test period leaks into training. Test values may therefore fall
# slightly outside [0, 1], which is expected.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:train_size])
dataset_scaled = scaler.transform(dataset)
train_data, test_data = dataset_scaled[:train_size], dataset_scaled[train_size:]

# Get user input for training parameters
epochs = int(input("Enter the number of epochs for training: "))
if epochs < 1:
    raise ValueError(f"Number of epochs must be at least 1, got {epochs}.")
batch_size = int(input("Enter the batch size for training: "))
if batch_size < 1:
    raise ValueError(f"Batch size must be at least 1, got {batch_size}.")

# Function to create a stacked LSTM model
def create_stacked_lstm_model(input_shape):
    """Build an uncompiled stacked-LSTM regression network.

    Args:
        input_shape: Value forwarded as ``input_shape`` to the first LSTM layer.

    Returns:
        A ``Sequential`` model made of three 50-unit LSTM layers followed by a
        single-unit ``Dense`` output layer.
    """
    # The first two LSTM layers use return_sequences=True so that the LSTM
    # layer stacked on top of each receives a sequence; the third does not.
    return Sequential([
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        LSTM(units=50, return_sequences=True),
        LSTM(units=50),
        Dense(units=1),
    ])

# Create the stacked LSTM model
# Each sample is a window of the previous `look_back` scaled closes and its
# label is the close that follows the window. Fitting the series against
# itself would only teach the network to echo its input, not to forecast.
look_back = 60  # number of past trading days fed to the model; tune as needed
if train_size <= look_back:
    raise ValueError(
        f"Need more than {look_back} training rows to build input windows, got {train_size}."
    )


def _make_windows(series, start, stop, window):
    """Build supervised samples from a scaled (n_rows, 1) series.

    For every position ``i`` in ``[start, stop)`` the input is the ``window``
    values immediately before ``i`` and the target is the value at ``i``.

    Returns:
        ``(inputs, targets)`` with shapes ``(stop - start, window, 1)`` and
        ``(stop - start, 1)``.
    """
    inputs = np.array([series[i - window:i, 0] for i in range(start, stop)])
    return inputs.reshape(-1, window, 1), series[start:stop]


# Training windows and targets only use rows before `train_size`.
lstm_train_X, lstm_train_y = _make_windows(dataset_scaled, look_back, train_size, look_back)
# Test windows reach back into the last training rows for history (already
# known at prediction time), so every test row receives a prediction.
lstm_test_X, _ = _make_windows(dataset_scaled, train_size, len(dataset_scaled), look_back)

input_shape = (look_back, 1)
model = create_stacked_lstm_model(input_shape)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(lstm_train_X, lstm_train_y, epochs=epochs, batch_size=batch_size, verbose=1)

# Make predictions
train_predictions_scaled = model.predict(lstm_train_X)
test_predictions_scaled = model.predict(lstm_test_X)

# Transform predictions back to original scale
# The first `look_back` training rows have no complete history window; they are
# left as NaN so the array still lines up row-for-row with the first
# `train_size` dates when plotted.
train_predictions = np.full((train_size, 1), np.nan)
train_predictions[look_back:] = scaler.inverse_transform(train_predictions_scaled)
test_predictions = scaler.inverse_transform(test_predictions_scaled)

# Create an interactive plot using Plotly
fig = go.Figure()

# Add training data and predictions to the plot.
# One (legend name, x values, y values) entry per line, added in this order.
train_dates, test_dates = data.index[:train_size], data.index[train_size:]
trace_specs = [
    ('Training Data', train_dates, dataset[:train_size]),
    ('Test Data', test_dates, dataset[train_size:]),
    ('Training Predictions', train_dates, train_predictions),
    ('Test Predictions', test_dates, test_predictions),
]
for trace_name, trace_dates, trace_values in trace_specs:
    fig.add_trace(go.Scatter(x=trace_dates, y=trace_values.flatten(),
                             mode='lines', name=trace_name))

# Update layout and add options to toggle visibility of lines.
# Each preset maps a dropdown label to visibility flags for the four traces, in
# the order they were added to the figure:
# [training data, test data, training predictions, test predictions].
visibility_presets = [
    ('All', [True, True, True, True]),
    ('Train Data and Predictions', [True, False, True, False]),
    ('Test Data and Predictions', [False, True, False, True]),
    ('Train Data Only', [True, False, False, False]),
    ('Test Data Only', [False, True, False, False]),
]
toggle_buttons = [
    {'args': [{'visible': flags}], 'label': label, 'method': 'update'}
    for label, flags in visibility_presets
]
visibility_dropdown = {
    'buttons': toggle_buttons,
    'direction': 'down',
    'showactive': True,
    'x': 0.1,
    'xanchor': 'left',
    'y': 1.1,
    'yanchor': 'top',
}

fig.update_layout(
    title='Stock Market Prediction using Stacked LSTM',
    xaxis_title='Date',
    yaxis_title='Stock Price',
    updatemenus=[visibility_dropdown],
)

fig.show()


Enter the percentage of data to be used for training (e.g., 80 for 80%):  80
Enter the number of epochs for training:  50
Enter the batch size for training:  32


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# Regression with Classical Machine Learning Models

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle


# Load the dataset
df = pd.read_csv('/kaggle/input/nsetataglobal/NSE-TATAGLOBAL11.csv')


# Sort the data by date in ascending order
df['Date'] = pd.to_datetime(df['Date'])
df.sort_values(by='Date', inplace=True)
df.set_index('Date', inplace=True)


# User input for the target variable
print(df.columns)
# Strip stray whitespace and fail with a readable message on an unknown column
# instead of a bare KeyError from the drop below.
target_variable = input("\nEnter the name of the target variable: ").strip()
if target_variable not in df.columns:
    raise ValueError(
        f"Unknown target variable {target_variable!r}; choose one of {list(df.columns)}"
    )

# Separate the features and target variable
X = df.drop(columns=[target_variable])
y = df[target_variable]

# Split the data into train and test sets
# NOTE(review): rows are shuffled before splitting, so test rows are interleaved
# in time with training rows; consider a chronological split for forecasting.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features to scale values between 0 and 1.
# The scaler is fitted on the training rows only so that the test rows do not
# influence the min/max used for scaling (avoids train/test leakage).
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled feature matrix, kept under its original name for any later use.
X_scaled = scaler.transform(X)

# List of regression models to try.
# Estimators with internal randomness are seeded so that repeated runs yield
# the same metrics and therefore the same "best" model.
RANDOM_STATE = 42
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Support Vector Regression': SVR(),
    'Decision Tree Regression': DecisionTreeRegressor(random_state=RANDOM_STATE),
    'Random Forest Regression': RandomForestRegressor(random_state=RANDOM_STATE),
    'Gradient Boosting Regression': GradientBoostingRegressor(random_state=RANDOM_STATE),
    'XGBoost Regression': xgb.XGBRegressor(random_state=RANDOM_STATE)
}

# Dictionary to store the model performance metrics, keyed by model name
model_metrics = {}

# Train and evaluate each model.
# Each estimator is fitted in place, so `models` holds the fitted estimators
# once the loop has finished.
for model_name, model in models.items():
    model.fit(X_train, y_train)

    # Score on the held-out test split only.
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    model_metrics[model_name] = {
        'Mean Squared Error (MSE)': mse,
        'Mean Absolute Error (MAE)': mae,
        'R-squared (R2)': r2
    }

def _print_metric_lines(metrics):
    """Print every entry of `metrics` as ``name: value`` with four decimals."""
    for metric_name, value in metrics.items():
        print(f"{metric_name}: {value:.4f}")


# Display the performance metrics for each model
print("\n\tModel Performance Metrics:")
for model_name, metrics in model_metrics.items():
    print(f"\n{model_name} :")
    _print_metric_lines(metrics)
    print("___________________________________________________________________")

# Find the best model based on R-squared (R2): the name with the highest score.
best_model = max(model_metrics, key=lambda name: model_metrics[name]['R-squared (R2)'])

# Display the best model and its performance metrics
print("\n\n\tBest Model:")
print(best_model)
print("Performance Metrics:- ")
_print_metric_lines(model_metrics[best_model])

# Save the best model using pickle.
# `best_model` is the *name* of the winning estimator; look the fitted object up
# in `models`. The loop variable `model` left over from training is simply the
# last estimator iterated, not necessarily the best one.
filename = f"{best_model}.pkl"
with open(filename, 'wb') as file:
    pickle.dump(models[best_model], file)
print(f"\n{filename} pickle file is created... ")

Index(['Open', 'High', 'Low', 'Last', 'Close', 'Total Trade Quantity',
       'Turnover (Lacs)'],
      dtype='object')



Enter the name of the target variable:  Close



	Model Performance Metrics:

Linear Regression :
Mean Squared Error (MSE): 0.1708
Mean Absolute Error (MAE): 0.2992
R-squared (R2): 0.9999
___________________________________________________________________

Ridge Regression :
Mean Squared Error (MSE): 2.0568
Mean Absolute Error (MAE): 1.0202
R-squared (R2): 0.9991
___________________________________________________________________

Lasso Regression :
Mean Squared Error (MSE): 19.2869
Mean Absolute Error (MAE): 3.2414
R-squared (R2): 0.9919
___________________________________________________________________

Support Vector Regression :
Mean Squared Error (MSE): 85.1144
Mean Absolute Error (MAE): 2.8971
R-squared (R2): 0.9643
___________________________________________________________________

Decision Tree Regression :
Mean Squared Error (MSE): 0.7562
Mean Absolute Error (MAE): 0.5387
R-squared (R2): 0.9997
___________________________________________________________________

Random Forest Regression :
Mean Squared Error (MSE): 0.8495
