# NIFTY 50 Minute-Level Price Prediction

This notebook resamples minute-level NIFTY 50 index data to daily closing prices and forecasts those daily closes using:
- Prophet
- ARIMA
- LSTM

## Objectives
- Load and inspect the dataset
- Visualize patterns and trends
- Apply and compare forecasting models


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
for folder, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## 1. Import Required Libraries


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels
import scipy

## 2. Load and Explore the Dataset


In [None]:
# Location of the NIFTY 50 CSV inside the attached Kaggle dataset
filepath = '/kaggle/input/nifty-50-minute-data/NIFTY 50_minute_data.csv'
# Read the CSV into a DataFrame; dtypes are left to pandas' default inference
df = pd.read_csv(filepath)

In [None]:
# Preview the first rows to check which columns were loaded
df.head()

In [None]:
# Count missing values per column
df.isnull().sum()

In [None]:
# Summary statistics for the numeric columns
df.describe()

In [None]:
# Column dtypes, non-null counts and memory usage
df.info()

## 3. Preprocessing for Time-Series Forecasting


In [None]:
# Parse the 'date' column to datetimes and make it the index, which the daily
# resample in the next step requires.
# The guard makes this cell safe to re-run: once 'date' has been moved into the
# index it is no longer a column, and an unguarded second run would raise KeyError.
if 'date' in df.columns:
    df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')

In [None]:
# Collapse the series to one value per calendar day: the last 'close' of each day.
# Calendar days with no rows come out as NaN from the resample and are dropped.
daily_df = df['close'].resample('D').last().dropna()

In [None]:
from sklearn.ensemble import IsolationForest
# Flag roughly 1% of the daily closes as outliers with an Isolation Forest.
# fit_predict returns 1 for inliers and -1 for outliers; the labels are stored
# next to the prices in an 'Outlier' column.
model = IsolationForest(contamination=0.01, random_state=42)
df_numeric = daily_df.to_frame(name='close')
outlier_labels = model.fit_predict(df_numeric)
df_numeric = df_numeric.assign(Outlier=outlier_labels)

In [None]:
# Keep only the rows not labelled -1 (outlier), then discard the helper label column
is_inlier = df_numeric['Outlier'].ne(-1)
df_cleaned = df_numeric.loc[is_inlier].drop(columns='Outlier')

## 4. Visualize Daily Closing Prices


In [None]:
# Daily close as a line, with the points labelled -1 by the outlier model overlaid in red
flagged = df_numeric[df_numeric['Outlier'] == -1]

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df_numeric.index, df_numeric['close'], label='Original')
ax.scatter(flagged.index, flagged['close'], color='red', label='Outliers')
ax.set_title("NIFTY 50 Daily Close with Outliers Detected")
ax.set_xlabel("Date")
ax.set_ylabel("Close Price")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Move the date index back into a column and rename to the 'ds' (timestamp) / 'y' (value)
# column names that Prophet requires for its input frame
df_cleaned_reset = df_cleaned.reset_index().rename(columns={'date': 'ds', 'close': 'y'})

In [None]:
from sklearn.model_selection import train_test_split
# Chronological hold-out: shuffle=False keeps row order, so the final 10% of rows form the test set
train_df, test_df = train_test_split(df_cleaned_reset, test_size=0.1, shuffle=False)


In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Decompose the daily close (outliers still included) into trend, seasonal and
# residual components, using a multiplicative model and a 30-observation period
decomposition = seasonal_decompose(daily_df, model='multiplicative', period=30)

# Change the default figure size before plotting so the decomposition figure is 12x8.
# This is a global Matplotlib setting and also applies to later figures that do not pass figsize.
plt.rcParams['figure.figsize'] = (12, 8)
decomposition_fig = decomposition.plot()
decomposition_fig.suptitle("Time Series Decomposition - NIFTY 50 (Daily Close)", fontsize=16)
plt.show()

In [None]:
!pip install prophet

## 5. Forecasting with Facebook Prophet


In [None]:
from prophet import Prophet
# Configure and fit Prophet on the training part of the daily close series.
model = Prophet(
    # The input has one observation per day, so there is no within-day cycle to
    # learn; a daily seasonal term only makes sense for sub-daily data.
    daily_seasonality=False,
    # Seasonal effects scale with the price level rather than adding a fixed offset
    seasonality_mode='multiplicative',
    # Place trend changepoints within the first 80% of the training history
    changepoint_range=0.8,
    # Higher than Prophet's 0.05 default: allows a more flexible trend
    changepoint_prior_scale=0.2
)
model.fit(train_df)

In [None]:
# Predict on exactly the dates that exist in the data (training dates followed by test dates).
# make_future_dataframe(periods=len(test_df)) would append consecutive *calendar* days,
# while test_df contains only days that have data. The generated horizon would then end
# before the test set does, and the inner merge used for scoring would silently drop the
# uncovered test rows.
future = pd.concat([train_df[['ds']], test_df[['ds']]], ignore_index=True)
forecast = model.predict(future)

In [None]:
# Actual series against Prophet's fitted and forecast values; the dashed line marks the first test date
split_date = test_df['ds'].iloc[0]

fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(df_cleaned_reset['ds'], df_cleaned_reset['y'], label='Actual')
ax.plot(forecast['ds'], forecast['yhat'], label='Forecast')
ax.axvline(x=split_date, color='black', linestyle='--', label='Train/Test Split')
ax.set_title("Prophet Forecast vs Actual")
ax.set_xlabel("Date")
ax.set_ylabel("NIFTY 50 Close Price")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Align Prophet's point predictions with the actual values by date.
# Both joins are inner joins: a date is kept only if it appears on both sides, so
# any test date without a matching forecast row is dropped without warning.
prophet_yhat = forecast[['ds', 'yhat']]
merged = prophet_yhat.set_index('ds').join(df_cleaned_reset.set_index('ds'), how='inner')
test_merged = test_df.merge(prophet_yhat, on='ds', how='inner')


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Score Prophet on the test rows that have a matching forecast
prophet_actual, prophet_pred = test_merged['y'], test_merged['yhat']
mae = mean_absolute_error(prophet_actual, prophet_pred)
rmse = np.sqrt(mean_squared_error(prophet_actual, prophet_pred))
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")

In [None]:
# Date-indexed series of outlier-filtered daily closes, used as the ARIMA input
ts = df_cleaned['close']

In [None]:
# Chronological 80/20 split for ARIMA (note: the Prophet section holds out 10% instead)
train_size = int(len(ts) * 0.8)
train = ts.iloc[:train_size]
test = ts.iloc[train_size:]

In [None]:
!pip install pmdarima


## 6. Forecasting with ARIMA


In [None]:
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
import warnings
# NOTE: with no category or module filter, this hides every Python warning for the
# rest of the session, not only those raised during the ARIMA order search
warnings.filterwarnings("ignore")
from pmdarima import auto_arima

# Stepwise search over non-seasonal ARIMA orders with p and q between 0 and 5.
# d=None leaves the differencing order for auto_arima to select.
arima_search_options = dict(
    start_p=0, max_p=5,
    start_q=0, max_q=5,
    d=None,
    seasonal=False,
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
model = auto_arima(train, **arima_search_options)

print(model.summary())
# Forecast as many steps past the end of `train` as there are rows in `test`
forecast = model.predict(n_periods=len(test))

In [None]:
# Training history, held-out actuals and the ARIMA forecast on one axis;
# the forecast is plotted against the test dates
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(train.index, train, label='Train')
ax.plot(test.index, test, label='Actual Test')
ax.plot(test.index, forecast, label='ARIMA Forecast')
ax.axvline(x=test.index[0], color='black', linestyle='--', label='Train/Test Split')
ax.set_title("ARIMA Forecast vs Actual")
ax.set_xlabel("Date")
ax.set_ylabel("NIFTY 50 Close")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Score the ARIMA forecast against the held-out 20%
arima_mse = mean_squared_error(test, forecast)
mae_arima = mean_absolute_error(test, forecast)
rmse_arima = np.sqrt(arima_mse)

print(f"ARIMA MAE: {mae_arima:.2f}")
print(f"ARIMA RMSE: {rmse_arima:.2f}")

## 7. Forecasting with LSTM (Deep Learning)


In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import tensorflow as tf

In [None]:
# Outlier-filtered daily closes as a single-column 2-D array (n_days, 1), the layout the scaler is fitted on
ts_lstm = df_cleaned['close'].values.reshape(-1, 1)

In [None]:
# Learn the min/max from the leading 80% of the series only, then apply that
# scaling to the whole series. Fitting on all rows would leak the test period's
# price range into the training inputs. Every row in this leading slice also falls
# inside the LSTM training portion produced by the 80/20 window split further down.
# Values after the cutoff can therefore scale outside [0, 1].
scaler = MinMaxScaler()
scaler_fit_rows = int(len(ts_lstm) * 0.8)
scaler.fit(ts_lstm[:scaler_fit_rows])
ts_scaled = scaler.transform(ts_lstm)

In [None]:
def create_sequences(data, window=30):
    """Build sliding-window samples for one-step-ahead prediction.

    For every position ``end`` from ``window`` to ``len(data) - 1``, the input
    sample is ``data[end - window:end]`` and the target is ``data[end]``.

    Args:
        data: Indexable, sliceable sequence of observations (e.g. a NumPy array).
        window: Number of consecutive observations in each input sample.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the input windows and their
        targets. Both are empty when ``len(data) <= window``.
    """
    target_positions = range(window, len(data))
    inputs = [data[end - window:end] for end in target_positions]
    targets = [data[end] for end in target_positions]
    return np.array(inputs), np.array(targets)

# Each sample is 30 consecutive scaled closes; its target is the scaled close that follows
window_size = 30
X, y = create_sequences(data=ts_scaled, window=window_size)

In [None]:
# Chronological 80/20 split of the windowed samples (no shuffling)
split_index = int(len(X) * 0.8)
X_train, y_train = X[:split_index], y[:split_index]
X_test, y_test = X[split_index:], y[split_index:]


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Seed Python, NumPy and TensorFlow before building the network so that weight
# initialisation, dropout masks and batch shuffling repeat across runs.
# (Some GPU kernels can still introduce small run-to-run differences.)
tf.keras.utils.set_random_seed(42)

# Single LSTM layer over each input window -> dropout -> one linear output unit
model_lstm = Sequential([
    # return_sequences=False: only the final time step's output is passed on
    LSTM(100, return_sequences=False, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    Dense(1)
])
model_lstm.compile(optimizer='adam', loss='mean_squared_error')


In [None]:
# Train for 20 epochs with batch size 16. The test split is passed as validation data, so
# its loss is recorded each epoch in `history`; no callbacks are supplied, so it does not
# alter training.
history = model_lstm.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test), verbose=0)


In [None]:
# Map the scaled targets and the scaled predictions back to price units
y_test_true = scaler.inverse_transform(y_test)
y_pred_scaled = model_lstm.predict(X_test)
y_pred = scaler.inverse_transform(y_pred_scaled)

In [None]:
# The test samples are the final len(y_test) targets of the series, so their dates are the last len(y_test) index entries
test_dates = df_cleaned.index[-len(y_test):]

In [None]:
# Held-out actual closes against the LSTM predictions, both in price units
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(test_dates, y_test_true, label='Actual')
ax.plot(test_dates, y_pred, label='LSTM Forecast')
ax.set_title("LSTM Forecast vs Actual")
ax.set_xlabel("Date")
ax.set_ylabel("NIFTY 50 Close Price")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Score the LSTM predictions in price units
lstm_mse = mean_squared_error(y_test_true, y_pred)
mae_lstm = mean_absolute_error(y_test_true, y_pred)
rmse_lstm = np.sqrt(lstm_mse)

print(f"LSTM MAE: {mae_lstm:.2f}")
print(f"LSTM RMSE: {rmse_lstm:.2f}")

In [None]:
#Accuracy
import numpy as np
import matplotlib.pyplot as plt

# "Accuracy" here means 100 * (1 - MAE / mean actual price) on each model's own test set.
# The MAEs are read from the variables computed in the evaluation cells above, not
# typed in by hand, so the chart stays in sync when the notebook is re-run.
mae_dict = {
    'Prophet': float(mae),
    'ARIMA': float(mae_arima),
    'LSTM': float(mae_lstm),
}

# Each model was scored on a different hold-out set, so each MAE is normalised by
# the mean actual price of that same set.
mean_actual_dict = {
    'Prophet': float(np.mean(test_merged['y'])),
    'ARIMA': float(np.mean(test)),
    'LSTM': float(np.mean(y_test_true)),
}
mean_actual = mean_actual_dict['LSTM']  # kept under its original name and meaning

# Caveat: the bars are not a like-for-like comparison. The ARIMA forecast is produced
# in one shot for the whole test horizon from the end of the training data, while
# every LSTM prediction is one step ahead from a window of actual observations.
accuracy_dict = {
    name: round((1 - mae_dict[name] / mean_actual_dict[name]) * 100, 2)
    for name in mae_dict
}

plt.figure(figsize=(10, 5))
bars = plt.bar(list(accuracy_dict.keys()), list(accuracy_dict.values()))
plt.title("Forecast Accuracy (%) by Model", fontsize=14, weight='bold')
plt.ylabel("Accuracy (%)")
plt.ylim(0, 100)
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Print each bar's value just above it
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval + 1, f"{yval:.2f}%", ha='center', fontsize=10)

plt.tight_layout()
plt.show()


## 8. Conclusion

- **Prophet** is great for trend-based forecasting with quick setup.
- **ARIMA** is effective for short-term autoregressive patterns.
- **LSTM** captures temporal dynamics well but needs careful tuning.

Each model has trade-offs. Prophet is interpretable, ARIMA is classic, and LSTM is powerful for non-linear data.

