# Predicting NIFTY Prices for the Current Day

## Data Gathering

### Importing all the libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import nselib
from nselib import capital_market
from datetime import datetime

import warnings
# Ignore all warnings
# NOTE(review): this silences every warning category for the whole session,
# so warnings raised by later cells are never shown — consider narrowing the
# filter to the specific warnings that are known to be harmless.
warnings.filterwarnings("ignore")

### Declare Required Variables

In [2]:
# Date window for the downloads below, as DD-MM-YYYY strings:
# a fixed start date and today's date as the end.
from_date = '01-01-2015'
to_date_str = f"{datetime.today():%d-%m-%Y}"

### Get data from nselib 

#### Get NIFTY Data

In [3]:
# Download the "Nifty 50" index history for the configured date window
index_data = capital_market.index_data(
    index="Nifty 50",
    from_date=from_date,
    to_date=to_date_str,
)

In [4]:
# Preview the first rows of the downloaded index data
index_data.head()

Unnamed: 0,TIMESTAMP,INDEX_NAME,OPEN_INDEX_VAL,HIGH_INDEX_VAL,CLOSE_INDEX_VAL,LOW_INDEX_VAL,TRADED_QTY,TURN_OVER
0,09-11-2015,NIFTY 50,7788.25,7937.75,7915.2,7771.7,218422388,9376.17
1,10-11-2015,NIFTY 50,7877.6,7885.1,7783.35,7772.85,170267413,7153.47
2,11-11-2015,NIFTY 50,7838.8,7847.95,7825.0,7819.1,22380435,1123.44
3,13-11-2015,NIFTY 50,7762.45,7775.1,7762.25,7730.9,165876819,7731.55
4,16-11-2015,NIFTY 50,7732.95,7838.85,7806.6,7714.15,154134885,6871.15


In [5]:
# (rows, columns) of the downloaded index data
index_data.shape

(2201, 8)

In [6]:
# Column dtypes and non-null counts; TIMESTAMP is still a string (object) column at this point
index_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2201 entries, 0 to 2200
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   TIMESTAMP        2201 non-null   object 
 1   INDEX_NAME       2201 non-null   object 
 2   OPEN_INDEX_VAL   2201 non-null   float64
 3   HIGH_INDEX_VAL   2201 non-null   float64
 4   CLOSE_INDEX_VAL  2201 non-null   float64
 5   LOW_INDEX_VAL    2201 non-null   float64
 6   TRADED_QTY       2201 non-null   int64  
 7   TURN_OVER        2201 non-null   float64
dtypes: float64(5), int64(1), object(2)
memory usage: 137.7+ KB


In [7]:
# Parse "TIMESTAMP" (DD-MM-YYYY strings) into datetimes, then order the whole
# frame chronologically.
# The sort has to be applied to the DataFrame, not to the column on its own:
# assigning a separately sorted and re-indexed Series back into the frame
# would re-date the rows by position and detach every date from its prices.
index_data['TIMESTAMP'] = pd.to_datetime(index_data['TIMESTAMP'], format='%d-%m-%Y')
index_data = index_data.sort_values('TIMESTAMP').reset_index(drop=True)

In [8]:
# Keep only the required columns.
# .copy() makes nifty_df an independent frame rather than a slice of
# index_data, so modifying it later cannot trigger pandas' chained-assignment
# (SettingWithCopy) behaviour.
nifty_df = index_data[['TIMESTAMP', 'OPEN_INDEX_VAL', 'HIGH_INDEX_VAL', 'CLOSE_INDEX_VAL', 'LOW_INDEX_VAL', 'TRADED_QTY']].copy()

In [9]:
# Rename the columns as required.
# rename() returns a new frame that is bound back to nifty_df; this avoids
# modifying a column slice of index_data in place.
nifty_df = nifty_df.rename(columns={
    'TIMESTAMP': 'Date',
    'OPEN_INDEX_VAL': 'nifty_Open',
    'HIGH_INDEX_VAL': 'nifty_High',
    'CLOSE_INDEX_VAL': 'nifty_Close',
    'LOW_INDEX_VAL': 'nifty_Low',
    'TRADED_QTY': 'nifty_QTY'
})

#### Get India VIX data

In [10]:
# Download the India VIX history for the same date window as the index data
vixdata = capital_market.india_vix_data(
    from_date=from_date,
    to_date=to_date_str,
)

In [11]:
# Preview the first rows of the downloaded VIX data
vixdata.head()

Unnamed: 0,TIMESTAMP,INDEX_NAME,OPEN_INDEX_VAL,CLOSE_INDEX_VAL,HIGH_INDEX_VAL,LOW_INDEX_VAL,PREV_CLOSE,VIX_PTS_CHG,VIX_PERC_CHG
0,01-JAN-2015,INDIA VIX,15.12,15.025,15.5075,14.755,15.12,-0.095,-0.628307
1,02-JAN-2015,INDIA VIX,15.025,13.795,15.025,13.4825,15.025,-1.23,-8.186356
2,05-JAN-2015,INDIA VIX,13.795,14.1525,14.405,13.1525,13.795,0.3575,2.591519
3,06-JAN-2015,INDIA VIX,14.1525,17.42,17.87,13.4575,14.1525,3.2675,23.087794
4,07-JAN-2015,INDIA VIX,17.42,18.14,19.07,16.5025,17.42,0.72,4.13318


In [12]:
# (rows, columns) of the downloaded VIX data
vixdata.shape

(2410, 9)

In [13]:
# Column dtypes and non-null counts of the VIX data
vixdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2410 entries, 0 to 2409
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   TIMESTAMP        2410 non-null   object 
 1   INDEX_NAME       2410 non-null   object 
 2   OPEN_INDEX_VAL   2410 non-null   float64
 3   CLOSE_INDEX_VAL  2410 non-null   float64
 4   HIGH_INDEX_VAL   2410 non-null   float64
 5   LOW_INDEX_VAL    2410 non-null   float64
 6   PREV_CLOSE       2410 non-null   float64
 7   VIX_PTS_CHG      2410 non-null   float64
 8   VIX_PERC_CHG     2410 non-null   float64
dtypes: float64(7), object(2)
memory usage: 169.6+ KB


In [14]:
# Remove the VIX columns that are not carried into the merged dataset
unused_vix_columns = ['INDEX_NAME', 'PREV_CLOSE', 'VIX_PTS_CHG', 'VIX_PERC_CHG']
vixdata = vixdata.drop(unused_vix_columns, axis='columns')

In [15]:
# Give the VIX columns a "vix_" prefix and call the timestamp column "Date"
vixdata = vixdata.rename(columns=dict(
    TIMESTAMP='Date',
    OPEN_INDEX_VAL='vix_Open',
    HIGH_INDEX_VAL='vix_High',
    CLOSE_INDEX_VAL='vix_Close',
    LOW_INDEX_VAL='vix_Low',
))

In [16]:
# Parse the 'Date' strings (DD-MON-YYYY, e.g. 01-JAN-2015) into datetimes
parsed_vix_dates = pd.to_datetime(vixdata['Date'], format='%d-%b-%Y')
vixdata = vixdata.assign(Date=parsed_vix_dates)


#### Merge NIFTY and VIX dataframes based on column Date

In [17]:
# Inner-join NIFTY and VIX on 'Date', keeping only dates present in both frames
df = nifty_df.merge(vixdata, how='inner', on='Date')

In [18]:
# Preview the first rows of the merged NIFTY + VIX frame
df.head()

Unnamed: 0,Date,nifty_Open,nifty_High,nifty_Close,nifty_Low,nifty_QTY,vix_Open,vix_Close,vix_High,vix_Low
0,2015-11-09,7788.25,7937.75,7915.2,7771.7,218422388,19.4725,17.04,19.4725,15.99
1,2015-11-10,7877.6,7885.1,7783.35,7772.85,170267413,17.04,16.825,17.0625,15.2975
2,2015-11-11,7838.8,7847.95,7825.0,7819.1,22380435,16.825,16.905,16.9475,14.5325
3,2015-11-13,7762.45,7775.1,7762.25,7730.9,165876819,16.905,17.6525,17.7225,15.795
4,2015-11-16,7732.95,7838.85,7806.6,7714.15,154134885,17.6525,17.8775,18.6075,15.2875


In [19]:
# (rows, columns) of the merged frame
df.shape

(2201, 10)

In [20]:
# Column dtypes and non-null counts of the merged frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2201 entries, 0 to 2200
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         2201 non-null   datetime64[ns]
 1   nifty_Open   2201 non-null   float64       
 2   nifty_High   2201 non-null   float64       
 3   nifty_Close  2201 non-null   float64       
 4   nifty_Low    2201 non-null   float64       
 5   nifty_QTY    2201 non-null   int64         
 6   vix_Open     2201 non-null   float64       
 7   vix_Close    2201 non-null   float64       
 8   vix_High     2201 non-null   float64       
 9   vix_Low      2201 non-null   float64       
dtypes: datetime64[ns](1), float64(8), int64(1)
memory usage: 172.1 KB


## Linear Regression Model

### Building a Linear regression model

In [33]:
# Create a new column called "PrevClose" (the previous row's nifty_Close) and
# delete rows with empty values; the first row has no previous close, so it
# is removed.
# The guard keeps the cell idempotent: without it, every re-run would shift
# again and drop one more leading row from df.
if 'PrevClose' not in df.columns:
    df = df.assign(PrevClose=df['nifty_Close'].shift()).dropna()

In [22]:
# Model inputs: the NIFTY and VIX columns of a given row
features = [
    'nifty_Close', 'nifty_Open', 'nifty_High', 'nifty_Low', 'nifty_QTY',
    'vix_Open', 'vix_Close', 'vix_High', 'vix_Low',
]

# Each target is the following row's price: shift(-1) pulls the next value up
# one row, and dropna() removes the final row, which has no successor.
y_open, y_high, y_low, y_close = (
    df[column].shift(-1).dropna()
    for column in ('nifty_Open', 'nifty_High', 'nifty_Low', 'nifty_Close')
)

# Drop the last feature row as well so X lines up with the shifted targets
X = df[features].iloc[:-1, :]

In [23]:
# Starting values for the per-target search results: the lowest MAE seen so
# far begins at infinity, and the split settings that produced it are unset.
min_mae_open = min_mae_high = min_mae_low = min_mae_close = float('inf')

best_random_state_open = best_random_state_high = None
best_random_state_low = best_random_state_close = None

best_test_size_open = best_test_size_high = None
best_test_size_low = best_test_size_close = None

In [24]:
# Loop over different random states and test sizes.
# For every (random_state, test_size) pair, fit one LinearRegression per
# target, compute its test-set MAE, and remember the pair with the lowest MAE
# for that target.
#
# NOTE(review): choosing the split by its own test-set MAE makes min_mae_*
# an optimistic figure, and a shuffled split lets a model train on rows that
# come after its test rows. Consider a chronological hold-out for an honest
# error estimate.

random_states = list(range(20))
# train_test_split only accepts a float test_size strictly between 0 and 1,
# so the grid stops before 1.0 instead of running on to 1.54 and relying on
# the ValueError handler to throw the impossible values away.
test_sizes = np.arange(0.05, 1.0, 0.01)

targets = {'open': y_open, 'high': y_high, 'low': y_low, 'close': y_close}
best = {
    name: {'mae': float('inf'), 'random_state': None, 'test_size': None}
    for name in targets
}

for random_state in random_states:
    for test_size in test_sizes:
        try:
            # One call splits X and all four targets with the same row
            # assignment, which is what four separate calls with an identical
            # random_state/test_size produced.
            X_train, X_test, *y_parts = train_test_split(
                X, *targets.values(), random_state=random_state, test_size=test_size
            )
        except ValueError:
            # A grid value can still leave the train or test side empty
            # (e.g. floating-point drift at the top of the range); skip it.
            continue

        # y_parts is [train, test, train, test, ...] in the order of `targets`
        y_splits = zip(targets, y_parts[0::2], y_parts[1::2])
        for name, y_train, y_test in y_splits:
            model = LinearRegression().fit(X_train, y_train)
            mae = metrics.mean_absolute_error(y_test, model.predict(X_test))
            # Strict "<" keeps the first combination that reaches the minimum
            if mae < best[name]['mae']:
                best[name] = {'mae': mae, 'random_state': random_state, 'test_size': test_size}

# Publish the results under the names the following cells read
min_mae_open = best['open']['mae']
best_random_state_open = best['open']['random_state']
best_test_size_open = best['open']['test_size']

min_mae_high = best['high']['mae']
best_random_state_high = best['high']['random_state']
best_test_size_high = best['high']['test_size']

min_mae_low = best['low']['mae']
best_random_state_low = best['low']['random_state']
best_test_size_low = best['low']['test_size']

min_mae_close = best['close']['mae']
best_random_state_close = best['close']['random_state']
best_test_size_close = best['close']['test_size']

In [25]:
# Report, for each target, the split settings that gave the lowest MAE
best_combinations = (
    ("OpenPrice", best_random_state_open, best_test_size_open, min_mae_open),
    ("HighPrice", best_random_state_high, best_test_size_high, min_mae_high),
    ("LowPrice", best_random_state_low, best_test_size_low, min_mae_low),
    ("ClosePrice", best_random_state_close, best_test_size_close, min_mae_close),
)

for label, state, size, mae in best_combinations:
    print(f"Best combination for {label}:")
    print(f"Random State: {state}, Test Size: {round(size, 2)}, MAE: {round(mae, 2)}")
    print("")

Best combination for OpenPrice:
Random State: 12, Test Size: 0.05, MAE: 60.39

Best combination for HighPrice:
Random State: 19, Test Size: 0.07, MAE: 69.83

Best combination for LowPrice:
Random State: 7, Test Size: 0.06, MAE: 77.55

Best combination for ClosePrice:
Random State: 7, Test Size: 0.06, MAE: 88.52



### Fit the final models using the best parameters for today's prediction

In [26]:
# OpenPrice: rebuild the split with the best settings found for this target
# and fit the final Open model on its training part
open_split = train_test_split(
    X, y_open,
    test_size=best_test_size_open,
    random_state=best_random_state_open,
)
X_train, X_test, y_open_train, y_open_test = open_split
regressor_open = LinearRegression()
regressor_open.fit(X_train, y_open_train)

In [27]:
# HighPrice: rebuild the split with the best settings found for this target
# and fit the final High model on its training part
high_split = train_test_split(
    X, y_high,
    test_size=best_test_size_high,
    random_state=best_random_state_high,
)
X_train, X_test, y_high_train, y_high_test = high_split
regressor_high = LinearRegression()
regressor_high.fit(X_train, y_high_train)

In [28]:
# LowPrice: rebuild the split with the best settings found for this target
# and fit the final Low model on its training part
low_split = train_test_split(
    X, y_low,
    test_size=best_test_size_low,
    random_state=best_random_state_low,
)
X_train, X_test, y_low_train, y_low_test = low_split
regressor_low = LinearRegression()
regressor_low.fit(X_train, y_low_train)

In [29]:
# ClosePrice: rebuild the split with the best settings found for this target
# and fit the final Close model on its training part
close_split = train_test_split(
    X, y_close,
    test_size=best_test_size_close,
    random_state=best_random_state_close,
)
X_train, X_test, y_close_train, y_close_test = close_split
regressor_close = LinearRegression()
regressor_close.fit(X_train, y_close_train)

### Predict today's price

In [30]:
# Define a function to predict next day's prices
def predict_next_day(today_values):
    """Run the four fitted regressors on one set of feature values.

    Args:
        today_values: Feature rows in the layout the regressors were fitted
            on. Only the prediction for the first row is reported.

    Returns:
        dict with keys "OpenPrice", "HighPrice", "LowPrice" and "ClosePrice",
        each holding the first predicted value rounded to 2 decimals.
    """
    fitted_models = (
        ("OpenPrice", regressor_open),
        ("HighPrice", regressor_high),
        ("LowPrice", regressor_low),
        ("ClosePrice", regressor_close),
    )
    return {
        label: round(model.predict(today_values)[0], 2)
        for label, model in fitted_models
    }

In [31]:
# Example usage: a one-row frame holding the feature values of the last row
# of df, with the columns in the order listed here
latest_columns = (
    "nifty_Close", "nifty_Open", "nifty_High", "nifty_Low", "nifty_QTY",
    "vix_Open", "vix_Close", "vix_High", "vix_Low",
)
today_values = pd.DataFrame(
    {column: [df[column].iloc[-1]] for column in latest_columns}
)

### Print the predicted price

In [32]:
next_day_prediction = predict_next_day(today_values)

# Pull the four point predictions out of the result dict
open_price, high_price, low_price, close_price = (
    next_day_prediction[key]
    for key in ('OpenPrice', 'HighPrice', 'LowPrice', 'ClosePrice')
)


def _price_range(price, mae):
    """Format the band `price` +/- `mae`/4, both ends rounded to 2 decimals."""
    half_width = mae / 4
    return f"{round(price - half_width, 2)} to {round(price + half_width, 2)}"


# NOTE(review): the band is a quarter of the best MAE found by the search on
# each side; the reason for the factor 4 is not stated — confirm it is intended.
open_price_range = _price_range(open_price, min_mae_open)
high_price_range = _price_range(high_price, min_mae_high)
low_price_range = _price_range(low_price, min_mae_low)
close_price_range = _price_range(close_price, min_mae_close)

# NOTE(review): the header shows today's date (to_date_str), while the inputs
# come from the last row of df, so the forecast is for the session after that
# row — confirm the label when df already contains today's data or today is
# not a trading day.
print(f"Prediction for NIFTY50 ({to_date_str}): ")
for label, price, price_range in (
    ("Open", open_price, open_price_range),
    ("High", high_price, high_price_range),
    ("Low", low_price, low_price_range),
    ("Close", close_price, close_price_range),
):
    print(f"Predicted {label} Price: {price} ({price_range})")

Prediction for NIFTY50 (21-09-2024): 
Predicted Open Price: 25428.13 (25413.03 to 25443.23)
Predicted High Price: 25546.31 (25528.85 to 25563.77)
Predicted Low Price: 25297.27 (25277.88 to 25316.66)
Predicted Close Price: 25433.12 (25410.99 to 25455.25)
