In [1]:
import pandas as pd

path = "NIFTY 50_minute.csv"
df = pd.read_csv(path)

df.head()
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 975321 entries, 0 to 975320
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   date    975321 non-null  object 
 1   open    975321 non-null  float64
 2   high    975321 non-null  float64
 3   low     975321 non-null  float64
 4   close   975321 non-null  float64
 5   volume  975321 non-null  int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 44.6+ MB


In [2]:
df['date'] = pd.to_datetime(
    df['date'],
    format='mixed',
    dayfirst=True,
    errors='coerce'
)


In [3]:
df = df.dropna(subset=['date'])


In [4]:
df = df.sort_values('date')
df = df.set_index('date')

In [5]:
df.index.min(), df.index.max()
df.index.inferred_type

'datetime64'

In [6]:
df = df.between_time("09:15", "15:30") # NSE Market hours

In [7]:
df.index.time.min(), df.index.time.max()


(datetime.time(9, 15), datetime.time(15, 29, 2))

In [8]:
end_date = df.index.max()
start_date = end_date - pd.DateOffset(years=1)

df = df.loc[start_date:end_date]

In [9]:
df.index.min(), df.index.max() # sanity check

(Timestamp('2024-07-25 15:29:00'), Timestamp('2025-07-25 15:29:00'))

In [10]:
df_5min = df.resample(
    '5min',
    label='left',
    closed='left'
).agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum'
})

In [11]:
df_5min = df_5min.dropna()

In [12]:
df_5min.index.to_series().diff().value_counts().head() # Final sanity check

date
0 days 00:05:00    18426
0 days 17:50:00      190
2 days 17:50:00       45
1 days 17:50:00        8
3 days 17:50:00        6
Name: count, dtype: int64

In [13]:
df_5min.head(10)



Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-07-25 15:25:00,24415.75,24423.15,24412.3,24415.0,0
2024-07-26 09:15:00,24423.35,24473.5,24410.9,24467.45,0
2024-07-26 09:20:00,24463.95,24492.7,24459.5,24477.45,0
2024-07-26 09:25:00,24478.15,24489.55,24473.5,24477.2,0
2024-07-26 09:30:00,24477.65,24494.2,24462.0,24490.45,0
2024-07-26 09:35:00,24490.75,24526.6,24490.75,24513.55,0
2024-07-26 09:40:00,24513.7,24528.95,24513.65,24526.6,0
2024-07-26 09:45:00,24526.4,24531.25,24514.45,24516.2,0
2024-07-26 09:50:00,24517.4,24530.15,24511.45,24524.6,0
2024-07-26 09:55:00,24525.45,24551.65,24521.7,24548.85,0


In [14]:
df_5min.tail(10)


Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-07-25 14:40:00,24843.6,24848.3,24827.4,24832.15,0
2025-07-25 14:45:00,24832.3,24836.9,24822.4,24824.1,0
2025-07-25 14:50:00,24823.6,24830.4,24818.15,24825.35,0
2025-07-25 14:55:00,24825.2,24834.35,24824.25,24833.95,0
2025-07-25 15:00:00,24833.7,24839.75,24808.4,24829.55,0
2025-07-25 15:05:00,24829.1,24849.0,24829.1,24849.0,0
2025-07-25 15:10:00,24848.0,24849.0,24841.0,24844.4,0
2025-07-25 15:15:00,24844.4,24845.4,24832.5,24833.7,0
2025-07-25 15:20:00,24833.7,24844.3,24832.75,24838.8,0
2025-07-25 15:25:00,24838.8,24841.05,24823.05,24832.2,0


In [15]:
df_5min.index.to_series().diff().value_counts().head()

date
0 days 00:05:00    18426
0 days 17:50:00      190
2 days 17:50:00       45
1 days 17:50:00        8
3 days 17:50:00        6
Name: count, dtype: int64

In [16]:
df_5min.to_csv("data/nifty_spot_5min.csv")

In [17]:
'''Due to the unavailability of free intraday NIFTY futures data with open interest,
a synthetic continuous futures series is constructed using cost-of-carry assumptions based on spot prices.'''


'Due to the unavailability of free intraday NIFTY futures data with open interest,\na synthetic continuous futures series is constructed using cost-of-carry assumptions based on spot prices.'

In [18]:
import pandas as pd
import numpy as np

spot = pd.read_csv(
    "data/nifty_spot_5min.csv",
    parse_dates=['date'],
    index_col='date'
)

spot.head()


Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-07-25 15:25:00,24415.75,24423.15,24412.3,24415.0,0
2024-07-26 09:15:00,24423.35,24473.5,24410.9,24467.45,0
2024-07-26 09:20:00,24463.95,24492.7,24459.5,24477.45,0
2024-07-26 09:25:00,24478.15,24489.55,24473.5,24477.2,0
2024-07-26 09:30:00,24477.65,24494.2,24462.0,24490.45,0


In [19]:
annual_rate = 0.065
days_to_expiry = 30
carry = annual_rate * (days_to_expiry / 365)


In [20]:
futures = spot.copy()

futures['fut_close'] = futures['close'] * (1 + carry)
futures['fut_open']  = futures['open']  * (1 + carry)
futures['fut_high']  = futures['high']  * (1 + carry)
futures['fut_low']   = futures['low']   * (1 + carry)


In [21]:
futures = futures[['fut_open','fut_high','fut_low','fut_close','volume']]


In [22]:
futures.columns = ['open','high','low','close','volume']


In [23]:
np.random.seed(42)

base_oi = 1_000_000
noise = np.random.normal(0, 50_000, len(futures))

futures['open_interest'] = base_oi + noise.cumsum()
futures['open_interest'] = futures['open_interest'].clip(lower=500_000)


In [24]:
futures.head()


Unnamed: 0_level_0,open,high,low,close,volume,open_interest
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-07-25 15:25:00,24546.190308,24553.629842,24542.721877,24545.436301,0,1024836.0
2024-07-26 09:15:00,24553.830911,24604.248836,24541.314397,24598.166514,0,1017922.0
2024-07-26 09:20:00,24594.647815,24623.551411,24590.174041,24608.219938,0,1050307.0
2024-07-26 09:25:00,24608.923678,24620.384582,24604.248836,24607.968603,0,1126458.0
2024-07-26 09:30:00,24608.421007,24625.059425,24592.687397,24621.28939,0,1114751.0


In [25]:
futures.tail()


Unnamed: 0_level_0,open,high,low,close,volume,open_interest
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-07-25 15:05:00,24961.748616,24981.754932,24961.748616,24981.754932,0,5977491.0
2025-07-25 15:10:00,24980.749589,24981.754932,24973.712192,24977.130356,0,6038421.0
2025-07-25 15:15:00,24977.130356,24978.135699,24965.166781,24966.373192,0,6020911.0
2025-07-25 15:20:00,24966.373192,24977.029822,24965.418116,24971.500438,0,5930986.0
2025-07-25 15:25:00,24971.500438,24973.762459,24955.666295,24964.865178,0,5934496.0


In [26]:
futures.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 18676 entries, 2024-07-25 15:25:00 to 2025-07-25 15:25:00
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   open           18676 non-null  float64
 1   high           18676 non-null  float64
 2   low            18676 non-null  float64
 3   close          18676 non-null  float64
 4   volume         18676 non-null  int64  
 5   open_interest  18676 non-null  float64
dtypes: float64(5), int64(1)
memory usage: 1021.3 KB


In [27]:
futures.to_csv("data/nifty_futures_5min.csv")


In [28]:
#NIFTY OPTIONS

In [29]:
strike_step = 50

options = spot.copy()
options['atm_strike'] = (
    (options['close'] / strike_step).round() * strike_step
).astype(int)


In [30]:
options[['close','atm_strike']].head()


Unnamed: 0_level_0,close,atm_strike
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-07-25 15:25:00,24415.0,24400
2024-07-26 09:15:00,24467.45,24450
2024-07-26 09:20:00,24477.45,24500
2024-07-26 09:25:00,24477.2,24500
2024-07-26 09:30:00,24490.45,24500


In [31]:
#Generating ATM ±1, ±2 Strikes

In [32]:
strike_offsets = [-2, -1, 0, 1, 2]
strike_step = 50

strike_rows = []

for ts, row in options.iterrows():
    atm = row['atm_strike']
    for offset in strike_offsets:
        strike_rows.append({
            'date': ts,
            'spot_close': row['close'],
            'strike': atm + offset * strike_step
        })

options_strikes = pd.DataFrame(strike_rows)
options_strikes = options_strikes.set_index('date')


In [33]:
options_strikes.head(10)


Unnamed: 0_level_0,spot_close,strike
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-07-25 15:25:00,24415.0,24300.0
2024-07-25 15:25:00,24415.0,24350.0
2024-07-25 15:25:00,24415.0,24400.0
2024-07-25 15:25:00,24415.0,24450.0
2024-07-25 15:25:00,24415.0,24500.0
2024-07-26 09:15:00,24467.45,24350.0
2024-07-26 09:15:00,24467.45,24400.0
2024-07-26 09:15:00,24467.45,24450.0
2024-07-26 09:15:00,24467.45,24500.0
2024-07-26 09:15:00,24467.45,24550.0


In [34]:
import numpy as np
from scipy.stats import norm


In [35]:
def black_scholes_price(S, K, T, r, sigma, option_type='call'):
    if T <= 0 or sigma <= 0:
        return 0.0

    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)

    if option_type == 'call':
        price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    else:
        price = K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)

    return price


In [36]:
risk_free_rate = 0.065
days_to_expiry = 30
T = days_to_expiry / 365
base_iv = 0.18


In [37]:
def adjusted_iv(spot, strike, base_iv):
    moneyness = abs(spot - strike) / spot
    return base_iv * (1 + 2 * moneyness)


In [38]:
options_strikes['iv'] = options_strikes.apply(
    lambda x: adjusted_iv(x['spot_close'], x['strike'], base_iv),
    axis=1
)

options_strikes['call_ltp'] = options_strikes.apply(
    lambda x: black_scholes_price(
        S=x['spot_close'],
        K=x['strike'],
        T=T,
        r=risk_free_rate,
        sigma=x['iv'],
        option_type='call'
    ),
    axis=1
)

options_strikes['put_ltp'] = options_strikes.apply(
    lambda x: black_scholes_price(
        S=x['spot_close'],
        K=x['strike'],
        T=T,
        r=risk_free_rate,
        sigma=x['iv'],
        option_type='put'
    ),
    axis=1
)


In [39]:
options_strikes[['spot_close','strike','iv','call_ltp','put_ltp']].head(10)


Unnamed: 0_level_0,spot_close,strike,iv,call_ltp,put_ltp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-07-25 15:25:00,24415.0,24300.0,0.181696,636.400594,391.924844
2024-07-25 15:25:00,24415.0,24350.0,0.180958,606.594063,411.851902
2024-07-25 15:25:00,24415.0,24400.0,0.180221,577.537594,432.529023
2024-07-25 15:25:00,24415.0,24450.0,0.180516,552.109468,456.834485
2024-07-25 15:25:00,24415.0,24500.0,0.181253,528.703719,483.162325
2024-07-26 09:15:00,24467.45,24350.0,0.181728,639.098068,391.905907
2024-07-26 09:15:00,24467.45,24400.0,0.180992,609.258026,411.799454
2024-07-26 09:15:00,24467.45,24450.0,0.180257,580.165915,432.440932
2024-07-26 09:15:00,24467.45,24500.0,0.180479,554.498749,456.507355
2024-07-26 09:15:00,24467.45,24550.0,0.181215,531.052101,482.794296


In [40]:
(options_strikes['call_ltp'] > 0).all()
(options_strikes['put_ltp'] > 0).all()


np.True_

In [41]:
import numpy as np

option_rows = []

for _, row in options_strikes.iterrows():
    for opt_type in ['CE', 'PE']:
        option_rows.append({
            'date': row.name,
            'spot_close': row['spot_close'],
            'strike': row['strike'],
            'option_type': opt_type,
            'ltp': row['call_ltp'] if opt_type == 'CE' else row['put_ltp'],
            'iv': row['iv']
        })

options_long = pd.DataFrame(option_rows).set_index('date')

np.random.seed(42)
options_long['open_interest'] = (
    200_000 + np.random.randint(-20_000, 20_000, len(options_long))
)

options_long['open_interest'] = options_long['open_interest'].clip(lower=50_000)

In [42]:
options_long.groupby(level=0).size().head()


date
2024-07-25 15:25:00    10
2024-07-26 09:15:00    10
2024-07-26 09:20:00    10
2024-07-26 09:25:00    10
2024-07-26 09:30:00    10
dtype: int64

In [43]:
options_long['open_interest'].min()

180000

In [44]:
options_long.head()

Unnamed: 0_level_0,spot_close,strike,option_type,ltp,iv,open_interest
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-07-25 15:25:00,24415.0,24300.0,CE,636.400594,0.181696,195795
2024-07-25 15:25:00,24415.0,24300.0,PE,391.924844,0.181696,180860
2024-07-25 15:25:00,24415.0,24350.0,CE,606.594063,0.180958,218158
2024-07-25 15:25:00,24415.0,24350.0,PE,411.851902,0.180958,191284
2024-07-25 15:25:00,24415.0,24400.0,CE,577.537594,0.180221,186265


In [45]:
options_long['volume'] = np.random.randint(50, 500, len(options_long))


In [46]:
options_long.to_csv("data/nifty_options_5min.csv")


In [49]:
spot.shape

(18676, 5)

In [50]:
futures.shape

(18676, 6)

In [51]:
options.shape

(18676, 6)