In [1]:
import pandas as pd
import numpy as np


In [2]:
spot = pd.read_csv(
    "data/processed/nifty_spot_clean_5min.csv",
    parse_dates=['date'],
    index_col='date'
)

futures = pd.read_csv(
    "data/processed/nifty_futures_clean_5min.csv",
    parse_dates=['date'],
    index_col='date'
)

options = pd.read_csv(
    "data/processed/nifty_options_clean_5min.csv",
    parse_dates=['date'],
    index_col='date'
)


In [3]:
spot.shape, futures.shape, options.shape


((18676, 5), (18676, 6), (186760, 7))

In [4]:
spot['log_return'] = np.log(spot['close']).diff()
futures['log_return'] = np.log(futures['close']).diff()


In [5]:
spot['pct_return'] = spot['close'].pct_change()
futures['pct_return'] = futures['close'].pct_change()


In [6]:
spot['vol_30'] = spot['log_return'].rolling(30).std() * np.sqrt(30)
spot['vol_60'] = spot['log_return'].rolling(60).std() * np.sqrt(60)


In [7]:
for span in [9, 21, 50]:
    spot[f'ema_{span}'] = spot['close'].ewm(span=span, adjust=False).mean()
    futures[f'ema_{span}'] = futures['close'].ewm(span=span, adjust=False).mean()


In [8]:
spot['trend_9_21'] = spot['ema_9'] - spot['ema_21']
spot['trend_21_50'] = spot['ema_21'] - spot['ema_50']


In [9]:
 atm_options = options[options['strike'] == options['spot_close'].round(-2)]


In [10]:
from scipy.stats import norm

def bs_d1(S, K, T, r, sigma):
    return (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))

def delta(S, K, T, r, sigma, option_type):
    d1 = bs_d1(S, K, T, r, sigma)
    return norm.cdf(d1) if option_type == 'CE' else norm.cdf(d1) - 1

def gamma(S, K, T, r, sigma):
    d1 = bs_d1(S, K, T, r, sigma)
    return norm.pdf(d1) / (S * sigma * np.sqrt(T))


In [11]:
T = 1 / 252   # 1 trading day
r = 0.06


In [12]:
atm_options['delta'] = atm_options.apply(
    lambda x: delta(x['spot_close'], x['strike'], T, r, x['iv'], x['option_type']),
    axis=1
)

atm_options['gamma'] = atm_options.apply(
    lambda x: gamma(x['spot_close'], x['strike'], T, r, x['iv']),
    axis=1
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  atm_options['delta'] = atm_options.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  atm_options['gamma'] = atm_options.apply(


In [13]:
pcr = (
    options
    .groupby([options.index, 'option_type'])['open_interest']
    .sum()
    .unstack()
)

pcr['pcr_oi'] = pcr['PE'] / pcr['CE']


In [14]:
spot = spot.merge(pcr[['pcr_oi']], left_index=True, right_index=True, how='left')


In [15]:
spot[['log_return', 'vol_30', 'ema_9', 'pcr_oi']].dropna().head()


Unnamed: 0_level_0,log_return,vol_30,ema_9,pcr_oi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-07-26 11:40:00,0.000146,0.003085,24632.291009,0.931611
2024-07-26 11:45:00,0.000138,0.002425,24633.662807,1.015456
2024-07-26 11:50:00,0.00016,0.002419,24635.550246,1.014858
2024-07-26 11:55:00,0.000323,0.002408,24638.650197,0.987632
2024-07-26 12:00:00,-8.1e-05,0.002407,24640.730157,0.999211


In [16]:
spot.to_csv("data/processed/nifty_spot_features_5min.csv")

In [17]:
spot.columns

Index(['open', 'high', 'low', 'close', 'volume', 'log_return', 'pct_return',
       'vol_30', 'vol_60', 'ema_9', 'ema_21', 'ema_50', 'trend_9_21',
       'trend_21_50', 'pcr_oi'],
      dtype='object')