In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the merged 5-minute bars, parse the timestamp column and order the
# rows chronologically so that the rolling/ewm features below see bars in
# time order.
df = pd.read_csv("../data/nifty_merged_5min.csv")
df = df.assign(datetime=pd.to_datetime(df["datetime"])).sort_values(by="datetime")

# Quick schema / null-count check of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18676 entries, 0 to 18675
Data columns (total 16 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   datetime     18676 non-null  datetime64[ns]
 1   spot_open    18676 non-null  float64       
 2   spot_high    18676 non-null  float64       
 3   spot_low     18676 non-null  float64       
 4   spot_close   18676 non-null  float64       
 5   spot_volume  18676 non-null  int64         
 6   fut_open     18676 non-null  float64       
 7   fut_high     18676 non-null  float64       
 8   fut_low      18676 non-null  float64       
 9   fut_close    18676 non-null  float64       
 10  fut_volume   18676 non-null  int64         
 11  opt_open     18676 non-null  float64       
 12  opt_high     18676 non-null  float64       
 13  opt_low      18676 non-null  float64       
 14  opt_close    18676 non-null  float64       
 15  opt_volume   18676 non-null  int64         
dtypes: d

In [3]:
# Exponential moving averages of the spot close (fast = 5 bars, slow = 15 bars).
# adjust=False selects the recursive form of the EMA.
df = df.assign(
    ema_5=df["spot_close"].ewm(span=5, adjust=False).mean(),
    ema_15=df["spot_close"].ewm(span=15, adjust=False).mean(),
)

In [4]:
# Simple bar-over-bar percentage returns for spot and futures closes.
# The first row has no previous bar, so both returns are NaN there.
df = df.assign(
    spot_return=df["spot_close"].pct_change(),
    fut_return=df["fut_close"].pct_change(),
)


In [5]:
# Realised-volatility proxy used in place of implied volatility, annualised.
#
# spot_return is a per-bar return on 5-minute data, so the rolling standard
# deviation is a per-bar figure. Annualising it needs sqrt(bars per year);
# scaling by sqrt(252) alone treats each bar as a full trading day and
# understates volatility by a factor of sqrt(bars per day).
TRADING_DAYS_PER_YEAR = 252
BARS_PER_DAY = 75  # assumes a 09:15-15:30 session at 5-minute bars - TODO confirm against the data

df["iv_proxy"] = (
    df["spot_return"].rolling(20).std()
    * np.sqrt(TRADING_DAYS_PER_YEAR * BARS_PER_DAY)
)

# Rows without a full 20-bar window are NaN; back-fill them from the first
# available estimate so they are not dropped later.
# NOTE(review): back-filling assigns a later value to earlier bars (look-ahead).
df["iv_proxy"] = df["iv_proxy"].bfill()


In [6]:
from py_vollib.black_scholes.greeks.analytical import delta, gamma, theta, vega, rho

In [7]:
# Constant Black-Scholes inputs shared by every row when computing greeks.
TIME_TO_EXPIRY: float = 7 / 365  # weekly options: 7 calendar days, in years
RISK_FREE_RATE: float = 0.065  # presumably an annualised rate as a decimal - confirm


In [8]:
def compute_greeks(row, flag):
    """Compute at-the-money Black-Scholes greeks for one bar.

    The strike is set equal to the bar's spot close (ATM); the risk-free rate
    and time to expiry come from the module-level ``RISK_FREE_RATE`` and
    ``TIME_TO_EXPIRY`` constants, and volatility from the row's ``iv_proxy``.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``"spot_close"`` and ``"iv_proxy"``.
    flag : str
        Option type forwarded to the greek functions (the notebook passes
        ``"c"`` for calls and ``"p"`` for puts).

    Returns
    -------
    pandas.Series
        Indexed by ``delta, gamma, theta, vega, rho``. If any greek
        calculation raises, all five values are NaN under the *same* labels,
        so ``DataFrame.apply(..., axis=1)`` always yields exactly these five
        columns regardless of which rows failed.
    """
    S = row["spot_close"]
    K = S  # ATM
    r = RISK_FREE_RATE
    t = TIME_TO_EXPIRY
    iv = row["iv_proxy"]

    greek_names = ["delta", "gamma", "theta", "vega", "rho"]

    try:
        return pd.Series({
            "delta": delta(flag, S, K, t, r, iv),
            "gamma": gamma(flag, S, K, t, r, iv),
            "theta": theta(flag, S, K, t, r, iv),
            "vega": vega(flag, S, K, t, r, iv),
            "rho": rho(flag, S, K, t, r, iv)
        })
    except Exception:
        # Best-effort: a row whose inputs break the pricing maths (e.g. zero
        # volatility) gets NaNs rather than aborting the whole apply. Catch
        # Exception, not everything, so Ctrl-C / SystemExit still propagate,
        # and keep the named index so failed rows line up with successful ones.
        return pd.Series(np.nan, index=greek_names)


In [9]:
# Row-wise ATM call greeks ("c" flag), relabelled with a call_ prefix and
# appended to the feature frame as new columns.
call_greeks = df.apply(compute_greeks, axis=1, args=("c",))
call_greeks.columns = [
    "call_delta",
    "call_gamma",
    "call_theta",
    "call_vega",
    "call_rho",
]

df = pd.concat([df, call_greeks], axis="columns")


In [10]:
# Row-wise ATM put greeks ("p" flag), relabelled with a put_ prefix and
# appended to the feature frame as new columns.
put_greeks = df.apply(compute_greeks, axis=1, args=("p",))
put_greeks.columns = [
    "put_delta",
    "put_gamma",
    "put_theta",
    "put_vega",
    "put_rho",
]

df = pd.concat([df, put_greeks], axis="columns")


In [11]:
# Only one volatility series exists, so the "average" IV is a copy of iv_proxy.
# NOTE(review): iv_spread is computed as call delta minus put delta, not as a
# difference of implied volatilities - confirm this is the intended feature.
df = df.assign(
    avg_iv=df["iv_proxy"],
    iv_spread=df["call_delta"].sub(df["put_delta"]),
)

In [12]:
# NOTE(review): this "put/call ratio" is opt_volume / (opt_volume + 1), built
# from a single option-volume column - confirm whether separate put and call
# volumes should be used instead.
option_volume = df["opt_volume"]
df["pcr_volume"] = option_volume.div(option_volume.add(1))
df["pcr_oi"] = df["pcr_volume"]  # proxy

In [13]:
# Futures premium/discount to spot, expressed as a fraction of the spot close.
df["futures_basis"] = df["fut_close"].sub(df["spot_close"]).div(df["spot_close"])

In [14]:
# Ratio of absolute call delta to absolute put delta; the 1e-6 term keeps the
# denominator away from zero.
df["delta_neutral_ratio"] = df["call_delta"].abs().div(df["put_delta"].abs().add(1e-6))

In [15]:
# Call gamma scaled by the spot close.
df["gamma_exposure"] = df["call_gamma"].mul(df["spot_close"])

In [16]:
# Discard every row that has a missing value in any column, then re-inspect
# the schema and row count of the finished feature frame.
df = df.dropna(axis="index", how="any")
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18675 entries, 1 to 18675
Data columns (total 38 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   datetime             18675 non-null  datetime64[ns]
 1   spot_open            18675 non-null  float64       
 2   spot_high            18675 non-null  float64       
 3   spot_low             18675 non-null  float64       
 4   spot_close           18675 non-null  float64       
 5   spot_volume          18675 non-null  int64         
 6   fut_open             18675 non-null  float64       
 7   fut_high             18675 non-null  float64       
 8   fut_low              18675 non-null  float64       
 9   fut_close            18675 non-null  float64       
 10  fut_volume           18675 non-null  int64         
 11  opt_open             18675 non-null  float64       
 12  opt_high             18675 non-null  float64       
 13  opt_low              18675 non-null 

In [17]:
# Persist the engineered features; the row index is not written to the file.
df.to_csv(path_or_buf="../data/nifty_features_5min.csv", index=False)
