In [1]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd

# Project root is the parent of the current working directory.
PROJECT_ROOT = Path.cwd().parent

# Prepend both search locations in one step. The resulting order is
# [<root>/qtenv/Lib/site-packages, <root>, ...previous sys.path...].
# NOTE(review): "qtenv/Lib/site-packages" looks like a Windows virtualenv layout -
# confirm this is still needed when the kernel already runs inside that env.
_extra_search_paths = [
    PROJECT_ROOT / "qtenv" / "Lib" / "site-packages",
    PROJECT_ROOT,
]
sys.path[:0] = [str(entry) for entry in _extra_search_paths]

from src.features import (
    compute_ema, compute_returns, futures_basis,
    pcr_oi, pcr_volume, avg_iv, iv_spread
)
from src.greeks import compute_greeks

# Notebook-wide constants.
RISK_FREE_RATE = 0.065
STRIKE_STEP = 50

# Read the merged 5-minute file, parse the timestamp column as datetimes,
# then order rows chronologically and renumber the index from 0.
merged_csv_path = PROJECT_ROOT / "data" / "merged" / "nifty_merged_5min.csv"
df = pd.read_csv(merged_csv_path, parse_dates=["timestamp"])
df = df.sort_values("timestamp")
df = df.reset_index(drop=True)



#### EMA Indicators

In [2]:
# One EMA column per look-back length passed to compute_ema, added in order:
# "ema_5" first, then "ema_15".
for ema_span in (5, 15):
    df[f"ema_{ema_span}"] = compute_ema(df, ema_span)

#### ATM Greeks (Call and Put)

In [3]:
# NOTE(review): this cell repeats the constants and the ATM-strike computation that
# the following cell performs again - it looks like an earlier draft; consider deleting it.
STRIKE_STEP = 50
# Fixed typo: this was spelled RISH_FREE_RATE, which only created a stray global
# that nothing else in the notebook reads.
RISK_FREE_RATE = 0.065

# Spot price rounded to the nearest multiple of STRIKE_STEP.
df["atm_strike"] = (df["spot_close"] / STRIKE_STEP).round() * STRIKE_STEP
# NOTE(review): the Greeks cell reads the lower-case "time_to_expiry" column, not this
# one, so this upper-case column is only carried through to the saved CSV. It is kept
# here so the output schema does not change - confirm whether it can be dropped.
df["TIME_TO_EXPIRY"] = 7 / 365

In [6]:
STRIKE_STEP = 50
RISK_FREE_RATE = 0.065


def _lookup_per_row(frame, col_names):
    """Pick, for every row, the value of the column named for that row.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source table.
    col_names : sequence of str, same length as ``frame``
        ``col_names[i]`` is the column whose value is wanted for row ``i``
        (rows are matched by position, not by index label).

    Returns
    -------
    numpy.ndarray of float64, shape ``(len(frame),)``
        The selected values; NaN where the requested column does not exist
        in ``frame``.
    """
    names = np.asarray(col_names, dtype=object)
    picked = np.full(len(frame), np.nan, dtype=float)
    # One vectorised copy per distinct column name, instead of materialising
    # a whole row with frame.iloc[i] for every single row.
    for name in pd.unique(names):
        if name not in frame.columns:
            continue
        rows = names == name
        picked[rows] = frame[name].to_numpy(dtype=float, na_value=np.nan)[rows]
    return picked


# ATM STRIKE + EXPIRY
# Spot price rounded to the nearest multiple of STRIKE_STEP.
df["atm_strike"] = (df["spot_close"] / STRIKE_STEP).round() * STRIKE_STEP
df["time_to_expiry"] = 7 / 365  # weekly expiry approximation

# PER-ROW ATM COLUMN NAMES
# Nullable Int64 keeps rows with a missing spot price from raising in the cast:
# they become "<NA>", match no column, and therefore resolve to NaN below.
atm_str = df["atm_strike"].astype("Int64").astype(str)

ce_iv_cols  = "opt_iv_" + atm_str + "_CE"
pe_iv_cols  = "opt_iv_" + atm_str + "_PE"
ce_oi_cols  = "opt_open_interest_" + atm_str + "_CE"
pe_oi_cols  = "opt_open_interest_" + atm_str + "_PE"
ce_vol_cols = "opt_volume_" + atm_str + "_CE"
pe_vol_cols = "opt_volume_" + atm_str + "_PE"

# COLUMN LOOKUP
# Implied vols are divided by 100 before being stored.
# NOTE(review): presumably the source IV columns are in percent - confirm.
df["atm_ce_iv"] = _lookup_per_row(df, ce_iv_cols) / 100
df["atm_pe_iv"] = _lookup_per_row(df, pe_iv_cols) / 100

# Open interest and volume are stored as looked up (always float64, so that
# missing strikes can be represented as NaN).
df["atm_ce_oi"] = _lookup_per_row(df, ce_oi_cols)
df["atm_pe_oi"] = _lookup_per_row(df, pe_oi_cols)
df["atm_ce_vol"] = _lookup_per_row(df, ce_vol_cols)
df["atm_pe_vol"] = _lookup_per_row(df, pe_vol_cols)



In [12]:
import numpy as np
from py_vollib.black_scholes.greeks.analytical import (
    delta, gamma, theta, vega, rho
)

# Greeks are only computed for rows where both ATM implied vols are present.
# NOTE(review): an IV of exactly 0 still passes this mask - confirm how the
# py_vollib functions behave for sigma == 0.
valid = df["atm_ce_iv"].notna() & df["atm_pe_iv"].notna()

S = df.loc[valid, "spot_close"].values
K = df.loc[valid, "atm_strike"].values
T = df.loc[valid, "time_to_expiry"].values
r = RISK_FREE_RATE

ce_iv = df.loc[valid, "atm_ce_iv"].values
pe_iv = df.loc[valid, "atm_pe_iv"].values

# Element-wise wrappers around the py_vollib functions. otypes is pinned because
# np.vectorize otherwise infers the output type from the first element and raises
# ValueError when the input is empty (i.e. when no row has both IVs).
v_delta = np.vectorize(delta, otypes=[float])
v_gamma = np.vectorize(gamma, otypes=[float])
v_theta = np.vectorize(theta, otypes=[float])
v_vega  = np.vectorize(vega, otypes=[float])
v_rho   = np.vectorize(rho, otypes=[float])

_valid_rows = valid.to_numpy()
_greek_funcs = (
    ("delta", v_delta),
    ("gamma", v_gamma),
    ("theta", v_theta),
    ("vega", v_vega),
    ("rho", v_rho),
)

# Writes ce_delta ... ce_rho (flag "c", call IV) and then pe_delta ... pe_rho
# (flag "p", put IV). Each column is rebuilt from NaN so rows outside the mask
# never keep values from a previous run of this cell.
for _leg, _flag, _iv in (("ce", "c", ce_iv), ("pe", "p", pe_iv)):
    for _greek, _fn in _greek_funcs:
        _column = np.full(len(df), np.nan)
        _column[_valid_rows] = _fn(_flag, S, K, T, r, _iv)
        df[f"{_leg}_{_greek}"] = _column


#### Derived Features

In [13]:
# Features from the ATM call/put implied vols.
atm_call_iv = df["atm_ce_iv"]
atm_put_iv = df["atm_pe_iv"]
df["avg_iv"] = avg_iv(atm_call_iv, atm_put_iv)
df["iv_spread"] = iv_spread(atm_call_iv, atm_put_iv)

# Put/call ratios: the put series is passed first, the call series second.
df["pcr_oi"] = pcr_oi(df["atm_pe_oi"], df["atm_ce_oi"])
df["pcr_volume"] = pcr_volume(df["atm_pe_vol"], df["atm_ce_vol"])

# Futures-vs-spot features.
spot_close = df["spot_close"]
fut_close = df["fut_close"]
df["futures_basis"] = futures_basis(fut_close, spot_close)
df["spot_returns"] = compute_returns(spot_close)
df["futures_returns"] = compute_returns(fut_close)

# |call delta| / |put delta|, and spot * call gamma * call open interest.
df["delta_neutral_ratio"] = df["ce_delta"].abs().div(df["pe_delta"].abs())
df["gamma_exposure"] = spot_close.mul(df["ce_gamma"]).mul(df["atm_ce_oi"])


In [14]:
# Output folder is <project root>/data/features; create it (and any missing
# parents) if needed, without failing when it already exists.
FEATURE_DIR = PROJECT_ROOT.joinpath("data", "features")
FEATURE_DIR.mkdir(parents=True, exist_ok=True)

# Write the feature table without the index column.
features_csv_path = FEATURE_DIR / "nifty_features_5min.csv"
df.to_csv(features_csv_path, index=False)

print("nifty_features_5min.csv saved")


nifty_features_5min.csv saved
