### Imports

In [1]:
import pandas as pd
import numpy as np

### Load 5-min merged data

In [2]:
# Load the merged 5-minute dataset from the cleaned-data folder.
# NOTE: this is an absolute, machine-specific path; the notebook will not run
# on another machine without editing it.
merged_csv_path = r"C:\Users\jkmfi\Documents\Quant_nifty_project\data\clean\nifty_merged_5min.csv"
merged = pd.read_csv(merged_csv_path)

In [3]:
# Parse the timestamp column, then put the rows in chronological order
# with a fresh 0..n-1 index.
merged['Date'] = pd.to_datetime(merged['Date'])
merged = merged.sort_values(by='Date', ignore_index=True)

### Basic Cleaning & Spot Features

In [4]:
# Keep only the bars that have a spot close; the original index labels are
# preserved (no reset), exactly as with a plain dropna.
has_spot_close = merged['Close_spot'].notna()
merged = merged.loc[has_spot_close].copy()

In [5]:
# Spot features: simple return, log return, and the 20-bar rolling standard
# deviation of the simple return.
spot_close = merged['Close_spot']
prev_spot_close = spot_close.shift(1)

merged['spot_return'] = spot_close.pct_change()
merged['spot_log_return'] = np.log(spot_close / prev_spot_close)
merged['spot_vol_20'] = merged['spot_return'].rolling(window=20).std()


In [6]:
# EMA trend features: a 5-span and a 15-span exponential moving average of the
# spot close (adjust=False), plus a flag that is +1 while EMA_5 is strictly
# above EMA_15 and -1 otherwise (ties and NaN comparisons also give -1).
for ema_span in (5, 15):
    merged[f'EMA_{ema_span}'] = merged['Close_spot'].ewm(span=ema_span, adjust=False).mean()

fast_above_slow = merged['EMA_5'] > merged['EMA_15']
merged['ema_signal'] = np.where(fast_above_slow, 1, -1)


### Futures Features

In [7]:
# Futures features: absolute basis (futures close minus spot close) and the
# bar-over-bar simple return of the futures close.
fut_close = merged['Close_fut']
merged['fut_basis'] = fut_close.sub(merged['Close_spot'])
merged['fut_return'] = fut_close.pct_change()

### Load & Process Options Data

In [8]:
# Load the cleaned options table.
# NOTE: this is an absolute, machine-specific path; the notebook will not run
# on another machine without editing it.
options_csv_path = r"C:\Users\jkmfi\Documents\Quant_nifty_project\data\clean\options_clean.csv"
options_clean = pd.read_csv(options_csv_path)

In [9]:
# Remove leading/trailing whitespace from every header name, then parse the
# trade date so it can be used as a datetime key later on.
options_clean.columns = options_clean.columns.map(lambda header: header.strip())
options_clean['TradeDate'] = pd.to_datetime(options_clean['TradeDate'])

### Aggregate Options by CE/PE

In [10]:
# One row per (TradeDate, OptionType): totals for OI, change in OI and volume,
# and plain means for IV and LTP across every row in the group.
option_agg_spec = {
    'OI_sum': ('OI', 'sum'),
    'CHNG_OI_sum': ('CHNG IN OI', 'sum'),
    'VOLUME_sum': ('VOLUME', 'sum'),
    'IV_mean': ('IV', 'mean'),
    'LTP_mean': ('LTP', 'mean'),
}
options_by_date_and_type = options_clean.groupby(['TradeDate', 'OptionType'])
option_features = options_by_date_and_type.agg(**option_agg_spec).reset_index()

In [11]:
# Reshape to one row per TradeDate, with a (metric, OptionType) column for
# every aggregated metric.
pivot_metrics = ['OI_sum', 'CHNG_OI_sum', 'VOLUME_sum', 'IV_mean', 'LTP_mean']
option_pivot = option_features.pivot(index='TradeDate', columns='OptionType', values=pivot_metrics)

In [12]:
# Collapse each (metric, option type) column pair into a single
# "<metric>_<type>" name, then move TradeDate from the index back to a column.
option_pivot.columns = ["{}_{}".format(*pair) for pair in option_pivot.columns.tolist()]
option_pivot = option_pivot.reset_index()


### Merge Options with 5-min Data

In [13]:
# Add a TradeDate join key: each bar's timestamp truncated to midnight.
merged = merged.assign(TradeDate=lambda bars: bars['Date'].dt.normalize())

In [14]:
# Left-join the per-date option metrics onto the bars; bars whose TradeDate has
# no options row keep NaN in the option columns.
merged = merged.merge(option_pivot, how='left', on='TradeDate')

In [15]:
# Forward-fill every column whose name contains "_CE" or "_PE".
# NOTE(review): the fill is not limited to a single TradeDate, so a date with no
# options rows inherits the last earlier snapshot. The preview printed further
# down shows identical values on ten different dates -- confirm this is intended.
option_cols = [col for col in merged.columns if any(tag in col for tag in ('_CE', '_PE'))]
merged[option_cols] = merged[option_cols].ffill()

### Compute Option Edge Metrics

In [16]:
# Sanity check: list the option-derived columns now present in `merged`.
option_cols = list(filter(lambda col: '_CE' in col or '_PE' in col, merged.columns))
print("Option columns:", option_cols)

Option columns: ['OI_sum_CE', 'OI_sum_PE', 'CHNG_OI_sum_CE', 'CHNG_OI_sum_PE', 'VOLUME_sum_CE', 'VOLUME_sum_PE', 'IV_mean_CE', 'IV_mean_PE', 'LTP_mean_CE', 'LTP_mean_PE']


In [17]:
# Option edge metrics, all put-side relative to call-side:
#   PCR_OI / PCR_VOLUME : put/call ratios of summed OI and summed volume
#   IV_SKEW             : mean put IV minus mean call IV
#   NET_OI_DIFF         : summed change in put OI minus summed change in call OI
# No zero-denominator guard is applied to the two ratios.
merged['PCR_OI'] = merged['OI_sum_PE'].div(merged['OI_sum_CE'])
merged['PCR_VOLUME'] = merged['VOLUME_sum_PE'].div(merged['VOLUME_sum_CE'])
merged['IV_SKEW'] = merged['IV_mean_PE'].sub(merged['IV_mean_CE'])
merged['NET_OI_DIFF'] = merged['CHNG_OI_sum_PE'].sub(merged['CHNG_OI_sum_CE'])

In [18]:
# Optional: carry the last available value of each edge metric forward so that
# later bars are not left empty.
edge_metric_cols = ['PCR_OI', 'PCR_VOLUME', 'IV_SKEW', 'NET_OI_DIFF']
merged[edge_metric_cols] = merged[edge_metric_cols].ffill()

In [19]:
# Preview the first ten rows of the edge metrics and report the frame's size.
preview_cols = ['TradeDate', 'PCR_OI', 'PCR_VOLUME', 'IV_SKEW', 'NET_OI_DIFF']
edge_preview = merged[preview_cols].head(10)
print(edge_preview)
print("Final shape:", merged.shape)

   TradeDate    PCR_OI  PCR_VOLUME   IV_SKEW  NET_OI_DIFF
0 2025-01-14  0.192743    0.217063  2.126667    -273156.0
1 2025-01-15  0.192743    0.217063  2.126667    -273156.0
2 2025-01-16  0.192743    0.217063  2.126667    -273156.0
3 2025-01-17  0.192743    0.217063  2.126667    -273156.0
4 2025-01-20  0.192743    0.217063  2.126667    -273156.0
5 2025-01-21  0.192743    0.217063  2.126667    -273156.0
6 2025-01-22  0.192743    0.217063  2.126667    -273156.0
7 2025-01-23  0.192743    0.217063  2.126667    -273156.0
8 2025-01-24  0.192743    0.217063  2.126667    -273156.0
9 2025-01-27  0.192743    0.217063  2.126667    -273156.0
Final shape: (250, 51)


In [20]:
# Make sure both timestamp columns are datetimes before any date arithmetic.
for ts_col in ('Date', 'Expiry'):
    merged[ts_col] = pd.to_datetime(merged[ts_col])


In [21]:
# Force the spot close to a float dtype.
merged = merged.astype({'Close_spot': float})

### Compute Days to Expiry

In [22]:
# Days to expiry, counted in whole calendar days.
# Both sides are truncated to midnight before subtracting. `.dt.days` floors a
# timedelta, so subtracting an intraday bar time (e.g. 09:15) from a date-only
# expiry would understate every intraday bar by one day and give -1 on expiry
# day itself. For rows that are already at midnight the result is unchanged.
# (Assumes `Expiry` carries only a date -- TODO confirm against the CSV.)
expiry_day = merged['Expiry'].dt.normalize()
bar_day = merged['Date'].dt.normalize()
merged['DaysToExpiry'] = (expiry_day - bar_day).dt.days
# Convert to fraction of year
merged['T'] = merged['DaysToExpiry'] / 365

### Identify ATM Options

In [23]:
# ATM strike for both legs.
# Simplification: no strike is looked up in the option chain; the spot close
# itself is used as the "at-the-money" strike for the call and for the put.
for option_leg in ('CE', 'PE'):
    merged[f'ATM_strike_{option_leg}'] = merged['Close_spot']

In [24]:
# Call / put premium used for the ATM legs.
# These are copies of LTP_mean_CE / LTP_mean_PE, i.e. the per-date mean LTP over
# every row of that option type from the aggregation step, not the premium of a
# single at-the-money contract.
merged = merged.assign(
    ATM_call_price=merged['LTP_mean_CE'],
    ATM_put_price=merged['LTP_mean_PE'],
)

In [25]:
# Implied volatility used for the ATM legs: copies of the per-date mean IV of
# each option type (IV_mean_CE / IV_mean_PE).
merged = merged.assign(
    ATM_IV_CE=merged['IV_mean_CE'],
    ATM_IV_PE=merged['IV_mean_PE'],
)

### Calculate Greeks Using mibian

In [26]:
pip install mibian

Note: you may need to restart the kernel to use updated packages.


In [27]:
import mibian

In [28]:
# Annual risk-free rate, expressed in percent (6.5 means 6.5 %), as passed to
# the option-pricing call below.
rf: float = 6.5

In [29]:
# Define functions to safely calculate Greeks
def calc_greeks(row):
    """Return Black-Scholes Greeks for one row of `merged` as a 6-element Series.

    Reads `Close_spot`, `ATM_strike_CE`, `T`, `ATM_call_price`, `ATM_put_price`
    and `ATM_IV_CE` from `row`, plus the module-level `rf` (risk-free rate, %).

    Returns
    -------
    pd.Series indexed by delta_CE, delta_PE, gamma, vega, theta, rho, where
    theta and rho are the call-side values. Every entry is NaN when a price or
    the IV is missing, when no time is left to expiry, when the IV is not
    positive, or when the pricing maths raises an arithmetic/value error.
    Other errors (for example `mibian` not being imported) are no longer
    swallowed and will propagate.
    """
    greek_names = ['delta_CE','delta_PE','gamma','vega','theta','rho']
    no_greeks = pd.Series([np.nan]*6, index=greek_names)

    # Rows without both option prices are skipped, as before.
    if pd.isna(row['ATM_call_price']) or pd.isna(row['ATM_put_price']):
        return no_greeks

    # `T` is a year fraction; the pricing call is given days.
    days_left = row['T'] * 365
    iv_pct = row['ATM_IV_CE']

    # Expired / same-day rows and unusable IVs cannot be priced; return NaN
    # explicitly instead of relying on an exception inside the library.
    if pd.isna(days_left) or pd.isna(iv_pct) or days_left <= 0 or iv_pct <= 0:
        return no_greeks

    try:
        # The IV is passed through unchanged. The notebook's printed IV_SKEW of
        # about 2.13 suggests the IV columns are already in percent, which is
        # the unit mibian takes; the previous extra *100 would have inflated
        # the volatility a hundredfold. TODO confirm the unit in the options CSV.
        # callPrice / putPrice are not passed: every Greek read below is
        # derived from `volatility`, not from the quoted prices.
        bs = mibian.BS(
            [row['Close_spot'], row['ATM_strike_CE'], rf, days_left],
            volatility=iv_pct,
        )
    except (ArithmeticError, ValueError, TypeError):
        # Numerical failure inside the pricing formula (e.g. log of a
        # non-positive ratio, division by zero, non-numeric input).
        return no_greeks

    return pd.Series(
        [bs.callDelta, bs.putDelta, bs.gamma, bs.vega, bs.callTheta, bs.callRho],
        index=greek_names,
    )

In [None]:
# Run calc_greeks on every row and attach the six resulting columns to `merged`.
greek_cols = ['delta_CE','delta_PE','gamma','vega','theta','rho']
merged[greek_cols] = merged.apply(calc_greeks, axis=1)

### Derived Features

#### Average IV

In [None]:
# Midpoint of the mean call IV and the mean put IV (NaN if either side is NaN).
merged['IV_avg'] = merged['IV_mean_CE'].add(merged['IV_mean_PE']).div(2)

#### IV Spread

In [None]:
# Mean call IV minus mean put IV -- the sign-flipped counterpart of IV_SKEW,
# which is computed as put minus call.
merged['IV_spread'] = merged['IV_mean_CE'].sub(merged['IV_mean_PE'])

#### PCR (OI-based)

In [None]:
# Put/call ratio of summed open interest. This repeats the formula already used
# for PCR_OI in the option edge metrics cell and overwrites that column.
merged['PCR_OI'] = merged['OI_sum_PE'].div(merged['OI_sum_CE'])

#### PCR (Volume-based)

In [None]:
# Put/call ratio of summed volume. This repeats the formula already used for
# PCR_VOLUME in the option edge metrics cell and overwrites that column.
merged['PCR_VOLUME'] = merged['VOLUME_sum_PE'].div(merged['VOLUME_sum_CE'])

#### Futures Basis

In [None]:
# Futures basis as a fraction of spot: (futures close - spot close) / spot close.
spot_px = merged['Close_spot']
merged['fut_basis_pct'] = merged['Close_fut'].sub(spot_px).div(spot_px)