# 3. Feature Engineering
## Create Technical Indicators and Derived Features

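This notebook creates:
- EMA indicators (5 and 15 period)
- Options Greeks (Delta, Gamma, Theta, Vega, Rho)
- Derived features (IV metrics, PCR, Futures Basis, etc.)
- Time-based features (hour, minute, day of week, opening/closing-hour flags)
- Lag features (lags 1, 2 and 3 of key columns)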

In [None]:
import sys
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Make ../src importable before the project-local imports below.
sys.path.append('../src')
from data_utils import load_data
from features import EMAIndicators, DerivedFeatures, TimeBasedFeatures, LagFeatures, create_feature_set
from greeks import GreeksCalculator

warnings.filterwarnings('ignore')

print("Libraries imported successfully!")

In [None]:
# Read the merged 5-minute dataset into the working frame `df`,
# report its dimensions, and preview the first rows.
merged_csv = '../data/nifty_merged_5min.csv'
df = load_data(merged_csv)
print(f"Data shape: {df.shape}")
df.head()

## 3.1 Add EMA Indicators

In [None]:
# Attach the fast (5) and slow (15) period EMA columns to the frame,
# then print the first 20 rows of the close price next to the EMA columns.
ema_preview_cols = ['close_spot', 'ema_5', 'ema_15', 'ema_diff', 'ema_signal']

df = EMAIndicators.add_ema_indicators(df, fast_period=5, slow_period=15)

print("EMA indicators added:")
ema_preview = df.loc[:, ema_preview_cols].head(20)
print(ema_preview)

In [None]:
# Visualize EMAs
# Only the first 500 rows are drawn so the close price and both EMA lines
# remain distinguishable on a single chart.
ema_window = df.head(500)

# savefig does not create missing parent directories, so make sure the
# output folder exists before the figure is written.
ema_plot_path = Path('../plots/ema_indicators.png')
ema_plot_path.parent.mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(ema_window['timestamp'], ema_window['close_spot'], label='Close', alpha=0.7)
ax.plot(ema_window['timestamp'], ema_window['ema_5'], label='EMA 5', linewidth=2)
ax.plot(ema_window['timestamp'], ema_window['ema_15'], label='EMA 15', linewidth=2)
ax.set_title('EMA Indicators')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(ema_plot_path, dpi=300, bbox_inches='tight')
plt.show()

## 3.2 Calculate Greeks

In [None]:
# Build the Greeks calculator.
# NOTE(review): 0.065 is presumably an annualised 6.5% risk-free rate - confirm.
greeks_calc = GreeksCalculator(risk_free_rate=0.065)

# One entry per ATM option side: (IV column, option type, completion message).
# Call is processed first, then put; a side is skipped when its IV column
# is absent from the frame.
option_sides = (
    ('call_iv', 'call', "Call Greeks calculated"),
    ('put_iv', 'put', "Put Greeks calculated"),
)
for iv_column, side, done_message in option_sides:
    if iv_column not in df.columns:
        continue
    df = greeks_calc.add_greeks_to_dataframe(
        df,
        spot_col='close_spot',
        strike_col='atm_strike',
        iv_col=iv_column,
        option_type=side
    )
    print(done_message)

# List every column whose name contains one of the Greek names.
print("\nGreeks columns:")
greek_names = ['delta', 'gamma', 'theta', 'vega', 'rho']
greek_cols = []
for column_name in df.columns:
    if any(greek in column_name for greek in greek_names):
        greek_cols.append(column_name)
print(greek_cols)

## 3.3 Add Derived Features

In [None]:
# Run the full derived-feature step on the frame.
df = DerivedFeatures.add_all_derived_features(df)

# Report which of the expected derived columns are actually present;
# missing ones are skipped silently.
derived_cols = [
    'avg_iv',
    'iv_spread',
    'pcr_oi',
    'pcr_volume',
    'futures_basis',
    'spot_returns',
    'delta_neutral_ratio',
    'gamma_exposure',
]
print("Derived features added:")
for col in derived_cols:
    if col not in df.columns:
        continue
    print(f"  ✓ {col}")

## 3.4 Add Time-Based Features

In [None]:
# Columns previewed once the time-based step has run.
time_cols = ['hour', 'minute', 'day_of_week', 'is_opening_hour', 'is_closing_hour']

# Add the time features, using the `timestamp` column as input.
df = TimeBasedFeatures.add_time_features(df, timestamp_col='timestamp')

print("Time-based features added:")
time_preview = df.loc[:, time_cols].head()
print(time_preview)

## 3.5 Add Lag Features

In [None]:
# Candidate columns for lagging; only those present in the frame are kept,
# so an absent column is skipped rather than passed to the helper.
lag_candidates = ['close_spot', 'volume_spot', 'avg_iv', 'pcr_oi']
key_columns = list(filter(lambda name: name in df.columns, lag_candidates))

# Add lags 1, 2 and 3 for each retained column.
df = LagFeatures.add_lag_features(
    df,
    columns=key_columns,
    lags=[1, 2, 3],
)

print(f"\nLag features added for {len(key_columns)} columns")

## 3.6 Feature Statistics

In [None]:
# Print summary statistics (pandas `describe`) for the feature frame.
print("Feature Statistics:")
feature_stats = df.describe()
print(feature_stats)

In [None]:
# Correlation heatmap
# Restrict the heatmap to the selected features that exist in the frame.
feature_cols = ['ema_5', 'ema_15', 'avg_iv', 'iv_spread', 'pcr_oi', 'futures_basis', 'spot_returns']
feature_cols = [col for col in feature_cols if col in df.columns]

correlation_matrix = df[feature_cols].corr()

# savefig does not create missing parent directories, so make sure the
# output folder exists before the figure is written.
heatmap_path = Path('../plots/feature_correlation.png')
heatmap_path.parent.mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(12, 10))
# center=0 keeps the diverging colormap symmetric around zero correlation.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, fmt='.2f', ax=ax)
ax.set_title('Feature Correlation Heatmap')
fig.tight_layout()
fig.savefig(heatmap_path, dpi=300, bbox_inches='tight')
plt.show()

## 3.7 Save Feature Set

In [None]:
# Write the full feature frame to CSV (without the index column)
# and report its final dimensions.
features_csv = '../data/nifty_features_5min.csv'
df.to_csv(features_csv, index=False)

print(f"\nFeature set saved: {df.shape[0]} rows, {df.shape[1]} columns")
print("File: data/nifty_features_5min.csv")

## Summary

In [None]:
# Closing report: final frame dimensions, the feature categories produced
# by this notebook, and the next notebook to run.
banner = "=" * 80
summary_lines = [
    banner,
    "FEATURE ENGINEERING SUMMARY",
    banner,
    f"\nTotal features: {df.shape[1]}",
    f"Total rows: {df.shape[0]}",
    "\nFeature categories:",
    "  ✓ EMA Indicators (5, 15 period)",
    "  ✓ Options Greeks (Delta, Gamma, Theta, Vega, Rho)",
    "  ✓ Derived Features (IV, PCR, Basis, Returns)",
    "  ✓ Time-based Features",
    "  ✓ Lag Features",
    "\nNext Step: Proceed to 04_regime_detection.ipynb",
]
for summary_line in summary_lines:
    print(summary_line)