# Feature Engineering

Transform the cleaned NSE stock price data into per-stock risk features for clustering.

In [None]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../src')

from features import (
    calculate_returns,
    calculate_volatility_features,
    calculate_risk_metrics,
    calculate_technical_indicators,
    calculate_liquidity_features,
    calculate_momentum_features,
    calculate_drawdown,
    aggregate_stock_features
)

## Load Data

In [None]:
# Read the cleaned price table and report its size and schema.
# NOTE(review): the later per-stock steps presumably rely on rows being in
# chronological order within each stock — confirm the CSV is sorted that way.
df = pd.read_csv('../Data/Processed/cleaned_nse.csv')
row_count = len(df)
stock_count = df['Stock_code'].nunique()
print(f"Loaded {row_count:,} rows for {stock_count} stocks")
print(f"\nColumns: {df.columns.tolist()}")
# Last expression of the cell: the notebook renders the first three rows.
df.head(3)

## Step 1: Returns

In [None]:
# Run calculate_returns on each stock's rows separately and stitch the
# results back into a single frame (group_keys=False keeps the stock code
# out of the result index).
# NOTE(review): newer pandas releases deprecate groupby.apply operating on
# the grouping column — confirm 'Stock_code' still survives this step on the
# installed pandas version, since every later step groups on it again.
print("Step 1: Calculating returns...")
grouped_by_stock = df.groupby('Stock_code', group_keys=False)
df = grouped_by_stock.apply(calculate_returns)
return_columns = [name for name in df.columns if 'return' in name.lower()]
print("✅ Added: daily_return")
print(f"New columns: {return_columns}")

## Step 2: Volatility

In [None]:
# Run calculate_volatility_features once per stock, then list every column
# whose name mentions "volatility" as a quick sanity check.
print("Step 2: Calculating volatility...")
stock_groups = df.groupby('Stock_code', group_keys=False)
df = stock_groups.apply(calculate_volatility_features)
volatility_columns = [name for name in df.columns if 'volatility' in name.lower()]
print("✅ Added: volatility_7d, volatility_14d, volatility_30d")
print(f"New columns: {volatility_columns}")

## Step 3: Risk Metrics

In [None]:
# Run calculate_risk_metrics once per stock, then list the columns whose
# names contain "downside" or "var" (case-insensitive).
print("Step 3: Calculating risk metrics...")
stock_groups = df.groupby('Stock_code', group_keys=False)
df = stock_groups.apply(calculate_risk_metrics)
risk_tags = ('downside', 'var')
risk_columns = [
    name for name in df.columns
    if any(tag in name.lower() for tag in risk_tags)
]
print("✅ Added: downside_deviation_30d, var_95")
print(f"New columns: {risk_columns}")

## Step 4: Technical Indicators

In [None]:
# Run calculate_technical_indicators once per stock, then list the columns
# whose names contain "rsi", "bb" or "macd" (case-insensitive).
print("Step 4: Calculating technical indicators...")
stock_groups = df.groupby('Stock_code', group_keys=False)
df = stock_groups.apply(calculate_technical_indicators)
indicator_tags = ['rsi', 'bb', 'macd']
indicator_columns = []
for name in df.columns:
    lowered = name.lower()
    if any(tag in lowered for tag in indicator_tags):
        indicator_columns.append(name)
print("✅ Added: rsi, bb_width, macd")
print(f"New columns: {indicator_columns}")

## Step 5: Liquidity

In [None]:
# Run calculate_liquidity_features once per stock.
# The column listing matches any name containing "volume" or "illiquidity",
# so it may also show volume columns that existed before this step.
print("Step 5: Calculating liquidity features...")
stock_groups = df.groupby('Stock_code', group_keys=False)
df = stock_groups.apply(calculate_liquidity_features)
liquidity_tags = ('volume', 'illiquidity')
liquidity_columns = [
    name for name in df.columns
    if any(tag in name.lower() for tag in liquidity_tags)
]
print("✅ Added: avg_volume, volume_volatility, amihud_illiquidity")
print(f"New columns: {liquidity_columns}")

## Step 6: Momentum

In [None]:
# Run calculate_momentum_features once per stock, then list the columns
# whose names contain "momentum", "ma_" or "price_to" (case-insensitive).
print("Step 6: Calculating momentum features...")
stock_groups = df.groupby('Stock_code', group_keys=False)
df = stock_groups.apply(calculate_momentum_features)
momentum_tags = ('momentum', 'ma_', 'price_to')
momentum_columns = [
    name for name in df.columns
    if any(tag in name.lower() for tag in momentum_tags)
]
print("✅ Added: momentum_7d, momentum_30d, momentum_90d, ma_7, ma_30, ma_50")
print(f"New columns: {momentum_columns}")

## Step 7: Drawdown

In [None]:
# Run calculate_drawdown once per stock, then list every column whose name
# mentions "drawdown" as a quick sanity check.
print("Step 7: Calculating drawdown...")
stock_groups = df.groupby('Stock_code', group_keys=False)
df = stock_groups.apply(calculate_drawdown)
drawdown_columns = [name for name in df.columns if 'drawdown' in name.lower()]
print("✅ Added: current_drawdown, max_drawdown")
print(f"New columns: {drawdown_columns}")

## Step 8: Aggregate to Stock Level

In [None]:
# Collapse the row-level frame to one record per stock.
print("\nAggregating to stock level...")
print(f"Before: {len(df):,} rows")

# aggregate_stock_features may return None for a stock; those stocks are
# left out of the final table.
per_stock_records = (
    aggregate_stock_features(stock_rows)
    for _, stock_rows in df.groupby('Stock_code')
)
features_list = [record for record in per_stock_records if record is not None]

df_features = pd.DataFrame(features_list)
n_stocks, n_features = df_features.shape
print(f"After: {n_stocks} stocks with {n_features} features")
# Last expression of the cell: the notebook renders the first rows.
df_features.head()

## Inspect Features

In [None]:
# Print a structural overview of the per-stock feature table; DataFrame.info()
# writes its summary (columns, non-null counts, dtypes) straight to stdout.
print("\nFeature Summary:")
df_features.info()

In [None]:
# Descriptive statistics for a handful of headline features, rounded to four
# decimals; the notebook renders the resulting table as the cell output.
print("\nKey Stats:")
key_columns = ['volatility_mean', 'sharpe_ratio', 'max_drawdown', 'rsi_mean']
key_stats = df_features[key_columns].describe()
key_stats.round(4)

## Save

In [None]:
# Write the per-stock feature table to disk without the index column, then
# report where it went and how large it is.
output_path = '../Data/Processed/nse_features.csv'
df_features.to_csv(output_path, index=False)
n_stocks, n_features = df_features.shape
print(f"✅ Saved to {output_path}")
print(f"   {n_stocks} stocks × {n_features} features")