# Quantitative Trading System - Exploratory Analysis

This notebook provides exploratory data analysis and visualization for the quantitative trading system.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys

sys.path.append('..')

from src.config import *
from src.utils import load_dataframe

# Set style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (15, 8)

%matplotlib inline

## 1. Load Data

In [None]:
# Read the two processed datasets whose paths come from src.config.
features_df = load_dataframe(FEATURES_FILE)
regime_df = load_dataframe(REGIME_FILE)

# Report the (rows, columns) of each frame, one per line.
print(
    f"Features shape: {features_df.shape}",
    f"Regime data shape: {regime_df.shape}",
    sep="\n",
)

# Preview the regime data; as the last expression it is rendered as the cell output.
regime_df.head()

## 2. Data Overview

In [None]:
# Basic statistics: describe() with default arguments. For a frame with mixed
# dtypes pandas restricts the summary to the numeric columns (count, mean, std,
# min, quartiles, max). As the last expression, the table is the cell output.
regime_df.describe()

## 3. Price Analysis

In [None]:
# Two stacked panels sharing one figure: close price on top, volume below.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
price_ax, volume_ax = axes
timestamps = regime_df['timestamp']

# Close price as a line
price_ax.plot(timestamps, regime_df['close'])
price_ax.set(title='NIFTY 50 Price', ylabel='Price')
price_ax.grid(True, alpha=0.3)

# Volume as bars; width is expressed in x-axis units, so 0.001 gives very thin bars
volume_ax.bar(timestamps, regime_df['volume'], width=0.001)
volume_ax.set(title='Trading Volume', ylabel='Volume', xlabel='Date')
volume_ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 4. Regime Analysis

In [None]:
# Number of rows per regime, sorted by regime value
regime_counts = regime_df['regime'].value_counts().sort_index()

# Human-readable label for each regime, looked up once and shared by both charts
regime_names = [REGIME_LABELS[regime] for regime in regime_counts.index]

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
bar_ax, pie_ax = axes

# Left panel: absolute counts
bar_ax.bar(regime_names, regime_counts.values)
bar_ax.set(title='Regime Distribution', ylabel='Count')
bar_ax.grid(True, alpha=0.3)

# Right panel: the same counts as percentage shares
pie_ax.pie(
    regime_counts.values,
    labels=regime_names,
    autopct='%1.1f%%',
    startangle=90,
)
pie_ax.set_title('Regime Distribution (%)')

fig.tight_layout()
plt.show()

## 5. Options Analysis

In [None]:
# IV analysis: ATM call/put implied volatility over time and by regime.
# The whole cell is skipped when either IV column is missing from regime_df.
iv_columns = ['iv_atm_call', 'iv_atm_put']

if all(col in regime_df.columns for col in iv_columns):
    fig, axes = plt.subplots(2, 1, figsize=(15, 10))

    # IV over time
    axes[0].plot(regime_df['timestamp'], regime_df['iv_atm_call'], label='Call IV', alpha=0.7)
    axes[0].plot(regime_df['timestamp'], regime_df['iv_atm_put'], label='Put IV', alpha=0.7)
    axes[0].set_title('Implied Volatility (ATM)')
    axes[0].set_ylabel('IV')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # IV by regime.
    # DataFrame.boxplot(column=[two columns], by=..., ax=<one Axes>) needs one
    # subplot per column; handed a single Axes, pandas clears the entire figure
    # (wiping the time-series panel above) and draws on fresh axes. Reshaping to
    # long format lets seaborn draw call and put side by side on the existing
    # lower Axes instead.
    iv_long = regime_df.melt(
        id_vars=['regime'],
        value_vars=iv_columns,
        var_name='option',
        value_name='iv',
    )
    # Use the same series names as the legend of the upper panel.
    iv_long['option'] = iv_long['option'].map(
        {'iv_atm_call': 'Call IV', 'iv_atm_put': 'Put IV'}
    )
    sns.boxplot(data=iv_long, x='regime', y='iv', hue='option', ax=axes[1])
    axes[1].set_title('IV Distribution by Regime')
    axes[1].set_xlabel('Regime')
    axes[1].set_ylabel('IV')
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 6. Feature Correlations

In [None]:
# Columns of interest for the correlation heatmap
key_features = [
    'close', 'volume', 'iv_atm_call', 'iv_atm_put',
    'pcr_oi', 'futures_basis', 'delta_atm_call', 'gamma_atm_call',
]

# Keep only the columns that actually exist in regime_df (order preserved)
available_features = [name for name in key_features if name in regime_df.columns]

# Nothing to plot when none of the key features are present
if len(available_features) > 0:
    corr_matrix = regime_df[available_features].corr()

    heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(
        corr_matrix,
        annot=True,
        cmap='coolwarm',
        center=0,
        square=True,
        linewidths=1,
        ax=heatmap_ax,
    )
    heatmap_ax.set_title('Feature Correlation Matrix')
    heatmap_fig.tight_layout()
    plt.show()

## 7. Strategy Results

In [None]:
# Load strategy results.
# Only the load step is best-effort: the result files do not exist until the
# strategy has been run. Everything after a successful load runs unguarded so
# that genuine problems (missing columns, typos) surface instead of being
# reported as "results not available".
try:
    backtest_df = load_dataframe(f'{RESULTS_DIR}/ema_strategy_backtest.csv')
    trades_df = load_dataframe(f'{RESULTS_DIR}/ema_strategy_trades.csv')
except Exception as exc:
    # Drop any frames left over from an earlier run so later cells cannot
    # silently plot stale results, and report why the load failed.
    backtest_df = trades_df = None
    print("Strategy results not available. Run the strategy first.")
    print(f"Reason: {type(exc).__name__}: {exc}")
else:
    is_win = trades_df['return'] > 0

    print(f"Total trades: {len(trades_df)}")
    if len(trades_df) > 0:
        print(f"Winning trades: {is_win.sum()}")
        print(f"Win rate: {is_win.mean():.2%}")
        print(f"Average return: {trades_df['return'].mean():.4f}")
    else:
        # Means over an empty table are NaN; say so plainly instead of printing "nan%".
        print("No trades recorded; win rate and average return are undefined.")

    # Plot equity curve against the starting capital
    equity_fig, equity_ax = plt.subplots(figsize=(15, 6))
    equity_ax.plot(backtest_df['timestamp'], backtest_df['capital'])
    equity_ax.axhline(y=INITIAL_CAPITAL, color='r', linestyle='--', label='Initial Capital')
    equity_ax.set_title('Equity Curve')
    equity_ax.set_xlabel('Date')
    # Rupee sign restored: the original label held the mojibake of U+20B9.
    equity_ax.set_ylabel('Capital (₹)')
    equity_ax.legend()
    equity_ax.grid(True, alpha=0.3)
    plt.show()

## 8. Trade Distribution

In [None]:
# Trade return / duration histograms.
# Depends on trades_df from the strategy-results cell. Look it up defensively so
# that a missing or failed load yields a clear message rather than an error
# that is silently discarded.
loaded_trades = globals().get('trades_df')

if loaded_trades is None or 'return' not in loaded_trades.columns:
    print("Trade data not available. Load the strategy results first.")
else:
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # Return distribution (NaNs dropped: hist cannot bin non-finite values)
    axes[0].hist(loaded_trades['return'].dropna(), bins=50, edgecolor='black')
    axes[0].axvline(x=0, color='r', linestyle='--')
    axes[0].set_title('Trade Return Distribution')
    axes[0].set_xlabel('Return')
    axes[0].set_ylabel('Frequency')
    axes[0].grid(True, alpha=0.3)

    # Duration distribution
    if 'duration' in loaded_trades.columns:
        axes[1].hist(loaded_trades['duration'].dropna(), bins=30, edgecolor='black')
        axes[1].set_title('Trade Duration Distribution')
        axes[1].set_xlabel('Duration (hours)')
        axes[1].set_ylabel('Frequency')
        axes[1].grid(True, alpha=0.3)
    else:
        # No duration data: hide the panel instead of showing empty axes.
        axes[1].set_visible(False)

    plt.tight_layout()
    plt.show()