# RL Agent Performance Evaluation (Enhanced)

## 1. Configuration and Data Loading

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# --- Configuration ---
BASELINE_TRADES_PATH = 'data/trades_master.csv'
PPO_TRADES_PATH = 'data/ppo_trades.csv' # Replace with actual path
SAC_TRADES_PATH = 'data/sac_trades.csv' # Replace with actual path
TQC_TRADES_PATH = 'data/tqc_trades.csv' # Replace with actual path
CAPITAL = 100000


def _read_trade_log(csv_path):
    """Read one trade CSV and parse its ``exit_time`` column.

    A missing file is tolerated: an empty DataFrame is returned so the
    downstream cells can skip that strategy instead of failing.
    """
    try:
        frame = pd.read_csv(csv_path)
        frame['exit_time'] = pd.to_datetime(frame['exit_time'])
    except FileNotFoundError:
        return pd.DataFrame()
    return frame


# --- Load Data ---
baseline_trades = _read_trade_log(BASELINE_TRADES_PATH)
ppo_trades = _read_trade_log(PPO_TRADES_PATH)
sac_trades = _read_trade_log(SAC_TRADES_PATH)
tqc_trades = _read_trade_log(TQC_TRADES_PATH)

# Display label -> trade log; insertion order drives table and plot order.
datasets = dict(
    Baseline=baseline_trades,
    PPO=ppo_trades,
    SAC=sac_trades,
    TQC=tqc_trades,
)

print('Data loaded.')

## 2. Comprehensive Performance Metrics

In [None]:
def calculate_all_metrics(trades, capital=100000):
    """Summarise a trade log as a dict of pre-formatted performance metrics.

    Args:
        trades: DataFrame with one row per closed trade. The columns ``pnl``,
            ``r_multiple``, ``entry_time`` and ``exit_time`` are read; the
            frame itself is not modified.
        capital: Account size used to convert PnL into fractional returns.

    Returns:
        Dict mapping a metric label to its formatted string, or ``{}`` when
        ``trades`` is empty.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    if trades.empty:
        return {}

    pnl = trades['pnl']
    returns = pnl / capital
    winning = pnl[pnl > 0]
    losing = pnl[pnl < 0]

    # PnL and Win Rate
    total_pnl = pnl.sum()
    win_rate = (pnl > 0).mean()
    # Break-even trades are neither wins nor losses, so the loss weight is
    # measured directly rather than taken as 1 - win_rate.
    loss_rate = (pnl < 0).mean()

    # R-Multiple and Expectancy
    avg_r = trades['r_multiple'].mean()
    # The mean of an empty selection is NaN; treat "no wins"/"no losses" as a
    # zero contribution so expectancy stays a number.
    avg_win = winning.mean() if not winning.empty else 0.0
    avg_loss = losing.mean() if not losing.empty else 0.0
    expectancy = avg_win * win_rate + avg_loss * loss_rate

    # Risk-Adjusted Returns
    # Coerce here so the function also accepts logs whose timestamps are
    # still strings; resample() requires a datetime index.
    exit_times = pd.to_datetime(trades['exit_time'])
    daily_pnl = pd.Series(pnl.to_numpy(), index=pd.DatetimeIndex(exit_times))
    daily_returns = daily_pnl.resample('D').sum() / capital

    # std() is NaN for a single observation; `NaN > 0` is False, which falls
    # through to the 0.0 default.
    if daily_returns.std() > 0:
        sharpe = (daily_returns.mean() / daily_returns.std()) * np.sqrt(252)
    else:
        sharpe = 0.0

    negative_returns = daily_returns[daily_returns < 0]
    if negative_returns.std() > 0:
        sortino = (daily_returns.mean() / negative_returns.std()) * np.sqrt(252)
    else:
        sortino = 0.0

    cumulative_returns = (1 + daily_returns).cumprod()
    # Drawdown is measured relative to the running peak. The starting equity
    # (1.0) counts as the first peak so an opening loss is not missed.
    running_peak = cumulative_returns.cummax().clip(lower=1.0)
    max_drawdown = ((running_peak - cumulative_returns) / running_peak).max()

    annual_return = (cumulative_returns.iloc[-1]) ** (252 / len(cumulative_returns)) - 1
    if max_drawdown > 0:
        calmar = annual_return / max_drawdown
    else:
        calmar = 0.0

    # Trade Duration (hours); kept local so the caller's frame is untouched.
    duration_hours = (exit_times - pd.to_datetime(trades['entry_time'])).dt.total_seconds() / 3600
    avg_duration = duration_hours.mean()

    # Tail Metrics
    tail_gain = returns.quantile(0.95)
    tail_loss = returns.quantile(0.05)

    # W/L Concentration: share of gross profit (gross loss) contributed by
    # the five largest wins (losses). Both sides use the gross figure so the
    # two ratios are comparable and bounded by 100%.
    gross_profit = winning.sum()
    gross_loss = losing.sum()
    win_concentration = winning.nlargest(5).sum() / gross_profit if gross_profit > 0 else 0
    loss_concentration = losing.nsmallest(5).sum() / gross_loss if gross_loss < 0 else 0

    return {
        'Total PnL': f'${total_pnl:,.2f}',
        'Win Rate': f'{win_rate:.2%}',
        'Sharpe Ratio': f'{sharpe:.2f}',
        'Sortino Ratio': f'{sortino:.2f}',
        'Max Drawdown': f'{max_drawdown:.2%}',
        'Calmar Ratio': f'{calmar:.2f}',
        'Avg R-Multiple': f'{avg_r:.2f}',
        'Expectancy': f'${expectancy:.2f}',
        'Avg Duration (h)': f'{avg_duration:.2f}',
        '95% Tail Gain': f'{tail_gain:.2%}',
        '5% Tail Loss': f'{tail_loss:.2%}',
        'Win Concentration (Top 5)': f'{win_concentration:.2%}',
        'Loss Concentration (Top 5)': f'{loss_concentration:.2%}'
    }

# One metrics dict per strategy that has trades; empty logs are left out so
# the table has no blank rows.
all_metrics = {
    label: calculate_all_metrics(frame, capital=CAPITAL)
    for label, frame in datasets.items()
    if not frame.empty
}
# Transpose so strategies are rows and metrics are columns.
metrics_df = pd.DataFrame(all_metrics).transpose()
print('--- Performance Metrics ---')
display(metrics_df)

## 3. Visualizations

In [None]:
def plot_equity_curves(datasets, capital=100000):
    """Plot cumulative equity for every non-empty trade log on one figure.

    Args:
        datasets: Mapping of display label to a trades DataFrame with a
            ``pnl`` column.
        capital: Starting equity added to the running PnL sum.
    """
    plt.figure(figsize=(14, 7))
    for label, frame in datasets.items():
        if frame.empty:
            continue
        # Equity after each trade, in row order of the log.
        running_equity = frame['pnl'].cumsum() + capital
        plt.plot(running_equity, label=label)

    plt.title('Equity Curves')
    plt.xlabel('Trade Number')
    plt.ylabel('Equity')
    plt.legend()
    plt.grid(True)
    plt.show()

# Draw the equity comparison for all loaded strategies, starting each curve at CAPITAL.
plot_equity_curves(datasets, capital=CAPITAL)

In [None]:
def plot_r_multiple_distributions(datasets):
    """Plot an R-multiple histogram (with KDE) per non-empty trade log.

    One subplot row is created for each dataset that has trades; empty
    datasets are skipped entirely. If nothing has trades, no figure is
    created.

    Args:
        datasets: Mapping of display label to a trades DataFrame with an
            ``r_multiple`` column.
    """
    populated = {name: trades for name, trades in datasets.items() if not trades.empty}
    if not populated:
        # plt.subplots() rejects zero rows; there is nothing to draw anyway.
        return

    # squeeze=False keeps `axes` two-dimensional even for a single row, so
    # the indexing below works for any number of datasets.
    _, axes = plt.subplots(
        len(populated), 1,
        figsize=(10, 5 * len(populated)),
        sharex=True,
        squeeze=False,
    )
    for ax, (name, trades) in zip(axes[:, 0], populated.items()):
        sns.histplot(trades['r_multiple'], bins=100, kde=True, ax=ax)
        ax.set_title(f'{name} R-Multiple Distribution')
        # Reference line separating losing from winning trades.
        ax.axvline(0, color='k', linestyle='--')
        ax.set_xlabel('R-Multiple')
    plt.tight_layout()
    plt.show()

# Draw the R-multiple histograms for the loaded strategies.
plot_r_multiple_distributions(datasets)

In [None]:
def plot_performance_by_regime(datasets):
    """Plot total PnL per ``regime`` value as a bar chart for each trade log.

    Only datasets that have trades and a ``regime`` column get a subplot
    row. If none qualify, no figure is created.

    Args:
        datasets: Mapping of display label to a trades DataFrame with
            ``pnl`` and (optionally) ``regime`` columns.
    """
    plottable = {
        name: trades
        for name, trades in datasets.items()
        if not trades.empty and 'regime' in trades.columns
    }
    if not plottable:
        # plt.subplots() rejects zero rows; there is nothing to draw anyway.
        return

    # squeeze=False keeps `axes` two-dimensional even for a single row, so
    # the indexing below works for any number of datasets.
    _, axes = plt.subplots(
        len(plottable), 1,
        figsize=(10, 5 * len(plottable)),
        squeeze=False,
    )
    for ax, (name, trades) in zip(axes[:, 0], plottable.items()):
        regime_pnl = trades.groupby('regime')['pnl'].sum()
        regime_pnl.plot(kind='bar', ax=ax)
        ax.set_title(f'{name} PnL by Regime')
        ax.set_ylabel('Total PnL')
    plt.tight_layout()
    plt.show()

# Draw the per-regime PnL bar charts for the loaded strategies.
plot_performance_by_regime(datasets)

## 4. Asymmetric Payoff Analysis

In [None]:
def analyze_asymmetric_profile(trades, name):
    """Print the share of losses and wins at or above their 0.8-quantile PnL.

    Args:
        trades: Trades DataFrame with a ``pnl`` column. Nothing is printed
            when it is empty.
        name: Label used in the printed header.

    Returns:
        None. The report is written to stdout.
    """
    if trades.empty:
        return

    pnl = trades['pnl']
    wins = pnl[pnl > 0]
    losses = pnl[pnl < 0]

    # NOTE(review): each threshold is the 0.8 quantile of the same sample it
    # is then compared against, so both shares land near 20% by construction
    # regardless of the strategy. If the intent is to test the stated target,
    # the thresholds probably need to be fixed externally (e.g. in R) and the
    # shares taken over all trades -- confirm with the author.
    if losses.empty:
        # quantile()/mean() of an empty selection are NaN; report 0 instead.
        small_loss_outcomes = 0.0
    else:
        small_loss_threshold = losses.quantile(0.8)  # losses are negative: this is near the small-magnitude end
        small_loss_outcomes = (losses >= small_loss_threshold).mean()

    if wins.empty:
        large_win_outcomes = 0.0
    else:
        large_win_threshold = wins.quantile(0.8)  # 20% of wins are larger than this
        large_win_outcomes = (wins >= large_win_threshold).mean()

    print(f'--- {name} Asymmetric Profile ---')
    print('Target: >=80% small-loss outcomes, <=20% large-win outcomes')
    print(f'Small Loss Outcomes (Bottom 80% of losses): {small_loss_outcomes:.2%}')
    print(f'Large Win Outcomes (Top 20% of wins): {large_win_outcomes:.2%}')

# Print the payoff-profile report for each strategy in turn.
for name in datasets:
    df = datasets[name]
    analyze_asymmetric_profile(df, name)

## 5. Explainable AI (XAI) Analysis

In [None]:
import shap
from sb3_contrib import TQC

# Load a trained TQC agent
# NOTE(review): the checkpoint path is derived by swapping the trades CSV
# extension for '.zip' -- confirm that is where the model is actually saved.
try:
    tqc_model = TQC.load(TQC_TRADES_PATH.replace('.csv', '.zip'))
except FileNotFoundError:
    tqc_model = None

# Select a few data points to explain
sample_indices = [100, 200, 300] # Replace with interesting indices
# Keep only indices that exist, so a short or missing trade log does not
# raise IndexError below.
usable_indices = [i for i in sample_indices if -len(tqc_trades) <= i < len(tqc_trades)]

# Compare against None explicitly: the check is "was a model loaded", not
# whatever truthiness the model object happens to define.
if tqc_model is None:
    print('TQC model checkpoint not found; skipping SHAP analysis.')
elif not usable_indices:
    print('TQC trade log has no rows at the requested sample indices; skipping SHAP analysis.')
else:
    # Create a SHAP explainer
    def predict_fn(x):
        # predict() returns (action, state); only the action is explained.
        return tqc_model.predict(x, deterministic=True)[0]

    # A single all-zero observation is used as the background dataset.
    explainer = shap.KernelExplainer(predict_fn, np.zeros((1, tqc_model.observation_space.shape[0])))

    # NOTE(review): rows of the trade log are passed to the policy as
    # observations. This presumably requires the CSV columns to be exactly
    # the (numeric) observation features, in order -- confirm.
    sample_data = np.array([tqc_trades.iloc[i].values for i in usable_indices])

    # Generate SHAP values
    shap_values = explainer.shap_values(sample_data)

    # Visualize the explanations
    shap.summary_plot(shap_values, sample_data, feature_names=tqc_trades.columns)
