# P&L Curves Comparison

This notebook visualizes and compares cumulative P&L curves across multiple backtest runs.

## Features

- Overlay multiple P&L curves on a single plot for easy comparison
- Static matplotlib charts with a legend identifying each run
- Fast metadata-based filtering before loading full data
- Color-coded traces for distinguishing different runs

## Usage

1. Configure stock symbol and paths
2. Scan available runs to see metadata
3. Filter runs using the summary DataFrame
4. Load selected runs
5. Visualize P&L curves

Use this notebook to quickly identify which parameter combinations lead to better performance profiles.

In [None]:
# Import required libraries
import sys
from pathlib import Path

# Make the project's analysis helpers importable from this notebook
project_root = Path('/home/pjpr/projects/wee_hedgy_thing/quiescence')
analysis_path = project_root.joinpath('analysis')

# Prepend the folder only when it is absent, so re-running the cell never stacks duplicates
if sys.path.count(str(analysis_path)) == 0:
    sys.path.insert(0, str(analysis_path))

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Import custom utility functions from analysis folder
from utilities import (
    scan_backtest_runs, 
    load_run_data, 
    create_runs_summary_dataframe, 
    convert_utc_to_ny,
    build_cumulative_pnl_from_positions
)


## 1. Configuration

Set the stock symbol and paths for analysis.

In [None]:
# === CONFIGURATION ===
# Ticker whose backtest runs are compared throughout the notebook
stock_symbol = "LLY"

# Storage layout: backtest runs live in the "backtest" folder under the storage root
STORAGE_ROOT = Path("/data/quiescence/")
BACKTEST_ROOT = STORAGE_ROOT.joinpath("backtest")

# Echo the active settings so the rendered notebook records what was analysed
print(f"Analyzing stock: {stock_symbol}")
print(f"Backtest runs root: {BACKTEST_ROOT}")

## 2. Scan and Filter Runs

First, scan all available runs and create a summary DataFrame for filtering.

In [None]:
# Collect metadata for every run of the configured stock
# (described as a fast, metadata-only scan; see utilities.scan_backtest_runs)
runs_metadata = scan_backtest_runs(BACKTEST_ROOT, stock_symbol)

# Tabulate the scanned metadata so runs can be compared and filtered
df_summary = create_runs_summary_dataframe(runs_metadata)

# Report the run count, then the full summary table without its index column
print(
    f"\nFound {len(df_summary)} runs for {stock_symbol}\n",
    df_summary.to_string(index=False),
    sep="\n",
)

## 3. Select Runs to Load

Choose which runs to analyze. You can load all, specific runs by number, or filter by parameters.

In [None]:
# Option 1 (active): load the data for every scanned run
runs_data = list(map(load_run_data, runs_metadata))

# Option 2: load only the runs whose 1-based Run number is listed
# (numbers outside 1..len(runs_metadata) are skipped)
#selected_run_numbers = [11, 19, 23, 34, 42, 43, 47]
#runs_data = [load_run_data(runs_metadata[run_num - 1]) for run_num in selected_run_numbers if 0 < run_num <= len(runs_metadata)]

# Option 3: select runs through conditions on the summary DataFrame
# Example: keep runs matching specific parameter values
#filtered_df = df_summary[
#    (df_summary['Entry P Top'].astype(float) >= 0.95) & 
#    (df_summary['Frequency'] == '1-MINUTE')
#]

#filtered_df = df_summary[(df_summary['Max Pos Bars'] == 9999)]

# The 'Run' column is 1-based, hence the -1 when indexing runs_metadata
#selected_indices = filtered_df['Run'].values - 1
#runs_data = [load_run_data(runs_metadata[i]) for i in selected_indices]

# Report how many runs were loaded
print(f"\nLoaded {len(runs_data)} runs:\n")
#print(filtered_df.to_string(index=False))

## 5. Visualize P&L Curves

Create a matplotlib plot overlaying the cumulative P&L curves of all selected runs.

In [None]:
# One wide axes shared by every run's cumulative P&L curve
fig, ax = plt.subplots(figsize=(14, 7))

for i, run in enumerate(runs_data):
    # Compact frequency tag from bar_type: second '-' token followed by the first
    # three lowercase letters of the third token; 'Unknown' when bar_type is
    # missing/empty or has fewer than three '-' separated tokens
    bar_type = run.get('bar_type', '')
    parts = bar_type.split('-') if bar_type else []
    frequency = 'Unknown' if len(parts) < 3 else f"{parts[1]}{parts[2].lower()[0:3]}"

    # Cumulative P&L series derived from this run's positions report
    positions = run["positions_report"]
    cumulative_pnl = build_cumulative_pnl_from_positions(positions)

    # Runs without any P&L points are reported and left off the chart
    if len(cumulative_pnl) == 0:
        print(f"Warning: Run {i+1} has no positions")
        continue

    # Legend entry: frequency tag plus the run's max_position_bars setting
    max_bar_pos = run.get('max_position_bars', 'N/A')
    label = f"{frequency} ({max_bar_pos})"

    ax.plot(
        cumulative_pnl.index,
        cumulative_pnl.values,
        label=label,
        linewidth=1.5,
        alpha=0.8,
    )

# Break-even reference line at y=0
ax.axhline(y=0, color='black', linestyle='--', linewidth=0.5, alpha=0.5)

# Titles, axis labels, grid and legend
ax.set_title(f'P&L Curves Comparison for {stock_symbol}', fontsize=14, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Realized P&L ($)', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(loc='upper left', framealpha=0.9)

# Date ticks rendered as YYYY-MM-DD at automatically chosen positions
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
fig.autofmt_xdate()

plt.tight_layout()
plt.show()


## 6. Summary Statistics

Display the final P&L, maximum drawdown (in dollars and as a percentage of the preceding peak), and trade count for each run to quickly identify top performers.

In [None]:
# Build one summary row per loaded run: final cumulative P&L, maximum drawdown
# (in dollars and as a percentage of the peak preceding it) and trade count.
summary_stats = []

for i, run in enumerate(runs_data):
    positions = run['positions_report']

    # Frequency label "<token 2>-<token 3>" taken from the '-' separated bar_type;
    # 'Unknown' when bar_type is missing/empty or has fewer than three tokens
    bar_type = run.get('bar_type', '')
    if bar_type:
        parts = bar_type.split('-')
        frequency = f"{parts[1]}-{parts[2]}" if len(parts) >= 3 else 'Unknown'
    else:
        frequency = 'Unknown'

    # Cumulative P&L series for this run (used below as a pandas Series)
    cumulative_pnl = build_cumulative_pnl_from_positions(positions)

    if len(cumulative_pnl) == 0:
        # No P&L points: report zeros instead of failing, since the minimum
        # position of an empty series is undefined and raises.
        final_pnl = 0
        max_drawdown = 0
        pct_drawdown = 0
    else:
        final_pnl = cumulative_pnl.iloc[-1]

        # Drawdown = distance below the running peak (always <= 0)
        running_max = cumulative_pnl.expanding().max()
        drawdown = cumulative_pnl - running_max
        max_drawdown = drawdown.min()

        # Look the peak up by position rather than by label: with duplicate
        # index labels a label lookup returns a Series, which cannot be used
        # in the scalar comparison below.
        max_dd_pos = drawdown.argmin()
        peak_at_max_dd = running_max.iloc[max_dd_pos]

        # Percentage drawdown relative to the peak in force at the trough.
        # It is only meaningful for a positive peak; a zero or negative peak
        # would divide by zero or flip the sign, so report 0 in those cases.
        pct_drawdown = (max_drawdown / peak_at_max_dd * 100) if peak_at_max_dd > 0 else 0

    summary_stats.append({
        'Run': i + 1,
        'Frequency': frequency,
        'Max Pos Bars': run.get('max_position_bars', 'N/A'),
        'Final P&L ($)': f"{final_pnl:.2f}",
        'Max Drawdown ($)': f"{max_drawdown:.2f}",
        'Max DD (%)': f"{pct_drawdown:.1f}%",
        'Total Trades': len(positions)
    })

# Render the collected rows as a plain-text table
df_stats = pd.DataFrame(summary_stats)
print("\n" + "="*80)
print("P&L SUMMARY STATISTICS")
print("="*80 + "\n")
print(df_stats.to_string(index=False))


## 7. P&L Distribution Analysis

Visualize the distribution of individual trade P&L for each run to understand win/loss patterns.

In [None]:
# Draw one histogram of per-trade realized P&L for each loaded run, laid out
# on a two-column grid, with a statistics box and reference lines per panel.

# Configuration: Set the x-axis range in dollars
pnl_xlim_dollars = 50  # Display range: ±$X (e.g., 25, 50, 100, 200)
transaction_cost_dollars = 2  # Transaction cost threshold in dollars
n_bins = 1000

# Calculate grid dimensions
num_runs = len(runs_data)
ncols = 2
nrows = (num_runs + ncols - 1) // ncols  # Ceiling division

if num_runs == 0:
    # plt.subplots() rejects a zero-row grid, so there is nothing to draw
    print("No runs loaded - skipping P&L distribution plots")
else:
    # squeeze=False always yields a 2-D axes array, so a single-row grid needs
    # no special-case reshape before flattening
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(14, 4 * nrows), squeeze=False)
    axes_flat = axes.flatten()

    # Plot distribution for each run
    for i, run in enumerate(runs_data):
        positions = run['positions_report']

        # Frequency label "<token 2>-<token 3>" from the '-' separated bar_type
        bar_type = run.get('bar_type', '')
        if bar_type:
            parts = bar_type.split('-')
            frequency = f"{parts[1]}-{parts[2]}" if len(parts) >= 3 else 'Unknown'
        else:
            frequency = 'Unknown'

        # Clean realized P&L: string values carry a ' USD' suffix that must be
        # stripped (as literal text, not a regex) before converting to float
        if positions['realized_pnl'].dtype == 'object':
            pnl_values = positions['realized_pnl'].str.replace(' USD', '', regex=False).astype(float)
        else:
            pnl_values = positions['realized_pnl']

        # Calculate statistics; a run with no trades gets a 0% win rate
        # instead of a division by zero
        win_rate = (pnl_values > 0).sum() / len(pnl_values) * 100 if len(pnl_values) > 0 else 0.0
        mean_pnl = pnl_values.mean()
        median_pnl = pnl_values.median()

        # Plot histogram
        ax = axes_flat[i]
        ax.hist(pnl_values, bins=n_bins, edgecolor='black', alpha=0.7, color='steelblue')

        # Set x-axis limits to ±X dollars
        ax.set_xlim(-pnl_xlim_dollars, pnl_xlim_dollars)

        # Add vertical line at zero
        ax.axvline(x=0, color='red', linestyle='--', linewidth=1, alpha=0.7)

        # Add vertical line at transaction cost threshold
        ax.axvline(x=transaction_cost_dollars, color='darkgreen', linestyle=':', linewidth=1.5, alpha=0.7, label=f'${transaction_cost_dollars} (tx cost)')

        # Create title with run info
        max_bar_pos = run.get('max_position_bars', 'N/A')
        title = f"{frequency} ({max_bar_pos})"
        ax.set_title(title, fontsize=11, fontweight='bold')

        # Add statistics text box (top-right corner, in axes coordinates)
        stats_text = f'Win Rate: {win_rate:.1f}%\nMean: ${mean_pnl:.2f}\nMedian: ${median_pnl:.2f}'
        ax.text(0.98, 0.97, stats_text, transform=ax.transAxes,
                verticalalignment='top', horizontalalignment='right',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8),
                fontsize=9)

        # Show the tx-cost line's label; top-left keeps it clear of the stats box
        ax.legend(loc='upper left', fontsize=8)

        ax.set_xlabel('Trade P&L ($)', fontsize=10)
        ax.set_ylabel('Frequency', fontsize=10)
        ax.grid(True, alpha=0.3)

    # Hide unused subplots (the last grid cell when the run count is odd)
    for i in range(num_runs, len(axes_flat)):
        axes_flat[i].set_visible(False)

    plt.tight_layout()
    plt.show()