# Phase 1: Build PnL Time Series per Cluster

This notebook contains the analysis for the Risk & Portfolio Interaction study.

Phase 1: Build PnL Time Series per Cluster
==========================================
Goal: Convert trade-level records into daily PnL time series (one per cluster, plus the portfolio total) for portfolio-level risk analysis

In [None]:
import warnings
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Suppress every warning for the session so library notices do not clutter the report.
warnings.filterwarnings(action='ignore')

# Plot styling: matplotlib style sheet first, then the seaborn colour palette
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

# Phase banner: title framed by two 80-character rules
banner_rule = "=" * 80
print(banner_rule, "PHASE 1: BUILD PnL TIME SERIES PER CLUSTER", banner_rule, sep="\n")

# Load the main trades data. The code below reads the columns
# 'entry_time', 'exit_time', 'cluster' and 'profit' from this frame.
TRADES_CSV = '/mnt/user-data/uploads/trades_with_clusters.csv'
trades_df = pd.read_csv(TRADES_CSV)

# Fail fast with one clear message if an expected column is absent
required_columns = ['entry_time', 'exit_time', 'cluster', 'profit']
missing_columns = [col for col in required_columns if col not in trades_df.columns]
if missing_columns:
    raise KeyError(f"{TRADES_CSV} is missing required column(s): {missing_columns}")

# Convert datetime columns
trades_df['entry_time'] = pd.to_datetime(trades_df['entry_time'])
trades_df['exit_time'] = pd.to_datetime(trades_df['exit_time'])

print(f"\n📊 Dataset Overview:")
print(f"   Total trades: {len(trades_df)}")
print(f"   Date range: {trades_df['entry_time'].min().date()} to {trades_df['exit_time'].max().date()}")
print(f"   Clusters: {sorted(trades_df['cluster'].unique())}")
print(f"   Total profit: ${trades_df['profit'].sum():.2f}")

# Cluster distribution. A single groupby pass yields the clusters in sorted order
# and skips rows with a missing cluster, which would otherwise break int() below.
print(f"\n📈 Trades per Cluster:")
for cluster, cluster_trades in trades_df.groupby('cluster'):
    print(f"   Cluster {int(cluster)}: {len(cluster_trades)} trades, Total PnL: ${cluster_trades['profit'].sum():.2f}")

# Step 1: Create date column from exit_time (each trade is assigned to the day it exits)
trades_df['date'] = trades_df['exit_time'].dt.floor('D')

# Step 2: Group by date and cluster, sum profit
daily_pnl = trades_df.groupby(['date', 'cluster'])['profit'].sum().reset_index()

print(f"\n📅 Daily PnL Summary:")
print(f"   Trading days: {daily_pnl['date'].nunique()}")
print(f"   Daily entries: {len(daily_pnl)}")

# groupby drops rows whose date or cluster is missing. Report them so the daily
# matrix cannot silently disagree with totals computed from trades_df.
excluded_trades = int(trades_df[['date', 'cluster']].isna().any(axis=1).sum())
if excluded_trades:
    print(f"   Excluded trades (missing exit date or cluster): {excluded_trades}")

# Step 3: Pivot to get one column per cluster
daily_pnl_pivot = daily_pnl.pivot(index='date', columns='cluster', values='profit')
daily_pnl_pivot.columns = [f'cluster_{int(c)}_pnl' for c in daily_pnl_pivot.columns]
# A cluster with no exits on a date that is in the index gets 0 PnL for that date.
# Dates on which no cluster had an exit are not in the index at all.
daily_pnl_pivot = daily_pnl_pivot.fillna(0)

print(f"\n📋 Daily PnL Matrix Shape: {daily_pnl_pivot.shape}")
print(f"   Columns: {list(daily_pnl_pivot.columns)}")

# Create total portfolio PnL from the cluster columns only, so re-running this
# step never folds an existing 'total_pnl' column into the new total.
cluster_pnl_columns = [c for c in daily_pnl_pivot.columns if c != 'total_pnl']
daily_pnl_pivot['total_pnl'] = daily_pnl_pivot[cluster_pnl_columns].sum(axis=1)

# Cumulative PnL (running sum down the date index, per column)
cumulative_pnl = daily_pnl_pivot.cumsum()

print("\n" + "=" * 80)
print("DAILY PnL STATISTICS")
print("=" * 80)
print(daily_pnl_pivot.describe().round(2))

# Save to CSV for next phases. Create the output directory first: to_csv does
# not create missing parent directories and would fail on a fresh machine.
output_csv = Path('/home/claude/risk_analysis/daily_pnl_per_cluster.csv')
output_csv.parent.mkdir(parents=True, exist_ok=True)
daily_pnl_pivot.to_csv(output_csv)
print(f"\n✅ Daily PnL data saved to: {output_csv.name}")

In [None]:
# VISUALIZATIONS

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16, 12))

# Plot 1 (top-left): one cumulative PnL line per cluster; the portfolio total is left out
ax1 = axes[0, 0]
per_cluster_cumulative = cumulative_pnl.loc[:, cumulative_pnl.columns != 'total_pnl']
for series_name, series in per_cluster_cumulative.items():
    # Legend entries drop the '_pnl' suffix from the column name
    ax1.plot(cumulative_pnl.index, series, label=series_name.replace('_pnl', ''), linewidth=2)
ax1.set_title('Cumulative PnL by Cluster', fontsize=14, fontweight='bold')
ax1.set(xlabel='Date', ylabel='Cumulative PnL ($)')
ax1.legend(loc='upper left')
# Dashed zero reference line and a light grid
ax1.axhline(y=0, color='black', linestyle='--', alpha=0.3)
ax1.grid(True, alpha=0.3)

# Plot 2: Total Portfolio Cumulative PnL
ax2 = axes[0, 1]
total_cumulative = cumulative_pnl['total_pnl']
# Shade the area between the curve and zero: green where cumulative PnL is
# non-negative, red where it is negative. interpolate=True extends each fill to
# the exact zero crossing; without it the shading stops at the last data point
# on each side and leaves an unshaded gap wherever the curve crosses zero.
for region, shade in ((total_cumulative >= 0, 'green'), (total_cumulative < 0, 'red')):
    ax2.fill_between(cumulative_pnl.index, 0, total_cumulative,
                     where=region, color=shade, alpha=0.3, interpolate=True)
ax2.plot(cumulative_pnl.index, total_cumulative, color='blue', linewidth=2)
ax2.set_title('Total Portfolio Cumulative PnL', fontsize=14, fontweight='bold')
ax2.set_xlabel('Date')
ax2.set_ylabel('Cumulative PnL ($)')
ax2.axhline(y=0, color='black', linestyle='--', alpha=0.3)
ax2.grid(True, alpha=0.3)

# Plot 3: Daily PnL Distribution by Cluster
ax3 = axes[1, 0]
cluster_cols = [c for c in daily_pnl_pivot.columns if c != 'total_pnl']
# Long format: one row per (date, cluster) value, as expected by seaborn
daily_pnl_melted = daily_pnl_pivot[cluster_cols].melt(var_name='Cluster', value_name='Daily PnL')
# Pin the box order to cluster_cols so box i is guaranteed to be cluster_cols[i];
# the tick labels below are built from the same list.
sns.boxplot(data=daily_pnl_melted, x='Cluster', y='Daily PnL', order=cluster_cols, ax=ax3)
ax3.set_title('Daily PnL Distribution by Cluster', fontsize=14, fontweight='bold')
# Set tick positions and labels together rather than relabelling whatever ticks
# happen to exist, so labels cannot drift from their boxes.
ax3.set_xticks(range(len(cluster_cols)),
               labels=[c.replace('_pnl', '') for c in cluster_cols], rotation=45)
ax3.axhline(y=0, color='red', linestyle='--', alpha=0.5)
ax3.grid(True, alpha=0.3)

# Plot 4: Stacked Area - Contribution to Portfolio PnL
ax4 = axes[1, 1]
# Only gains are stacked: clip(lower=0) replaces every negative daily PnL with 0,
# which matches the "Positive PnL" title. Losses are not drawn in this panel.
positive_pnl = daily_pnl_pivot[cluster_cols].clip(lower=0)
ax4.stackplot(daily_pnl_pivot.index,
              [positive_pnl[c] for c in cluster_cols],
              labels=[c.replace('_pnl', '') for c in cluster_cols], alpha=0.7)
ax4.set_title('Daily Positive PnL Contribution by Cluster', fontsize=14, fontweight='bold')
ax4.set_xlabel('Date')
ax4.set_ylabel('Daily PnL ($)')
ax4.legend(loc='upper left')
ax4.grid(True, alpha=0.3)

fig.tight_layout()
# Create the output directory first: savefig does not create missing parent
# directories and would fail on a fresh machine.
figure_path = Path('/home/claude/risk_analysis/phase1_pnl_timeseries.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(figure_path, dpi=150, bbox_inches='tight')
# Close the figure once it is on disk to release its memory; it is written to
# file rather than shown inline.
plt.close(fig)
print(f"✅ Phase 1 visualization saved to: {figure_path.name}")

In [None]:
# SUMMARY FOR REPORT

In [None]:
# Summary banner
banner_rule = "=" * 80
print("\n" + banner_rule)
print("PHASE 1 SUMMARY")
print(banner_rule)

# Headline figures: trade count, first entry to last exit, number of rows in
# the daily PnL matrix, and the sum of trade-level profit.
first_entry_date = trades_df['entry_time'].min().date()
last_exit_date = trades_df['exit_time'].max().date()
total_profit = trades_df['profit'].sum()
summary = {
    'Total Trades': len(trades_df),
    'Date Range': f"{first_entry_date} to {last_exit_date}",
    'Trading Days': len(daily_pnl_pivot),
    'Total Portfolio PnL': f"${total_profit:.2f}",
}
print("\n".join(f"   {label}: {value}" for label, value in summary.items()))

# Per cluster stats. A single groupby pass yields the clusters in sorted order
# and skips rows with a missing cluster, which would otherwise break int() below.
print("\n   PnL per Cluster:")
for cluster, cluster_data in trades_df.groupby('cluster'):
    print(f"      Cluster {int(cluster)}: {len(cluster_data)} trades, ${cluster_data['profit'].sum():.2f}")

print("\n✅ PHASE 1 COMPLETED")