# Phase 4: Trade Overlap & Concentration Analysis

This notebook contains the analysis for the Risk & Portfolio Interaction study.

Phase 4: Analyze Trade Overlap & Concentration
==============================================
Goal: Understand exposure stacking as a risk source

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta
import warnings
warnings.filterwarnings('ignore')

# Global matplotlib / seaborn styling for the figures produced below
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

banner = "=" * 80
print(banner)
print("PHASE 4: TRADE OVERLAP & CONCENTRATION ANALYSIS")
print(banner)

# Read the trade log and convert both timestamp columns to datetimes
trades_df = pd.read_csv('trades_with_clusters.csv')
for time_col in ('entry_time', 'exit_time'):
    trades_df[time_col] = pd.to_datetime(trades_df[time_col])

print(f"\n Loaded {len(trades_df)} trades")

PHASE 4: TRADE OVERLAP & CONCENTRATION ANALYSIS

 Loaded 430 trades


In [None]:
# TRADE OVERLAP ANALYSIS

In [11]:
print("\n" + "=" * 80)
print("TRADE OVERLAP ANALYSIS")
print("=" * 80)

# The analysis window runs from the earliest entry to the latest exit
min_time = trades_df['entry_time'].min()
max_time = trades_df['exit_time'].max()

# Hourly sampling grid anchored at min_time (so grid points share min_time's
# minute/second offset). The lowercase 'h' alias is used because the uppercase
# 'H' alias is deprecated since pandas 2.2; the deprecation warning would be
# hidden here by the notebook-wide warnings filter.
time_points = pd.date_range(start=min_time, end=max_time, freq='h')
print(f"\nAnalyzing {len(time_points)} hourly time points from {min_time} to {max_time}")

# For each time point, count open trades
def count_open_trades(time_point, trades):
    """Select the trades that are open at ``time_point``.

    A trade is treated as open when ``entry_time <= time_point <= exit_time``
    (both ends inclusive). Despite the name, the matching rows of ``trades``
    are returned rather than a number; callers take ``len()`` of the result.
    """
    has_entered = trades['entry_time'].le(time_point)
    not_yet_exited = trades['exit_time'].ge(time_point)
    return trades.loc[has_entered & not_yet_exited]

# One record per hourly grid point: how many trades are open, split by cluster
overlap_data = []
for t in time_points:
    open_now = count_open_trades(t, trades_df)
    n_open = len(open_now)

    # value_counts keys are the cluster labels, looked up here as floats 0.0-3.0
    per_cluster = open_now['cluster'].value_counts().to_dict()

    record = {'time': t, 'total_open': n_open}
    for cluster_id in range(4):
        record[f'cluster_{cluster_id}_open'] = per_cluster.get(float(cluster_id), 0)
    # NOTE(review): this sums |profit| of the open trades; presumably 'profit'
    # is the realized result of each trade, so this is not a position size - confirm
    if n_open > 0:
        record['total_exposure_value'] = open_now['profit'].abs().sum()
    else:
        record['total_exposure_value'] = 0
    overlap_data.append(record)

overlap_df = pd.DataFrame(overlap_data)

open_counts = overlap_df['total_open']
print("\n Overlap Statistics:")
print(f"   Max simultaneous trades: {open_counts.max()}")
print(f"   Average open trades: {open_counts.mean():.2f}")
for threshold in (1, 2, 3):
    noun = 'trade' if threshold == 1 else 'trades'
    share_pct = (open_counts > threshold).mean() * 100
    print(f"   % of time with > {threshold} {noun} open: {share_pct:.1f}%")


TRADE OVERLAP ANALYSIS

Analyzing 7630 hourly time points from 2025-01-07 02:00:00 to 2025-11-20 23:59:59

 Overlap Statistics:
   Max simultaneous trades: 4
   Average open trades: 1.79
   % of time with > 1 trade open: 47.3%
   % of time with > 2 trades open: 31.9%
   % of time with > 3 trades open: 18.0%


In [None]:
# CLUSTER CONCENTRATION ANALYSIS

In [12]:
banner = "=" * 80
print("\n" + banner)
print("CLUSTER CONCENTRATION ANALYSIS")
print(banner)

# Keep only the grid points at which at least two trades were open at once
multi_trade_times = overlap_df.loc[overlap_df['total_open'].gt(1)]

if len(multi_trade_times) > 0:
    print(f"\n When multiple trades are open ({len(multi_trade_times)} hours):")
    # Mean number of open trades per cluster across those grid points
    for cluster_id in range(4):
        avg_open = multi_trade_times[f'cluster_{cluster_id}_open'].mean()
        print(f"   Avg Cluster {cluster_id} trades: {avg_open:.2f}")


CLUSTER CONCENTRATION ANALYSIS

 When multiple trades are open (3612 hours):
   Avg Cluster 0 trades: 0.71
   Avg Cluster 1 trades: 1.92
   Avg Cluster 2 trades: 0.32
   Avg Cluster 3 trades: 0.10


In [None]:
# TIME-OF-DAY OVERLAP ANALYSIS

In [14]:
banner = "=" * 80
print("\n" + banner)
print("TIME-OF-DAY OVERLAP ANALYSIS")
print(banner)

# Tag every grid point with its hour of day, then summarise open-trade counts per hour
overlap_df['hour'] = overlap_df['time'].dt.hour
open_by_hour = overlap_df.groupby('hour')['total_open']
hourly_overlap = open_by_hour.agg(['mean', 'max', 'std'])

# NOTE(review): the heading says UTC; the timestamps are used as loaded, with
# no timezone conversion in this notebook - confirm the source data is UTC
print("\n Average Open Trades by Hour (UTC):")
hourly_table = hourly_overlap.round(2).to_string()
print(hourly_table)

# Define sessions
def classify_session(hour):
    """Map an hour of day to a trading-session label.

    Windows are half-open: [0, 8) -> 'Asian', [8, 12) -> 'London',
    [12, 17) -> 'NY Overlap'. Any value outside those windows
    (17 and above, or below 0) falls through to 'Late US'.
    """
    session_windows = (
        (0, 8, 'Asian'),
        (8, 12, 'London'),
        (12, 17, 'NY Overlap'),
    )
    for start_hour, end_hour, session_name in session_windows:
        if start_hour <= hour < end_hour:
            return session_name
    return 'Late US'

# Label each grid point with its session, then summarise open-trade counts per session
overlap_df['session'] = overlap_df['hour'].map(classify_session)
open_by_session = overlap_df.groupby('session')['total_open']
session_overlap = open_by_session.agg(['mean', 'max', 'std'])

print("\n Open Trades by Session:")
session_table = session_overlap.round(2).to_string()
print(session_table)


TIME-OF-DAY OVERLAP ANALYSIS

 Average Open Trades by Hour (UTC):
      mean  max   std
hour                 
0     1.78    4  1.37
1     1.80    4  1.39
2     1.84    4  1.39
3     1.84    4  1.39
4     1.82    4  1.39
5     1.82    4  1.41
6     1.80    4  1.40
7     1.82    4  1.38
8     1.81    4  1.39
9     1.79    4  1.40
10    1.80    4  1.40
11    1.80    4  1.40
12    1.84    4  1.38
13    1.79    4  1.32
14    1.79    4  1.31
15    1.75    4  1.34
16    1.75    4  1.34
17    1.68    4  1.34
18    1.69    4  1.37
19    1.71    4  1.38
20    1.77    4  1.37
21    1.82    4  1.38
22    1.84    4  1.37
23    1.81    4  1.37

 Open Trades by Session:
            mean  max   std
session                    
Asian       1.82    4  1.39
Late US     1.76    4  1.37
London      1.80    4  1.39
NY Overlap  1.78    4  1.34


In [None]:
# HIGH OVERLAP PERIODS ANALYSIS

In [15]:
banner = "=" * 80
print("\n" + banner)
print("HIGH OVERLAP PERIODS (> 3 trades)")
print(banner)

# Grid points at which more than three trades were open simultaneously
high_overlap = overlap_df[overlap_df['total_open'] > 3]
if len(high_overlap) == 0:
    print("\nNo periods with > 3 simultaneous trades found")
else:
    print(f"\nFound {len(high_overlap)} hours with > 3 simultaneous trades")

    # Open-trade-hours contributed by each cluster during those grid points
    cluster_contribution = {
        f'Cluster {cluster_id}': high_overlap[f'cluster_{cluster_id}_open'].sum()
        for cluster_id in range(4)
    }
    total = sum(cluster_contribution.values())

    print("\n Cluster contribution during high overlap:")
    for cluster, count in cluster_contribution.items():
        # Guard against a zero total so the share is reported as 0 rather than dividing by zero
        if total > 0:
            pct = (count / total) * 100
        else:
            pct = 0
        print(f"   {cluster}: {count} ({pct:.1f}%)")


HIGH OVERLAP PERIODS (> 3 trades)

Found 1370 hours with > 3 simultaneous trades

 Cluster contribution during high overlap:
   Cluster 0: 953 (17.4%)
   Cluster 1: 4045 (73.8%)
   Cluster 2: 384 (7.0%)
   Cluster 3: 98 (1.8%)


In [None]:
# TRADE DURATION ANALYSIS

In [17]:
banner = "=" * 80
print("\n" + banner)
print("TRADE DURATION ANALYSIS")
print(banner)

# Holding time of every trade in hours, derived from its entry/exit timestamps
holding_time = trades_df['exit_time'] - trades_df['entry_time']
trades_df['duration_calc'] = holding_time.dt.total_seconds() / 3600

print("\n Trade Duration by Cluster (hours):")
duration_by_cluster = trades_df.groupby('cluster')['duration_calc']
duration_stats = duration_by_cluster.agg(['mean', 'median', 'min', 'max', 'std'])
# Replace the cluster labels with readable row names (also drops the index name)
duration_stats.index = ['Cluster ' + str(int(c)) for c in duration_stats.index]
print(duration_stats.round(2).to_string())

# Trades held longer than 48 hours have more opportunity to overlap with others
is_long = trades_df['duration_calc'] > 48
long_trades = trades_df[is_long]
n_long = len(long_trades)
print(f"\n Long trades (> 48 hours): {n_long}")
if n_long > 0:
    print(f"   By cluster: {long_trades['cluster'].value_counts().to_dict()}")


TRADE DURATION ANALYSIS

 Trade Duration by Cluster (hours):
            mean  median  min    max    std
Cluster 0  18.35    9.00  1.0  101.0  22.11
Cluster 1  79.63   77.00  4.0  211.0  54.47
Cluster 2  15.78    4.00  1.0  110.0  23.81
Cluster 3  16.37    9.48  1.0   82.0  20.02

 Long trades (> 48 hours): 95
   By cluster: {1.0: 60, 0.0: 21, 2.0: 8, 3.0: 6}


In [None]:
# REGIME-BASED OVERLAP ANALYSIS

In [18]:
# Section banner for the regime-based part of the analysis
banner = "=" * 80
print("\n" + banner)
print("REGIME-BASED OVERLAP ANALYSIS")
print(banner)

# Get ATR at entry for each time point's open trades
def get_regime_info(time_point, trades):
    """Average the entry-time ATR(14) and ADX(14) of the trades open at ``time_point``.

    A trade is open when ``entry_time <= time_point <= exit_time``. Returns a
    Series with keys ``avg_atr`` and ``avg_adx``; both are NaN when no trade
    is open at that time.
    """
    is_open = (trades['entry_time'] <= time_point) & (trades['exit_time'] >= time_point)
    if not is_open.any():
        return pd.Series({'avg_atr': np.nan, 'avg_adx': np.nan})

    currently_open = trades[is_open]
    averages = {
        'avg_atr': currently_open['entry_ATR(14)'].mean(),
        'avg_adx': currently_open['entry_ADX(14)'].mean(),
    }
    return pd.Series(averages)

# Evaluate only every 4th hourly grid point to keep the loop cheap
sampled_times = time_points[::4]
regime_data = []
for sample_time in sampled_times:
    open_now = count_open_trades(sample_time, trades_df)
    if len(open_now) == 0:
        # Grid points with nothing open are left out, so the averages printed
        # below are conditional on at least one trade being open
        continue
    regime_data.append({
        'time': sample_time,
        'total_open': len(open_now),
        'avg_atr': open_now['entry_ATR(14)'].mean(),
        'avg_adx': open_now['entry_ADX(14)'].mean()
    })

regime_df = pd.DataFrame(regime_data)

if len(regime_df) > 0:
    # Split the sampled points at the median entry ATR of all trades
    median_atr = trades_df['entry_ATR(14)'].median()
    is_high_vol = regime_df['avg_atr'] > median_atr
    is_low_vol = regime_df['avg_atr'] <= median_atr
    high_vol = regime_df[is_high_vol]
    low_vol = regime_df[is_low_vol]

    high_vol_avg = high_vol['total_open'].mean()
    low_vol_avg = low_vol['total_open'].mean()
    print("\n Overlap by Volatility Regime:")
    print(f"   High Vol (ATR > {median_atr:.4f}): Avg {high_vol_avg:.2f} open trades")
    print(f"   Low Vol (ATR <= {median_atr:.4f}): Avg {low_vol_avg:.2f} open trades")

# Persist the hourly overlap table (including the hour/session columns added earlier)
overlap_df.to_csv('trade_overlap.csv', index=False)


REGIME-BASED OVERLAP ANALYSIS

 Overlap by Volatility Regime:
   High Vol (ATR > 3.2868): Avg 1.94 open trades
   Low Vol (ATR <= 3.2868): Avg 2.35 open trades


In [None]:
# VISUALIZATIONS

In [19]:
# 2x3 dashboard summarising the overlap analysis. The figure is written to
# disk and closed rather than displayed inline.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Plot 1: Open trades over time
ax1 = axes[0, 0]
ax1.fill_between(overlap_df['time'], 0, overlap_df['total_open'], alpha=0.5, label='Total Open')
ax1.plot(overlap_df['time'], overlap_df['total_open'], color='blue', linewidth=0.5)
ax1.axhline(y=overlap_df['total_open'].mean(), color='red', linestyle='--', 
            label=f'Mean: {overlap_df["total_open"].mean():.1f}')
ax1.set_title('Number of Open Trades Over Time', fontsize=14, fontweight='bold')
ax1.set_xlabel('Date')
ax1.set_ylabel('Open Trades')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot 2: Histogram of simultaneous trades
ax2 = axes[0, 1]
counts = overlap_df['total_open'].value_counts().sort_index()
ax2.bar(counts.index, counts.values, color='steelblue', alpha=0.7, edgecolor='black')
ax2.set_title('Distribution of Simultaneous Open Trades', fontsize=14, fontweight='bold')
ax2.set_xlabel('Number of Open Trades')
ax2.set_ylabel('Frequency (hours)')
ax2.grid(True, alpha=0.3)

# Plot 3: Open trades by cluster over time
ax3 = axes[0, 2]
ax3.stackplot(overlap_df['time'], 
              overlap_df['cluster_0_open'], 
              overlap_df['cluster_1_open'],
              overlap_df['cluster_2_open'],
              overlap_df['cluster_3_open'],
              labels=['Cluster 0', 'Cluster 1', 'Cluster 2', 'Cluster 3'],
              alpha=0.7)
ax3.set_title('Open Trades by Cluster Over Time', fontsize=14, fontweight='bold')
ax3.set_xlabel('Date')
ax3.set_ylabel('Open Trades')
ax3.legend(loc='upper right')
ax3.grid(True, alpha=0.3)

# Plot 4: Average overlap by hour
ax4 = axes[1, 0]
ax4.bar(hourly_overlap.index, hourly_overlap['mean'], color='coral', alpha=0.7, edgecolor='black')
ax4.set_title('Average Open Trades by Hour (UTC)', fontsize=14, fontweight='bold')
ax4.set_xlabel('Hour')
ax4.set_ylabel('Avg Open Trades')
ax4.set_xticks(range(0, 24, 2))
# Add session markers. Bars are centred on integer hours, so each span is
# shifted by half an hour to cover exactly the bars belonging to its session
# (e.g. London = hours 8-11 -> 7.5 to 11.5) instead of cutting bars in half.
ax4.axvspan(-0.5, 7.5, alpha=0.1, color='blue', label='Asian')
ax4.axvspan(7.5, 11.5, alpha=0.1, color='green', label='London')
ax4.axvspan(11.5, 16.5, alpha=0.1, color='orange', label='NY Overlap')
ax4.axvspan(16.5, 23.5, alpha=0.1, color='red', label='Late US')
ax4.legend(fontsize=8)
ax4.grid(True, alpha=0.3)

# Plot 5: Trade duration by cluster
ax5 = axes[1, 1]
cluster_durations = [trades_df[trades_df['cluster'] == c]['duration_calc'] for c in [0, 1, 2, 3]]
# Tick labels are set on the axis afterwards instead of via boxplot's `labels`
# keyword, which is deprecated since Matplotlib 3.9 (renamed `tick_labels`);
# this form works on both older and newer Matplotlib versions.
bp = ax5.boxplot(cluster_durations)
ax5.set_xticklabels(['Cluster 0', 'Cluster 1', 'Cluster 2', 'Cluster 3'])
ax5.set_title('Trade Duration Distribution by Cluster', fontsize=14, fontweight='bold')
ax5.set_ylabel('Duration (hours)')
ax5.grid(True, alpha=0.3)

# Plot 6: Session overlap comparison
ax6 = axes[1, 2]
session_order = ['Asian', 'London', 'NY Overlap', 'Late US']
# Only sessions that actually occur in the data can be plotted; the bar
# positions and tick labels are derived from this same filtered list so they
# always match the number of bar heights.
present_sessions = [s for s in session_order if s in session_overlap.index]
session_means = [session_overlap.loc[s, 'mean'] for s in present_sessions]
session_maxs = [session_overlap.loc[s, 'max'] for s in present_sessions]
x = np.arange(len(present_sessions))
width = 0.35
ax6.bar(x - width/2, session_means, width, label='Average', color='steelblue', alpha=0.7)
ax6.bar(x + width/2, session_maxs, width, label='Maximum', color='coral', alpha=0.7)
ax6.set_xticks(x)
ax6.set_xticklabels(present_sessions, rotation=45)
ax6.set_title('Trade Overlap by Session', fontsize=14, fontweight='bold')
ax6.set_ylabel('Open Trades')
ax6.legend()
ax6.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('phase4_overlap.png', dpi=150, bbox_inches='tight')
plt.close()
print(f"\n Phase 4 visualization saved to: phase4_overlap.png")


 Phase 4 visualization saved to: phase4_overlap.png


In [None]:
# KEY FINDINGS

In [21]:
print("\n" + "=" * 80)
print("PHASE 4 KEY FINDINGS")
print("=" * 80)

# Headline numbers quoted in the summary, taken from the tables computed in
# the earlier cells so the text cannot drift away from the results.
max_open = overlap_df['total_open'].max()
mean_open = overlap_df['total_open'].mean()
session_low = session_overlap['mean'].min()
session_high = session_overlap['mean'].max()
hour_low = hourly_overlap['mean'].min()
hour_high = hourly_overlap['mean'].max()

# The TIME CONCENTRATION section reports the measured spread instead of a
# fixed claim that overlap "varies significantly", which the session and
# hourly tables above do not support.
print(f"""
 EXPOSURE STACKING RISK ASSESSMENT:

1. OVERLAP CONCENTRATION:
   - Maximum simultaneous trades: {max_open}
   - Average exposure: {mean_open:.2f} trades
   - This creates STACKED RISK when multiple clusters move against you

2. TIME CONCENTRATION:
   - Average open trades by session: {session_low:.2f} to {session_high:.2f}
   - Average open trades by hour: {hour_low:.2f} to {hour_high:.2f}

3. CLUSTER CONCENTRATION:
   - When multiple trades are open, specific clusters dominate
   - This reduces effective diversification during overlap periods

4. RISK IMPLICATIONS:
   - Multiple trades in same direction = AMPLIFIED DIRECTIONAL RISK
   - High overlap during volatile periods = TAIL RISK EXPOSURE
   - Concentrated sessions = EVENT RISK VULNERABILITY
""")

print("\n PHASE 4 COMPLETED")


PHASE 4 KEY FINDINGS

 EXPOSURE STACKING RISK ASSESSMENT:

1. OVERLAP CONCENTRATION:
   - Maximum simultaneous trades: 4
   - Average exposure: 1.79 trades
   - This creates STACKED RISK when multiple clusters move against you

2. TIME CONCENTRATION:
   - Overlap varies significantly by trading session
   - Certain hours have concentrated exposure

3. CLUSTER CONCENTRATION:
   - When multiple trades are open, specific clusters dominate
   - This reduces effective diversification during overlap periods

4. RISK IMPLICATIONS:
   - Multiple trades in same direction = AMPLIFIED DIRECTIONAL RISK
   - High overlap during volatile periods = TAIL RISK EXPOSURE
   - Concentrated sessions = EVENT RISK VULNERABILITY


 PHASE 4 COMPLETED
