# Contagion Network Analysis - Phase 2

## Overview
This notebook demonstrates **Phase 2** of the jump risk modeling research:
- Fit copula models (Clayton, Gumbel, Student-t) to asset pairs
- Calculate tail dependence coefficients (λ_U, λ_L)
- Compute jump ratios: jump_cov / total_cov
- Identify high-risk contagion pairs and clusters
- Visualize contagion networks

## Research Finding
**Upper tail dependence (synchronized surges) > Lower tail dependence (synchronized crashes)**

This challenges traditional risk modeling, which focuses primarily on crash (lower-tail) correlation.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import networkx as nx
import yaml
from pathlib import Path

# Import project modules
from data_loader import load_and_prepare_data
from jump_detector import detect_and_analyze_jumps
from copula_analyzer import analyze_contagion, CopulaAnalyzer

# Global matplotlib/seaborn styling for any static plots in this notebook.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a recent
# matplotlib release — confirm against the pinned version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

# Printed last so the cell output shows that every line above ran without raising.
print("✓ Imports complete")

## 1. Load Data and Detected Jumps

In [None]:
# --- Configuration -------------------------------------------------------
# Settings are read from config.yaml in the working directory; the parsed
# config is passed to both project helpers below.
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# --- Training data -------------------------------------------------------
# Only the 'train' split is used in this notebook.
data_splits = load_and_prepare_data(config)
train_df = data_splits['train']

# --- Phase 1 jump detection ----------------------------------------------
# The helper returns three objects: the frame annotated with jump flags,
# jump metrics, and a co-jump table.
jump_outputs = detect_and_analyze_jumps(train_df, config)
df_with_jumps, jump_metrics, cojump_df = jump_outputs

# --- Sanity summary ------------------------------------------------------
print(
    f"Data loaded: {len(df_with_jumps)} observations",
    f"Jumps detected: {df_with_jumps['is_jump'].sum()}",
    f"Assets: {df_with_jumps['asset'].nunique()}",
    sep="\n",
)

## 2. Analyze Contagion with Copulas

In [None]:
# Fit the copulas and derive contagion metrics in one call; the project helper
# returns a dict from which the three tables used below are pulled by key.
contagion_results = analyze_contagion(df_with_jumps, config)

tail_summary, jump_ratios, clusters = (
    contagion_results[key]
    for key in ('tail_summary', 'jump_ratios', 'clusters')
)

# Everything not labelled 'low' counts as high-risk here.
n_elevated_pairs = jump_ratios['risk_level'].ne('low').sum()

print(
    "\n=== Contagion Analysis Complete ===",
    f"Asset pairs analyzed: {len(tail_summary)}",
    f"High-risk pairs (jump ratio > 0.5): {n_elevated_pairs}",
    f"Contagion clusters: {len(clusters)}",
    sep="\n",
)

## 3. Tail Dependence: Upper vs Lower

In [None]:
# Compare upper vs lower tail dependence.
# Each marker is one asset pair: x = λ_L (lower tail), y = λ_U (upper tail).
# Points ABOVE the dashed y = x line have λ_U > λ_L (surge contagion dominates);
# points BELOW it have λ_L > λ_U (crash contagion dominates).
lambda_lower = tail_summary['lambda_lower']
lambda_upper = tail_summary['lambda_upper']

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=lambda_lower,
    y=lambda_upper,
    mode='markers',
    text=tail_summary['pair'],
    marker=dict(
        size=10,
        color=lambda_upper - lambda_lower,
        colorscale='RdYlGn',
        showscale=True,
        colorbar=dict(title='λ_U - λ_L')
    ),
    hovertemplate='Pair: %{text}<br>λ_L: %{x:.3f}<br>λ_U: %{y:.3f}<extra></extra>'
))

# Add diagonal reference line (λ_U == λ_L)
fig.add_trace(go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode='lines',
    line=dict(dash='dash', color='gray'),
    showlegend=False,
    hoverinfo='skip'
))

fig.update_layout(
    title='Tail Dependence: Lower (Crashes) vs Upper (Surges)',
    xaxis_title='λ_L (Lower Tail Dependence)',
    yaxis_title='λ_U (Upper Tail Dependence)',
    height=600,
    annotations=[
        # Placed in the upper-left half (y > x), i.e. actually above the diagonal.
        dict(
            x=0.3, y=0.7,
            text='Above diagonal = Stronger surge contagion',
            showarrow=False,
            font=dict(size=12, color='green')
        ),
        # Placed in the lower-right half (y < x), i.e. actually below the diagonal.
        dict(
            x=0.7, y=0.3,
            text='Below diagonal = Stronger crash contagion',
            showarrow=False,
            font=dict(size=12, color='red')
        ),
    ]
)

fig.show()

# Calculate statistics
n_pairs = len(tail_summary)

if n_pairs == 0:
    # Avoid a ZeroDivisionError (and NaN averages) when no pairs were fitted.
    print("\nNo asset pairs available - tail dependence statistics skipped")
else:
    avg_upper = lambda_upper.mean()
    avg_lower = lambda_lower.mean()
    above_diagonal = (lambda_upper > lambda_lower).sum()

    print(f"\nTail Dependence Statistics:")
    print(f"  Average λ_U (surge): {avg_upper:.3f}")
    print(f"  Average λ_L (crash): {avg_lower:.3f}")
    print(f"  Pairs with λ_U > λ_L: {above_diagonal}/{n_pairs} ({above_diagonal/n_pairs*100:.1f}%)")

    if avg_upper > avg_lower:
        print(f"\n→ KEY FINDING: Upper tail dependence is STRONGER")
        print(f"   Markets are more correlated during surges than crashes!")
    else:
        # Report the opposite outcome too, so the headline claim is never
        # silently assumed when the data does not support it.
        print(f"\n→ Upper tail dependence is NOT stronger on average in this sample")

## 4. Jump Ratio Analysis

In [None]:
# Plot the 15 pairs with the largest jump ratio.
# Sort explicitly instead of relying on analyze_contagion() having returned
# jump_ratios already ordered by jump_ratio.
TOP_N = 15
top_ratios = jump_ratios.sort_values('jump_ratio', ascending=False, kind='stable').head(TOP_N)

# Thresholds come from config; the labels below are derived from them so the
# drawn lines and their captions cannot drift apart.
thresholds = config['copula_analysis']['jump_ratio_threshold']
high_threshold = thresholds['high']
critical_threshold = thresholds['critical']

fig = go.Figure()

colors = {'critical': 'darkred', 'high': 'orange', 'low': 'lightblue'}
# Unknown risk levels fall back to gray instead of raising KeyError.
color_map = [colors.get(level, 'gray') for level in top_ratios['risk_level']]

fig.add_trace(go.Bar(
    x=top_ratios['asset1'] + '-' + top_ratios['asset2'],
    y=top_ratios['jump_ratio'],
    marker_color=color_map,
    hovertemplate='Pair: %{x}<br>Jump Ratio: %{y:.3f}<br>Risk: %{customdata}<extra></extra>',
    customdata=top_ratios['risk_level']
))

# Add threshold lines
fig.add_hline(
    y=high_threshold,
    line_dash="dash",
    line_color="orange",
    annotation_text=f"High Risk (>{high_threshold})"
)
fig.add_hline(
    y=critical_threshold,
    line_dash="dash",
    line_color="red",
    annotation_text=f"Critical Risk (>{critical_threshold})"
)

fig.update_layout(
    title=f'Top {TOP_N} Asset Pairs by Jump Ratio',
    xaxis_title='Asset Pair',
    yaxis_title='Jump Ratio (Jump Cov / Total Cov)',
    height=600,
    xaxis_tickangle=-45
)

fig.show()

# Statistics over ALL pairs (not just the plotted top N)
risk_levels = jump_ratios['risk_level']
print(f"\nJump Ratio Statistics:")
print(f"  Critical risk pairs (>{critical_threshold}): {(risk_levels == 'critical').sum()}")
print(f"  High risk pairs ({high_threshold}-{critical_threshold}): {(risk_levels == 'high').sum()}")
print(f"  Low risk pairs (<{high_threshold}): {(risk_levels == 'low').sum()}")
print(f"\n  Max jump ratio: {jump_ratios['jump_ratio'].max():.3f}")
print(f"  Mean jump ratio: {jump_ratios['jump_ratio'].mean():.3f}")

## 5. Contagion Network Visualization

In [None]:
# Build the contagion network: nodes are assets, edges are asset pairs whose
# risk level is 'high' or 'critical'. Edge weight = jump ratio.
LAYOUT_SEED = 42  # fixed so the layout is identical on every Restart & Run All

G = nx.Graph()

# Add edges for high-risk pairs
high_risk = jump_ratios[jump_ratios['risk_level'].isin(['high', 'critical'])]

for _, row in high_risk.iterrows():
    G.add_edge(
        row['asset1'],
        row['asset2'],
        weight=row['jump_ratio'],
        risk=row['risk_level']
    )

if G.number_of_edges() == 0:
    # The figure below will be empty; say why instead of showing a blank plot silently.
    print("No high/critical risk pairs found - contagion network is empty")

# Layout (spring_layout is randomized unless seeded)
pos = nx.spring_layout(G, k=2, iterations=50, seed=LAYOUT_SEED)

# Create edge traces: one trace per edge so each edge can carry its own
# colour (risk level) and width (jump ratio).
edge_traces = []
for source, target, attrs in G.edges(data=True):
    x0, y0 = pos[source]
    x1, y1 = pos[target]

    color = 'red' if attrs['risk'] == 'critical' else 'orange'
    width = attrs['weight'] * 5

    edge_traces.append(go.Scatter(
        x=[x0, x1, None],
        y=[y0, y1, None],
        mode='lines',
        line=dict(width=width, color=color),
        hoverinfo='none',
        showlegend=False
    ))

# Create node trace
node_x = []
node_y = []
node_text = []
node_size = []

for node in G.nodes():
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)

    # Node size grows with degree (number of high-risk connections)
    degree = G.degree(node)
    node_size.append(20 + degree * 10)
    node_text.append(f"{node}<br>Connections: {degree}")

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers+text',
    text=list(G.nodes()),
    textposition='top center',
    marker=dict(
        size=node_size,
        color='lightblue',
        line=dict(width=2, color='darkblue')
    ),
    hovertext=node_text,
    hoverinfo='text'
)

# Create figure (edges first so nodes are drawn on top)
fig = go.Figure(data=edge_traces + [node_trace])

fig.update_layout(
    title='Jump Contagion Network<br>(Thickness = Jump Ratio | Red = Critical, Orange = High)',
    showlegend=False,
    hovermode='closest',
    height=700,
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)

fig.show()

print(f"\nNetwork Statistics:")
print(f"  Nodes: {G.number_of_nodes()}")
print(f"  Edges: {G.number_of_edges()}")
print(f"  Most connected assets: {sorted(G.degree(), key=lambda x: x[1], reverse=True)[:5]}")

## 6. Contagion Clusters

In [None]:
# Print each contagion cluster with the mean jump ratio of the pairs whose
# two assets both belong to that cluster.
print("=== Contagion Clusters ===")
print(f"\nIdentified {len(clusters)} high-risk clusters:\n")

cluster_id = 0
for cluster in clusters:
    cluster_id += 1
    print(f"Cluster {cluster_id}: {', '.join(cluster)} ({len(cluster)} assets)")

    # A pair is intra-cluster only when both endpoints are cluster members.
    first_in_cluster = jump_ratios['asset1'].isin(cluster)
    second_in_cluster = jump_ratios['asset2'].isin(cluster)
    intra_cluster_pairs = jump_ratios.loc[first_in_cluster & second_in_cluster]

    mean_intra_ratio = intra_cluster_pairs['jump_ratio'].mean()
    print(f"  Average intra-cluster jump ratio: {mean_intra_ratio:.3f}", end="\n\n")

## 7. Jump Ratio Heatmap

In [None]:
# Build a symmetric asset x asset matrix of jump ratios and plot it.
# Cells with no entry in jump_ratios (including the diagonal) stay at 0.
assets = df_with_jumps['asset'].unique()
n_assets = len(assets)

# Asset name -> row/column position, built once instead of re-creating a
# list and scanning it linearly twice per pair.
asset_index = {asset: position for position, asset in enumerate(assets)}

# Initialize matrix
ratio_matrix = np.zeros((n_assets, n_assets))

# Fill matrix (both triangles, since the jump ratio of a pair is symmetric)
for asset1, asset2, ratio in zip(
    jump_ratios['asset1'], jump_ratios['asset2'], jump_ratios['jump_ratio']
):
    i = asset_index[asset1]
    j = asset_index[asset2]
    ratio_matrix[i, j] = ratio
    ratio_matrix[j, i] = ratio

# Plot heatmap
fig = go.Figure(data=go.Heatmap(
    z=ratio_matrix,
    x=assets,
    y=assets,
    colorscale='YlOrRd',
    colorbar=dict(title='Jump Ratio'),
    hovertemplate='%{y} - %{x}<br>Jump Ratio: %{z:.3f}<extra></extra>'
))

fig.update_layout(
    title='Jump Ratio Heatmap',
    xaxis_title='Asset',
    yaxis_title='Asset',
    height=700,
    width=700
)

fig.show()

print("Note: Darker red = higher jump contagion risk")

## 8. BTC-ETH Special Analysis

In [None]:
# Analyze the BTC-ETH pair (if present), in either asset order.
btc_eth_pair = jump_ratios[
    ((jump_ratios['asset1'] == 'BTC') & (jump_ratios['asset2'] == 'ETH')) |
    ((jump_ratios['asset1'] == 'ETH') & (jump_ratios['asset2'] == 'BTC'))
]

if len(btc_eth_pair) > 0:
    pair_row = btc_eth_pair.iloc[0]

    print("=== BTC-ETH Pair Analysis ===")
    print(f"\nJump Ratio: {pair_row['jump_ratio']:.3f}")
    print(f"Risk Level: {pair_row['risk_level']}")
    print(f"Total Covariance: {pair_row['total_cov']:.6f}")
    print(f"Jump Covariance: {pair_row['jump_cov']:.6f}")

    # Get tail dependence.
    # Match on whole asset tokens rather than substrings: a plain
    # str.contains('BTC') & str.contains('ETH') would also select pairs such
    # as 'WBTC-ETH' or 'BTC-STETH'.
    # NOTE(review): assumes the 'pair' label separates the two asset names
    # with a non-alphanumeric delimiter (e.g. 'BTC-ETH') — confirm against
    # copula_analyzer.
    pair_tokens = tail_summary['pair'].astype(str).str.split(r'[^A-Za-z0-9]+')
    is_btc_eth = pair_tokens.map(lambda tokens: {'BTC', 'ETH'}.issubset(tokens)).astype(bool)
    btc_eth_tail = tail_summary[is_btc_eth]

    if len(btc_eth_tail) > 0:
        tail_row = btc_eth_tail.iloc[0]

        print(f"\nTail Dependence:")
        print(f"  λ_L (crash): {tail_row['lambda_lower']:.3f}")
        print(f"  λ_U (surge): {tail_row['lambda_upper']:.3f}")

        if tail_row['lambda_upper'] > tail_row['lambda_lower']:
            print(f"\n→ BTC and ETH are MORE correlated during surges than crashes")
else:
    print("BTC-ETH pair not found in data")

## 9. Export Contagion Results

In [None]:
# Save results as CSV files under ./results
results_dir = Path('results')
# parents=True so a nested output path can be used later without a FileNotFoundError.
results_dir.mkdir(parents=True, exist_ok=True)

tail_summary.to_csv(results_dir / 'tail_dependence.csv', index=False)
jump_ratios.to_csv(results_dir / 'jump_ratios.csv', index=False)

# Save clusters: one row per cluster, assets joined into a single string.
# Columns are given explicitly so the CSV still has a header row when no
# clusters were identified (an empty record list would otherwise produce a
# column-less frame).
cluster_df = pd.DataFrame(
    [
        {'cluster_id': i, 'assets': ', '.join(cluster)}
        for i, cluster in enumerate(clusters, 1)
    ],
    columns=['cluster_id', 'assets'],
)
cluster_df.to_csv(results_dir / 'contagion_clusters.csv', index=False)

print("✓ Contagion results saved to results/ directory")

## Key Findings - Phase 2

1. **Upper Tail Dominance**: Markets exhibit stronger correlation during surges (λ_U) than crashes (λ_L)
   - Challenges traditional risk modeling focused on crash correlation
   
2. **Jump Ratios**: High jump ratios (>0.5) indicate pairs where jump contagion dominates
   - Critical pairs (>0.7) require special attention in portfolio construction
   
3. **Contagion Clusters**: Identified groups of highly interconnected assets
   - BTC and ETH often form the core of the major crypto cluster
   - Diversification within clusters provides limited risk reduction
   
4. **Network Structure**: Hub assets (high degree) act as contagion spreaders
   - Reducing exposure to hub assets can limit portfolio jump risk

## Next Steps
→ Proceed to **Notebook 03** for jump-adjusted portfolio optimization