In [None]:
import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

# -----------------------------------------------------------------------------
# STEP 0: SETUP & LOAD DATA
# -----------------------------------------------------------------------------

# Location of the master dataset, relative to the notebook's working directory.
data_path = Path("..") / "DATA" / "processed" / "master_df.parquet"

# Fail fast: every later cell reads `df_master`. Printing and carrying on would
# either surface as a NameError further down or, worse, silently reuse a stale
# `df_master` left in the kernel by an earlier run.
if not data_path.exists():
    raise FileNotFoundError(f"Error: File not found at {data_path}")

df_master = pd.read_parquet(data_path)
print(f"Loaded Master Data: {df_master.shape}")

In [None]:
# -----------------------------------------------------------------------------
# STEP 1: PREPARE DATA
# -----------------------------------------------------------------------------

# 1. Filter for Bond Yields only (5Y and 10Y)
target_tenors = ['5Y', '10Y']

# A plain substring test would let '5Y' also match '15Y' or '25Y', so the tenor
# must not be preceded by another digit. The tenors are plain alphanumeric
# labels, so they can be embedded in the pattern without escaping.
tenor_pattern = "|".join(rf"(?<!\d){t}" for t in target_tenors)
is_bond_yield = df_master.columns.str.contains("bond_yields__", regex=False, na=False)
is_target_tenor = df_master.columns.str.contains(tenor_pattern, regex=True, na=False)
cols_to_keep = df_master.columns[is_bond_yield & is_target_tenor].tolist()

if not cols_to_keep:
    raise ValueError(
        f"No 'bond_yields__' columns found for tenors {target_tenors} in df_master."
    )

# Forward-fill gaps in the yield levels. Note that a filled row repeats the
# previous level, so its daily change below comes out as exactly zero.
df_levels = df_master[cols_to_keep].ffill()

# 2. Rename columns to be cleaner
df_levels.columns = [c.replace("bond_yields__", "") for c in df_levels.columns]

# 3. Compute daily changes (rows where every asset is NaN are dropped)
df_changes = df_levels.diff().dropna(how='all')

print(f"Data ready: {df_changes.shape[1]} assets tracked.")
display(df_changes.head())

In [None]:
# -----------------------------------------------------------------------------
# STEP 2: ROLLING CORRELATIONS
# -----------------------------------------------------------------------------

# Look-back lengths, counted in rows of df_changes.
windows = [60, 252]

# Maps each window length to the pairwise rolling-correlation frame.
rolling_corrs = {}

for w in windows:
    # Values are produced as soon as half of the window has been observed.
    half_window = w // 2
    roller = df_changes.rolling(window=w, min_periods=half_window)
    rolling_corrs[w] = roller.corr()
    print(f"Calculated {w}-day rolling correlation.")

In [None]:
# -----------------------------------------------------------------------------
# STEP 3: STABILITY METRICS
# -----------------------------------------------------------------------------

# A pair counts as "stable" on a given row when its 252-day rolling correlation
# is above this level; 'Stability' is the percentage of such rows.
STABLE_CORR_LEVEL = 0.4
STABILITY_COLUMNS = ['Tenor', 'Pair', 'Mean Corr', 'Std Dev', 'Stability']

corr_252 = rolling_corrs[252]
stability_records = []

for tenor in target_tenors:
    # Find all columns for this tenor (e.g., all '10Y' columns). The tenor must
    # not be preceded by a digit, otherwise '5Y' would also pick up '15Y'/'25Y'.
    is_tenor = df_changes.columns.str.contains(rf"(?<!\d){tenor}", regex=True, na=False)
    tenor_assets = df_changes.columns[is_tenor].tolist()

    # Create unique pairs
    pairs = list(itertools.combinations(tenor_assets, 2))

    # Cross-section of the correlation panel per first asset, computed once and
    # reused for every partner instead of being re-sliced for each pair.
    corr_by_asset = {}

    for asset_a, asset_b in pairs:
        if asset_a not in corr_by_asset:
            corr_by_asset[asset_a] = corr_252.xs(asset_a, level=1)

        # Extract 252-day correlation
        series_252 = corr_by_asset[asset_a][asset_b].dropna()

        # No overlapping observations for this pair: nothing to summarise.
        if series_252.empty:
            continue

        stats = {
            'Tenor': tenor,
            # Pair label uses the first whitespace-separated token of each column.
            'Pair': f"{asset_a.split()[0]} vs {asset_b.split()[0]}",
            'Mean Corr': series_252.mean(),
            'Std Dev': series_252.std(),
            'Stability': (series_252 > STABLE_CORR_LEVEL).mean() * 100
        }
        stability_records.append(stats)

# Summary Table
# Always (re)build the frame so that re-running this cell can never leave a
# stale `stability_df` from a previous run behind for the cells below.
stability_df = pd.DataFrame(stability_records, columns=STABILITY_COLUMNS)

if stability_records:
    stability_df = stability_df.sort_values(by='Stability', ascending=False)
    print("\n--- Correlation Stability ---")
    display(stability_df)
else:
    print("No overlapping data found for any pairs.")

In [None]:
# -----------------------------------------------------------------------------
# STEP 4: VISUALISATION OF PAIRS
# -----------------------------------------------------------------------------

# adjustText is an optional dependency; the flag records whether it could be
# imported so the plotting code can skip label adjustment when it is absent.
try:
    from adjustText import adjust_text
except ImportError:
    HAS_ADJUST_TEXT = False
    print("'adjustText' not installed. Labels might overlap. Use pip install adjustText")
else:
    HAS_ADJUST_TEXT = True

# Helper to make names readable: "GTUSD10Y Govt" -> "USD10Y"
def clean_ticker(ticker_name):
    """Return a short display name for a bond ticker.

    Removes the " Govt" qualifier and a leading "GT" prefix, e.g.
    "GTUSD10Y Govt" -> "USD10Y". Only a *leading* "GT" is stripped, so a code
    that itself contains "GT" is left intact ("GTGTQ10Y" -> "GTQ10Y").

    Args:
        ticker_name: Ticker string, with or without the " Govt" qualifier.

    Returns:
        The shortened ticker string.
    """
    name = ticker_name.replace(" Govt", "")
    if name.startswith("GT"):
        name = name[len("GT"):]
    return name

# Working copy so that plot-side manipulation never touches stability_df.
trader_df = stability_df.copy()

# Five pairs with the highest and five with the lowest average correlation.
top_pos = trader_df.nlargest(5, 'Mean Corr')
top_neg = trader_df.nsmallest(5, 'Mean Corr')

# With fewer than ten pairs the two selections overlap. Keep each row once so a
# pair is not drawn and labelled twice in the charts, then order by correlation.
top_picks = pd.concat([top_pos, top_neg])
top_picks = top_picks[~top_picks.index.duplicated(keep='first')]
top_picks = top_picks.sort_values(by='Mean Corr')

# -----------------------------------------------------------------------------
# Plot 1: Bar chart of top pairs
# -----------------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(12, 6))

# Negative average correlations are drawn red, everything else green.
mean_corr = top_picks['Mean Corr']
colors = np.where(mean_corr < 0, 'red', 'green').tolist()

# Shorten both tickers of every "A vs B" pair name for the axis labels.
clean_labels = []
for pair_name in top_picks['Pair']:
    sides = pair_name.split(' vs ')
    clean_labels.append(f"{clean_ticker(sides[0])} vs {clean_ticker(sides[1])}")

ax.barh(clean_labels, mean_corr, color=colors, alpha=0.7)

# Reference lines at +/-0.8 and a zero baseline.
ax.axvline(0.8, color='green', linestyle=':', label='High Conviction (+0.8)')
ax.axvline(-0.8, color='red', linestyle=':', label='High Inverse (-0.8)')
ax.axvline(0, color='black', linewidth=0.8)

ax.set_title("Top Candidates for Pairs Trading")
ax.set_xlabel("Average Correlation")
ax.legend()
fig.tight_layout()
plt.show()

# -----------------------------------------------------------------------------
# Plot 2: Risk vs. reward scatter plot
# -----------------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(12, 10))

# One marker shape per tenor; colour is keyed on tenor as well.
custom_markers = {"5Y": "o", "10Y": "X"}
scatter_style = dict(
    hue='Tenor',
    style='Tenor',
    markers=custom_markers,
    s=150,
    alpha=0.8,
    edgecolor='black',
)
sns.scatterplot(data=trader_df, x='Mean Corr', y='Std Dev', ax=ax, **scatter_style)

# Shade the strong-correlation bands over the lowest 30% of the axes height
# (ymin / ymax of axvspan are axes fractions, not data values).
ax.axvspan(0.8, 1.0, ymin=0, ymax=0.3, color='green', alpha=0.1, label='Prime Long Zone')
ax.axvspan(-1.0, -0.8, ymin=0, ymax=0.3, color='red', alpha=0.1, label='Prime Short Zone')

# Annotate only the selected top pairs, using shortened ticker names.
texts = []
for mean_corr, std_dev, pair_name in zip(
    top_picks['Mean Corr'], top_picks['Std Dev'], top_picks['Pair']
):
    left, right = pair_name.split(' vs ')
    label = ax.text(
        mean_corr,
        std_dev,
        f"{clean_ticker(left)} vs {clean_ticker(right)}",
        fontsize=9,
        fontweight='bold',
        color='black',
    )
    texts.append(label)

# Let adjustText reposition the labels when the optional package is present.
if HAS_ADJUST_TEXT:
    arrow_style = dict(arrowstyle='->', color='gray', lw=0.5)
    adjust_text(texts, arrowprops=arrow_style, expand_points=(1.2, 1.2))

ax.set_title("Pairs Selection Matrix: Strength vs. Stability")
ax.set_xlabel("Correlation Strength (Mean)")
ax.set_ylabel("Instability Risk (Std Dev)")
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=3)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# -----------------------------------------------------------------------------
# Plot 3: Visualisation of rolling correlation time series of the best pair
# -----------------------------------------------------------------------------
if top_picks.empty:
    raise ValueError("No pairs available to plot: top_picks is empty.")

# top_picks is sorted ascending by 'Mean Corr', so the last row is the pair
# with the highest average correlation.
best_pick = top_picks.iloc[-1]
best_pair_full = best_pick['Pair']
tenor = best_pick['Tenor']

code_a, code_b = best_pair_full.split(' vs ')
clean_name = f"{clean_ticker(code_a)} vs {clean_ticker(code_b)}"

# Map each ticker code back to its df_changes column. Pair names were built from
# the first whitespace-separated token of the column name, so match on exactly
# that token: a substring test could bind to the wrong column (e.g. "US5Y" is
# contained in "AUS5Y Govt"). setdefault keeps the first column per code.
column_by_code = {}
for column in df_changes.columns:
    tokens = column.split()
    if tokens:
        column_by_code.setdefault(tokens[0], column)

missing_codes = [code for code in (code_a, code_b) if code not in column_by_code]
if missing_codes:
    raise KeyError(f"No df_changes column found for ticker(s): {missing_codes}")

col_a = column_by_code[code_a]
col_b = column_by_code[code_b]
series_252 = rolling_corrs[252].xs(col_a, level=1)[col_b]

fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(series_252, color='black', linewidth=1.5, label='252-Day Correlation')
ax.axhline(0.8, color='green', linestyle='--', label='Target Threshold (0.8)')
ax.set_title(f"Trade Validation: {clean_name} ({tenor})")
ax.set_ylabel("Rolling Correlation")
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# -----------------------------------------------------------------------------
# STEP 5: DELIVERABLE
# -----------------------------------------------------------------------------
# Pairs whose 'Stability' percentage reaches this level are labelled 'Stable'.
STABILITY_THRESHOLD_PERCENT = 80.0

# Classify every pair, storing the label in a new 'Regime' column.
meets_threshold = stability_df['Stability'] >= STABILITY_THRESHOLD_PERCENT
stability_df['Regime'] = meets_threshold.map({True: 'Stable', False: 'Unstable'})

# Write the table, without the index, next to the processed input data.
output_csv = Path("..", "DATA", "processed", "pair_stability_metrics.csv")
stability_df.to_csv(output_csv, index=False)
print(f"Deliverable saved: {output_csv}")