# JGB PCA Strategy: PC1 Hedge Analysis

## Overview
Comparison of two strategies:
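1. **Original Strategy**: Each day, go long the bond whose yield sits furthest above the PCA-reconstructed curve and short the bond whose yield sits furthest below it (the largest positive and negative reconstruction errors).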
2. **Hedged Strategy**: Adds a hedge using **7-year JGBs** to neutralize the portfolio's exposure to the **First Principal Component (PC1)**.

### Improvements Made
- **Bond Tracking**: The hedge bond is selected on the entry day and tracked by its **unique name** on the exit day, ensuring consistency even if the "nearest 7-year bond" changes.
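- **Spike Mitigation**: Hedge quantity $Q$ is clipped to $[-10, 10]$, and set to $0$ when the hedge bond's PC1 loading is at most $10^{-4}$ in absolute value, to avoid extreme positions caused by near-zero loadings.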
- **Debug Logging**: Added warnings for daily PnL swings larger than 20bp.

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from sklearn.decomposition import PCA
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Make the project importable: the directory two levels above the current
# working directory is put at the front of sys.path so that `data.utils`
# resolves when the notebook is launched from its own folder.
project_root = os.path.abspath(os.path.join(os.getcwd(), '../..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

if os.getcwd() not in sys.path:
    sys.path.insert(0, os.getcwd())

try:
    from data.utils.database_manager import DatabaseManager
except ImportError:
    # Fallback for script execution: resolve the project root relative to this
    # file instead of the working directory. `__file__` does not exist inside
    # a Jupyter/IPython kernel, so in that case re-raise the original
    # ImportError rather than masking it with a NameError.
    if '__file__' not in globals():
        raise
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
    from data.utils.database_manager import DatabaseManager

# Font fallback chain for non-ASCII labels. The last entry was misspelled
# 'Meirio'; the font family is named 'Meiryo', so the old entry never matched.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'Hiragino Sans', 'Yu Gothic', 'Meiryo']
# Render minus signs with the ASCII hyphen so they survive fonts lacking U+2212.
plt.rcParams['axes.unicode_minus'] = False

In [None]:
def fetch_all_data(db_manager, start_date='2022-01-01', min_maturity=0.5):
    """Load bond yield observations from ``bond_data`` into a DataFrame.

    Args:
        db_manager: Database access object. If it exposes a callable
            ``select_as_dict(sql, params)`` that is used (expected to return
            row dicts keyed by column name); otherwise
            ``execute_query(sql, params)`` is used and its rows are zipped
            with the SELECT column order.
        start_date: Earliest ``trade_date`` (inclusive) to fetch.
        min_maturity: Rows with fewer than this many years between
            ``trade_date`` and ``due_date`` are dropped. Defaults to 0.5.

    Returns:
        A DataFrame with columns ``trade_date``, ``bond_name``, ``due_date``,
        ``ave_compound_yield`` (float) and ``maturity`` (years, using a
        365.25-day year). An empty, column-less DataFrame is returned when
        the query yields no rows.
    """
    print(f"Fetching data from {start_date}...")
    sql = """
        SELECT trade_date, bond_name, due_date, ave_compound_yield
        FROM bond_data
        WHERE trade_date >= %s
          AND ave_compound_yield IS NOT NULL
          AND ave_compound_yield BETWEEN -1 AND 10
        ORDER BY trade_date, due_date
    """
    params = (start_date,)

    # Pick the query method by explicit lookup rather than by catching
    # AttributeError around the call: an AttributeError raised *inside*
    # select_as_dict must surface as an error, not silently trigger a second
    # query through the fallback path.
    select_as_dict = getattr(db_manager, 'select_as_dict', None)
    if callable(select_as_dict):
        data = select_as_dict(sql, params)
    else:
        rows = db_manager.execute_query(sql, params)
        cols = ['trade_date', 'bond_name', 'due_date', 'ave_compound_yield']
        data = [dict(zip(cols, row)) for row in rows]

    if not data:
        return pd.DataFrame()

    df = pd.DataFrame(data)
    df['trade_date'] = pd.to_datetime(df['trade_date'])
    df['due_date'] = pd.to_datetime(df['due_date'])
    # Cast explicitly: database drivers may hand back non-float numerics.
    df['ave_compound_yield'] = df['ave_compound_yield'].astype(float)
    df['maturity'] = (df['due_date'] - df['trade_date']).dt.days / 365.25
    df = df[df['maturity'] >= min_maturity]

    print(f"Data loaded: {len(df)} records")
    return df

In [None]:
class PCAStrategyBacktestHedged:
    """Daily backtest of a PCA-residual long/short trade, with and without a PC1 hedge.

    For every trading day (after an initial ``window`` of history) a 2-component
    PCA is fitted on the previous ``window`` interpolated yield curves. The
    current curve is reconstructed from that model, and the bonds with the
    largest positive / negative residual are taken long / short for one day.
    A third bond nearest to ``hedge_maturity`` is sized so that the combined
    position has zero interpolated PC1 loading.

    PnL figures are yield changes multiplied by 100.

    Args:
        df: Observations with columns ``trade_date``, ``bond_name``,
            ``maturity`` and ``ave_compound_yield``.
        window: Number of preceding trading days used to fit the PCA.
        grid_points: Maturity grid on which curves are interpolated;
            defaults to 1, 2, ..., 40.
        hedge_maturity: Target maturity (years) of the hedge bond.
        max_hedge_qty: Hedge quantity is clipped to ``[-max, +max]``.
        pnl_warn_bp: A warning is logged when the absolute daily PnL of either
            variant exceeds this value; ``None`` disables the check.
    """

    # Column order of the frame returned by run(), also used when it is empty.
    RESULT_COLUMNS = ('date', 'no_hedge_pnl', 'hedged_total_pnl')
    # Below this absolute PC1 loading the hedge bond is considered unusable.
    MIN_HEDGE_LOADING = 1e-4

    def __init__(self, df, window=50, grid_points=None, hedge_maturity=7.0,
                 max_hedge_qty=10.0, pnl_warn_bp=20.0):
        self.df = df
        self.window = window
        self.grid_points = grid_points if grid_points is not None else np.linspace(1, 40, 40)
        self.dates = sorted(df['trade_date'].unique())
        self.results = []
        self.hedge_maturity = hedge_maturity
        self.max_hedge_qty = max_hedge_qty
        self.pnl_warn_bp = pnl_warn_bp

    def interpolate_curve(self, daily_df, target_grid):
        """Linearly interpolate one day's yields onto ``target_grid``.

        Returns ``None`` when fewer than three distinct maturities are
        available or when interpolation fails; values outside the observed
        maturity range are linearly extrapolated.
        """
        daily_df = daily_df.sort_values('maturity').drop_duplicates(subset='maturity')
        x = daily_df['maturity'].values
        y = daily_df['ave_compound_yield'].values
        if len(x) < 3:
            return None
        try:
            return interp1d(x, y, kind='linear', fill_value='extrapolate')(target_grid)
        except (ValueError, TypeError):
            # Malformed input for this day: skip it, but do not swallow
            # KeyboardInterrupt/SystemExit as a bare `except` would.
            return None

    def get_bond_pc1_loading(self, maturity, pc1_grid_values):
        """Interpolate the PC1 loading at ``maturity`` from its grid values.

        Returns ``0.0`` when the loading cannot be interpolated.
        """
        try:
            loading_at = interp1d(self.grid_points, pc1_grid_values, kind='linear',
                                  fill_value='extrapolate')
            return float(loading_at(maturity))
        except (ValueError, TypeError):
            return 0.0

    def find_hedge_bond(self, daily_df, target_maturity=7.0):
        """Return the row of ``daily_df`` whose maturity is closest to ``target_maturity``.

        The returned row carries an extra ``diff`` field holding that distance.
        """
        daily_df = daily_df.copy()
        daily_df['diff'] = (daily_df['maturity'] - target_maturity).abs()
        return daily_df.loc[daily_df['diff'].idxmin()]

    @staticmethod
    def _yield_drop(entry_bond, next_df):
        """Return entry yield minus next-day yield for the same bond name, or None if it is not quoted."""
        exit_rows = next_df[next_df['bond_name'] == entry_bond['bond_name']]
        if exit_rows.empty:
            return None
        return entry_bond['ave_compound_yield'] - exit_rows.iloc[0]['ave_compound_yield']

    def run(self):
        """Run the backtest and return one row per traded day.

        Returns:
            DataFrame with columns ``date``, ``no_hedge_pnl`` and
            ``hedged_total_pnl``. The columns are present even when no day
            produced a trade. ``self.results`` holds the same records.
        """
        # `logging` is used instead of `warnings` because the notebook
        # globally silences the `warnings` module.
        import logging
        logger = logging.getLogger(__name__)

        print(f"Starting hedged backtest over {len(self.dates)} days")
        # Start from a clean slate so calling run() twice does not duplicate rows.
        self.results = []

        def day_frame(date):
            return self.df[self.df['trade_date'] == date]

        # Each day's grid curve is reused by the following `window` iterations;
        # memoise it (keyed by position in self.dates) instead of re-filtering
        # and re-interpolating it every time.
        curve_cache = {}

        def grid_curve(pos):
            if pos not in curve_cache:
                curve_cache[pos] = self.interpolate_curve(day_frame(self.dates[pos]), self.grid_points)
            return curve_cache[pos]

        for i in range(self.window, len(self.dates) - 1):
            current_date, next_date = self.dates[i], self.dates[i + 1]

            # Training set: the `window` days strictly before the current one.
            train_curves = [grid_curve(j) for j in range(i - self.window, i)]
            train_curves = [c for c in train_curves if c is not None]
            if len(train_curves) < self.window * 0.8:
                continue

            pca = PCA(n_components=2).fit(np.array(train_curves))
            pc1_loadings = pca.components_[0]

            current_curve_grid = grid_curve(i)
            if current_curve_grid is None:
                continue

            # Model curve = current curve projected onto the fitted components.
            reconstructed_grid = pca.inverse_transform(
                pca.transform(current_curve_grid.reshape(1, -1))
            ).flatten()
            model_interp = interp1d(self.grid_points, reconstructed_grid, kind='linear',
                                    fill_value='extrapolate')

            current_df = day_frame(current_date).copy()
            current_df['error'] = current_df['ave_compound_yield'] - model_interp(current_df['maturity'])
            # Need at least one bond above and one below the model curve.
            if current_df['error'].max() <= 0 or current_df['error'].min() >= 0:
                continue

            long_bond = current_df.loc[current_df['error'].idxmax()]
            short_bond = current_df.loc[current_df['error'].idxmin()]

            l_loading = self.get_bond_pc1_loading(long_bond['maturity'], pc1_loadings)
            s_loading = self.get_bond_pc1_loading(short_bond['maturity'], pc1_loadings)

            hedge_bond = self.find_hedge_bond(current_df, self.hedge_maturity)
            h_loading = self.get_bond_pc1_loading(hedge_bond['maturity'], pc1_loadings)

            # Solve (l - s) + Q * h = 0 for Q; clip to tame near-zero h.
            if abs(h_loading) > self.MIN_HEDGE_LOADING:
                hedge_qty = np.clip(-(l_loading - s_loading) / h_loading,
                                    -self.max_hedge_qty, self.max_hedge_qty)
            else:
                hedge_qty = 0

            # Exit legs are matched by bond name; a leg with no next-day quote
            # contributes zero PnL.
            next_df = day_frame(next_date)
            l_drop = self._yield_drop(long_bond, next_df)
            s_drop = self._yield_drop(short_bond, next_df)
            h_drop = self._yield_drop(hedge_bond, next_df)

            lpnl = l_drop * 100 if l_drop is not None else 0
            spnl = -s_drop * 100 if s_drop is not None else 0
            hpnl = hedge_qty * h_drop * 100 if h_drop is not None else 0

            no_hedge_pnl = lpnl + spnl
            hedged_total_pnl = lpnl + spnl + hpnl

            if self.pnl_warn_bp is not None and (
                abs(no_hedge_pnl) > self.pnl_warn_bp or abs(hedged_total_pnl) > self.pnl_warn_bp
            ):
                logger.warning(
                    "Large daily PnL on %s: no_hedge=%.2f, hedged=%.2f, hedge_qty=%.3f "
                    "(long=%s, short=%s, hedge=%s)",
                    current_date, no_hedge_pnl, hedged_total_pnl, hedge_qty,
                    long_bond['bond_name'], short_bond['bond_name'], hedge_bond['bond_name'],
                )

            self.results.append({'date': current_date, 'no_hedge_pnl': no_hedge_pnl,
                                 'hedged_total_pnl': hedged_total_pnl})

        return pd.DataFrame(self.results, columns=list(self.RESULT_COLUMNS))

In [None]:
# Run the backtest on data from 2024 onwards and compare cumulative PnL of the
# unhedged and PC1-hedged variants.
db_manager = DatabaseManager()
df_all = fetch_all_data(db_manager, start_date='2024-01-01')
if df_all.empty:
    print("No bond data returned; skipping backtest.")
else:
    results_df = PCAStrategyBacktestHedged(df_all, window=50).run()
    if results_df.empty:
        # Too little history for the window, or no day produced a trade.
        # Without this guard the column lookups and `.iloc[-1]` below raise.
        print("Backtest produced no trades; nothing to plot.")
    else:
        results_df['cum_no_hedge'] = results_df['no_hedge_pnl'].cumsum()
        results_df['cum_hedged'] = results_df['hedged_total_pnl'].cumsum()

        plt.figure(figsize=(14, 7))
        plt.plot(results_df['date'], results_df['cum_no_hedge'], label='No Hedge', color='blue')
        plt.plot(results_df['date'], results_df['cum_hedged'], label='PC1 Hedged', color='green', linestyle='--')
        plt.title('Strategy Performance Comparison (bp)')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.show()
        print(f"Final Return - No Hedge: {results_df['cum_no_hedge'].iloc[-1]:.2f} bp, Hedged: {results_df['cum_hedged'].iloc[-1]:.2f} bp")