# R√©plication Table 2, 3 et annexes A1 √† A6 de Zhang (2021)
## *Pairs trading with general state space models*
### Quantitative Finance, 21(9), 1567-1587

---

**Tables r√©pliqu√©es:**
- Table 2 & 3: Main pairs (PEP-KO, EWT-EWH)
- Table A1: Large Banks + Small Banks
- Table A2: Large √ó Small Banks
- Table A3: In-Sample / Out-of-Sample Large Banks
- Table A4: In-Sample / Out-of-Sample Small Banks
- Table A5: In-Sample Large √ó Small Banks
- Table A6: Out-of-Sample Large √ó Small Banks

## 1. Configuration et Imports

In [None]:
from __future__ import annotations
import sys
from pathlib import Path
import itertools
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from dataclasses import dataclass
from typing import Tuple, List, Dict
import warnings
import time
from IPython.display import display, HTML

warnings.filterwarnings('ignore')

# Configuration des chemins
PROJECT_ROOT = Path('.').resolve().parent
SRC_DIR = PROJECT_ROOT / "src"
sys.path.insert(0, str(SRC_DIR))
DATA_DIR = PROJECT_ROOT / "data"
DATA_FILE = DATA_DIR / "dataGQ.xlsx"

print(f"üìÅ Project root: {PROJECT_ROOT}")
print(f"üìä Data file: {DATA_FILE}")

In [3]:
# Numba (optionnel mais recommand√©)
try:
    from numba import njit
    NUMBA_AVAILABLE = True
    print("‚úÖ Numba disponible - calculs acc√©l√©r√©s")
except ImportError:
    NUMBA_AVAILABLE = False
    print("‚ö†Ô∏è  Numba non disponible - calculs plus lents")
    def njit(*args, **kwargs):
        def decorator(func):
            return func
        if len(args) == 1 and callable(args[0]):
            return args[0]
        return decorator

‚úÖ Numba disponible - calculs acc√©l√©r√©s


## 2. Univers des Actions et P√©riodes

In [4]:
# Stock Universes (Zhang 2021 Appendix)
LARGE_BANKS = ['JPM', 'BAC', 'WFC', 'C', 'USB']
SMALL_BANKS = ['CPF', 'BANC', 'CUBI', 'NBHC', 'FCF']
MAIN_PAIRS = [('PEP', 'KO'), ('EWT', 'EWH')]

# Date Ranges (Zhang 2021)
FULL_SAMPLE_START = '2012-01-03'
FULL_SAMPLE_END = '2019-06-28'
EWT_EWH_END = '2019-05-01'

IN_SAMPLE_START = '2012-01-10'
IN_SAMPLE_END = '2018-01-01'
OUT_SAMPLE_START = '2018-01-01'
OUT_SAMPLE_END = '2019-12-01'

print("üìà Large Banks:", LARGE_BANKS)
print("üìâ Small Banks:", SMALL_BANKS)
print(f"üìÖ Full Sample: {FULL_SAMPLE_START} ‚Üí {FULL_SAMPLE_END}")
print(f"üìÖ In-Sample: {IN_SAMPLE_START} ‚Üí {IN_SAMPLE_END}")
print(f"üìÖ Out-of-Sample: {OUT_SAMPLE_START} ‚Üí {OUT_SAMPLE_END}")

üìà Large Banks: ['JPM', 'BAC', 'WFC', 'C', 'USB']
üìâ Small Banks: ['CPF', 'BANC', 'CUBI', 'NBHC', 'FCF']
üìÖ Full Sample: 2012-01-03 ‚Üí 2019-06-28
üìÖ In-Sample: 2012-01-10 ‚Üí 2018-01-01
üìÖ Out-of-Sample: 2018-01-01 ‚Üí 2019-12-01


## 3. Classes et Fonctions de Base

In [5]:
@dataclass
class PairData:
    """Container for pair price data."""
    PA: pd.Series
    PB: pd.Series
    asset_a: str
    asset_b: str

    @property
    def n_obs(self) -> int:
        return len(self.PA)


@dataclass
class ModelParams:
    """State-space model parameters."""
    theta0: float = 0.0
    theta1: float = 0.95
    theta2: float = 0.0
    q_base: float = 1e-4
    q_het: float = 0.0
    r: float = 1e-4

    @property
    def is_homoscedastic(self) -> bool:
        return self.q_het < 1e-10

In [6]:
def load_pair_data(filepath: str, col_a: str, col_b: str,
                   start_date: str, end_date: str) -> PairData:
    """Load and align pair data from Excel (Yahoo or Bloomberg format)."""
    df = pd.read_excel(filepath)

    if col_a in df.columns:
        if 'Date' in df.columns:
            df = df.set_index('Date')
        elif 'Unnamed: 0' in df.columns:
            df = df.set_index('Unnamed: 0')
        df.index = pd.to_datetime(df.index)
        PA = df[col_a].dropna()
        PB = df[col_b].dropna()
    else:
        # Bloomberg format
        col_a_bb = f'{col_a} US Equity'
        col_b_bb = f'{col_b} US Equity'
        if col_a_bb not in df.columns:
            col_a_bb = f'{col_a} US Equity '
        if col_b_bb not in df.columns:
            col_b_bb = f'{col_b} US Equity '

        def get_series(df, col):
            col_idx = df.columns.get_loc(col)
            date_col = df.columns[col_idx - 1]
            result = pd.DataFrame({
                'date': pd.to_datetime(df[date_col], errors='coerce'),
                'price': pd.to_numeric(df[col], errors='coerce')
            }).dropna().drop_duplicates('date').set_index('date').sort_index()
            return result['price']

        PA = get_series(df, col_a_bb)
        PB = get_series(df, col_b_bb)

    common_idx = PA.index.intersection(PB.index)
    PA, PB = PA.loc[common_idx], PB.loc[common_idx]
    start, end = pd.to_datetime(start_date), pd.to_datetime(end_date)
    mask = (PA.index >= start) & (PA.index <= end)
    return PairData(PA.loc[mask], PB.loc[mask], col_a, col_b)

## 4. Filtres de Kalman (Numba optimis√©s)

In [7]:
@njit(cache=True)
def halton_sequence_njit(size: int, base: int) -> np.ndarray:
    """Generate Halton sequence."""
    sequence = np.zeros(size)
    for i in range(size):
        n = i + 1
        f, result = 1.0, 0.0
        while n > 0:
            f = f / base
            result = result + f * (n % base)
            n = n // base
        sequence[i] = result
    return sequence


@njit(cache=True)
def kalman_filter_njit(y: np.ndarray, theta0: float, theta1: float,
                       q: float, r: float) -> Tuple[float, np.ndarray]:
    """Kalman Filter for Model I (homoscedastic)."""
    n = len(y)
    if abs(theta1) < 0.999:
        x = theta0 / (1.0 - theta1)
        P = q / (1.0 - theta1 * theta1)
    else:
        x = y[0]
        P = q * 10.0
    
    x_filt = np.zeros(n)
    loglik = 0.0
    log_2pi = np.log(2.0 * np.pi)
    
    for t in range(n):
        if t > 0:
            x = theta0 + theta1 * x
            P = theta1 * theta1 * P + q
        v = y[t] - x
        S = P + r
        if S > 1e-12:
            K = P / S
            x = x + K * v
            P = (1.0 - K) * P
            loglik += -0.5 * (log_2pi + np.log(S) + v * v / S)
        x_filt[t] = x
    return loglik, x_filt


@njit(cache=True)
def qmckf_njit(y: np.ndarray, theta0: float, theta1: float,
               q_base: float, q_het: float, r: float,
               n_particles: int) -> Tuple[float, np.ndarray]:
    """Quasi-Monte Carlo Kalman Filter for Model II (heteroscedastic)."""
    n = len(y)
    x = y[0]
    P = q_base + q_het * x * x
    x_filt = np.zeros(n)
    loglik = 0.0
    log_2pi = np.log(2.0 * np.pi)
    
    h1 = halton_sequence_njit(n_particles, 2)
    h2 = halton_sequence_njit(n_particles, 3)
    for i in range(n_particles):
        h1[i] = max(1e-10, min(1.0 - 1e-10, h1[i]))
        h2[i] = max(1e-10, min(1.0 - 1e-10, h2[i]))
    
    z = np.zeros(n_particles)
    for i in range(n_particles):
        z[i] = np.sqrt(-2.0 * np.log(h1[i])) * np.cos(2.0 * np.pi * h2[i])
    
    samples = np.zeros(n_particles)
    f_samples = np.zeros(n_particles)
    
    for t in range(n):
        if t == 0:
            x_p, P_p = x, P
        else:
            sqrt_P = np.sqrt(max(P, 1e-12))
            sum_f = 0.0
            for i in range(n_particles):
                samples[i] = x + sqrt_P * z[i]
                f_samples[i] = theta0 + theta1 * samples[i]
                sum_f += f_samples[i]
            x_p = sum_f / n_particles
            sum_var, sum_g = 0.0, 0.0
            for i in range(n_particles):
                diff = f_samples[i] - x_p
                sum_var += diff * diff
                sum_g += q_base + q_het * samples[i] * samples[i]
            P_p = sum_var / n_particles + sum_g / n_particles
        
        v = y[t] - x_p
        S = P_p + r
        if S > 1e-12:
            K = P_p / S
            x = x_p + K * v
            P = (1.0 - K) * P_p
            loglik += -0.5 * (log_2pi + np.log(S) + v * v / S)
        else:
            x, P = x_p, P_p
        x_filt[t] = x
    return loglik, x_filt

## 5. Strat√©gies de Trading

In [8]:
@njit(cache=True)
def strategy_A_njit(x: np.ndarray, U: np.ndarray, L: np.ndarray, C: float) -> np.ndarray:
    """Strategy A: Enter at boundaries, exit at mean."""
    n = len(x)
    sig = np.zeros(n)
    pos = 0
    for t in range(n):
        if pos == 0:
            if x[t] >= U[t]:
                pos = -1
            elif x[t] <= L[t]:
                pos = 1
        elif pos == 1 and x[t] >= C:
            pos = 0
        elif pos == -1 and x[t] <= C:
            pos = 0
        sig[t] = pos
    return sig


@njit(cache=True)
def strategy_C_njit(x: np.ndarray, U: np.ndarray, L: np.ndarray, C: float) -> np.ndarray:
    """Strategy C: Re-entry with stop-loss (Zhang's main strategy)."""
    n = len(x)
    sig = np.zeros(n)
    pos = 0
    for t in range(1, n):
        prev, curr = x[t - 1], x[t]
        U_prev, U_curr = U[t - 1], U[t]
        L_prev, L_curr = L[t - 1], L[t]
        
        entry_short = (prev > U_prev) and (curr <= U_curr)
        entry_long = (prev < L_prev) and (curr >= L_curr)
        exit_long = (prev < C) and (curr >= C)
        exit_short = (prev > C) and (curr <= C)
        stop_short = (prev < U_prev) and (curr >= U_curr)
        stop_long = (prev > L_prev) and (curr <= L_curr)
        
        if pos == 0:
            if entry_short:
                pos = -1
            elif entry_long:
                pos = 1
        elif pos == 1 and (exit_long or stop_long):
            pos = 0
        elif pos == -1 and (exit_short or stop_short):
            pos = 0
        sig[t] = pos
    return sig


@njit(cache=True)
def compute_thresholds_njit(x_filt: np.ndarray, q_base: float, q_het: float,
                            n_std: float, is_hetero: bool) -> Tuple[np.ndarray, np.ndarray, float]:
    """Compute trading thresholds."""
    n = len(x_filt)
    C = np.mean(x_filt)
    sigma_emp = np.std(x_filt)
    U = np.zeros(n)
    L = np.zeros(n)
    
    if is_hetero and q_het > 1e-10:
        g_x = np.sqrt(q_base + q_het * x_filt * x_filt)
        mean_g = np.mean(g_x)
        for t in range(n):
            sigma_t = g_x[t] / mean_g * sigma_emp
            U[t] = C + n_std * sigma_t
            L[t] = C - n_std * sigma_t
    else:
        threshold = n_std * sigma_emp
        for t in range(n):
            U[t] = C + threshold
            L[t] = C - threshold
    return U, L, C


@njit(cache=True)
def backtest_njit(signals: np.ndarray, x_filt: np.ndarray, cost_bp: float) -> Tuple[float, float, int]:
    """Backtest. Returns (annualized_return, sharpe, n_trades)."""
    n = len(signals)
    pnl = np.zeros(n)
    n_trades = 0
    cost_factor = 2.0 * cost_bp / 10000.0
    
    for t in range(1, n):
        dx = x_filt[t] - x_filt[t - 1]
        pos_change = abs(signals[t] - signals[t - 1])
        if pos_change > 0:
            n_trades += 1
        pnl[t] = signals[t] * dx - pos_change * cost_factor
    
    cum_pnl = np.sum(pnl)
    ann_ret = cum_pnl / (n / 252.0)
    mean_pnl = np.mean(pnl)
    std_pnl = np.std(pnl)
    ann_std = std_pnl * np.sqrt(252.0)
    
    if ann_std > 1e-10:
        sharpe = (ann_ret - 0.02) / ann_std
    else:
        sharpe = 0.0
    return ann_ret, sharpe, n_trades


@njit(cache=True)
def grid_search_njit(x_filt: np.ndarray, q_base: float, q_het: float,
                     is_hetero: bool, use_strategy_C: bool, cost_bp: float) -> Tuple[float, float, float, int]:
    """Grid search for optimal threshold."""
    best_n = 1.0
    best_ret = -1e10
    best_sr = -1e10
    best_trades = 0
    
    for i in range(25):
        n_std = 0.1 + i * 0.1
        U, L, C = compute_thresholds_njit(x_filt, q_base, q_het, n_std, is_hetero)
        
        if use_strategy_C:
            sig = strategy_C_njit(x_filt, U, L, C)
        else:
            sig = strategy_A_njit(x_filt, U, L, C)
        
        ann_ret, sharpe, n_trades = backtest_njit(sig, x_filt, cost_bp)
        
        if n_trades > 0 and sharpe > best_sr:
            best_sr = sharpe
            best_ret = ann_ret
            best_n = n_std
            best_trades = n_trades
    
    return best_n, best_ret, best_sr, best_trades

## 6. Estimation MLE

In [9]:
def estimate_gamma_ols(log_PA: np.ndarray, log_PB: np.ndarray) -> float:
    """Estimate hedge ratio Œ≥ via OLS."""
    X = np.column_stack([np.ones(len(log_PB)), log_PB])
    return float(np.linalg.lstsq(X, log_PA, rcond=None)[0][1])


def estimate_model_I(y: np.ndarray) -> Tuple[ModelParams, np.ndarray, float]:
    """Estimate Model I parameters by MLE."""
    y_mean, y_var = np.mean(y), np.var(y)
    rho = np.corrcoef(y[:-1] - y_mean, y[1:] - y_mean)[0, 1]
    theta1_init = float(np.clip(rho, 0.8, 0.99))

    z0 = np.array([y_mean * (1 - theta1_init), np.arctanh(theta1_init),
                   np.log(y_var * (1 - theta1_init ** 2) * 0.7 + 1e-10),
                   np.log(y_var * 0.3 + 1e-10)])

    def neg_ll(z):
        try:
            ll, _ = kalman_filter_njit(y, z[0], np.tanh(z[1]), np.exp(z[2]), np.exp(z[3]))
            return -ll if np.isfinite(ll) else 1e10
        except:
            return 1e10

    bounds = [(-0.5, 0.5), (np.arctanh(0.5), np.arctanh(0.999)),
              (np.log(1e-8), np.log(1.0)), (np.log(1e-8), np.log(1.0))]
    res = minimize(neg_ll, z0, method='L-BFGS-B', bounds=bounds)
    params = ModelParams(theta0=res.x[0], theta1=np.tanh(res.x[1]),
                         q_base=np.exp(res.x[2]), r=np.exp(res.x[3]))
    ll, x_filt = kalman_filter_njit(y, params.theta0, params.theta1, params.q_base, params.r)
    return params, x_filt, ll


def estimate_model_II(y: np.ndarray) -> Tuple[ModelParams, np.ndarray, float]:
    """Estimate Model II parameters by MLE with QMCKF."""
    y_mean = np.mean(y)
    best_ll, best_params, best_filt = -np.inf, None, None

    for t0, t1, q_b, q_h, r in [(y_mean * 0.01, 0.95, 0.0005, 0.10, 0.010),
                                (y_mean * 0.01, 0.93, 0.0003, 0.13, 0.011),
                                (y_mean * 0.01, 0.96, 0.0010, 0.08, 0.008)]:
        z0 = np.array([t0, np.arctanh(t1), np.log(q_b), np.log(q_h), np.log(r)])

        def neg_ll(z):
            try:
                ll, _ = qmckf_njit(y, z[0], np.tanh(z[1]), np.exp(z[2]), np.exp(z[3]), np.exp(z[4]), 50)
                return -ll if np.isfinite(ll) else 1e10
            except:
                return 1e10

        bounds = [(-0.1, 0.1), (np.arctanh(0.85), np.arctanh(0.99)),
                  (np.log(1e-6), np.log(0.005)), (np.log(0.05), np.log(0.3)),
                  (np.log(0.005), np.log(0.05))]
        try:
            res = minimize(neg_ll, z0, method='L-BFGS-B', bounds=bounds, options={'maxiter': 500})
            params = ModelParams(theta0=res.x[0], theta1=np.tanh(res.x[1]),
                                 q_base=np.exp(res.x[2]), q_het=np.exp(res.x[3]), r=np.exp(res.x[4]))
            ll, x_filt = qmckf_njit(y, params.theta0, params.theta1,
                                    params.q_base, params.q_het, params.r, 100)
            if ll > best_ll:
                best_ll, best_params, best_filt = ll, params, x_filt
        except:
            continue

    if best_params is None:
        best_params = ModelParams(theta0=0.0, theta1=0.95, q_base=0.0003, q_het=0.1, r=0.01)
        best_ll, best_filt = qmckf_njit(y, 0.0, 0.95, 0.0003, 0.1, 0.01, 100)
    return best_params, best_filt, best_ll

## 7. Analyse des Paires

In [10]:
def analyze_pair(pair: PairData, cost_bp: float = 20.0) -> Dict:
    """Analyze a pair: Model I + Strategy A, Model II + Strategy C."""
    log_PA, log_PB = np.log(pair.PA.values), np.log(pair.PB.values)
    gamma = estimate_gamma_ols(log_PA, log_PB)
    y = log_PA - gamma * log_PB

    # Model I + Strategy A
    p1, f1, _ = estimate_model_I(y)
    _, ret_m1, sr_m1, _ = grid_search_njit(f1, p1.q_base, 0.0, False, False, cost_bp)

    # Model II + Strategy C
    p2, f2, _ = estimate_model_II(y)
    _, ret_m2, sr_m2, _ = grid_search_njit(f2, p2.q_base, p2.q_het, True, True, cost_bp)

    return {
        'Stock #1': pair.asset_a,
        'Stock #2': pair.asset_b,
        'M1_Return': ret_m1,
        'M1_Sharpe': sr_m1,
        'M2_Return': ret_m2,
        'M2_Sharpe': sr_m2,
        'Imp_Return': (ret_m2 / ret_m1 - 1) * 100 if abs(ret_m1) > 1e-6 else 0,
        'Imp_Sharpe': (sr_m2 / sr_m1 - 1) * 100 if abs(sr_m1) > 1e-6 else 0,
    }


def analyze_pair_is_oos(filepath: str, col_a: str, col_b: str,
                        is_start: str, is_end: str,
                        oos_start: str, oos_end: str,
                        cost_bp: float = 20.0) -> Tuple[Dict, Dict]:
    """Analyze pair with in-sample and out-of-sample periods."""
    # In-Sample
    pair_is = load_pair_data(filepath, col_a, col_b, is_start, is_end)
    log_PA_is, log_PB_is = np.log(pair_is.PA.values), np.log(pair_is.PB.values)
    gamma = estimate_gamma_ols(log_PA_is, log_PB_is)
    y_is = log_PA_is - gamma * log_PB_is
    
    p1, f1_is, _ = estimate_model_I(y_is)
    _, ret_m1_is, sr_m1_is, _ = grid_search_njit(f1_is, p1.q_base, 0.0, False, False, cost_bp)
    
    p2, f2_is, _ = estimate_model_II(y_is)
    _, ret_m2_is, sr_m2_is, _ = grid_search_njit(f2_is, p2.q_base, p2.q_het, True, True, cost_bp)
    
    result_is = {
        'Stock #1': col_a, 'Stock #2': col_b,
        'M1_Return': ret_m1_is, 'M1_Sharpe': sr_m1_is,
        'M2_Return': ret_m2_is, 'M2_Sharpe': sr_m2_is,
        'Imp_Return': (ret_m2_is / ret_m1_is - 1) * 100 if abs(ret_m1_is) > 1e-6 else 0,
        'Imp_Sharpe': (sr_m2_is / sr_m1_is - 1) * 100 if abs(sr_m1_is) > 1e-6 else 0,
    }
    
    # Out-of-Sample (use IS parameters)
    pair_oos = load_pair_data(filepath, col_a, col_b, oos_start, oos_end)
    log_PA_oos, log_PB_oos = np.log(pair_oos.PA.values), np.log(pair_oos.PB.values)
    y_oos = log_PA_oos - gamma * log_PB_oos
    
    _, f1_oos = kalman_filter_njit(y_oos, p1.theta0, p1.theta1, p1.q_base, p1.r)
    _, ret_m1_oos, sr_m1_oos, _ = grid_search_njit(f1_oos, p1.q_base, 0.0, False, False, cost_bp)
    
    _, f2_oos = qmckf_njit(y_oos, p2.theta0, p2.theta1, p2.q_base, p2.q_het, p2.r, 100)
    _, ret_m2_oos, sr_m2_oos, _ = grid_search_njit(f2_oos, p2.q_base, p2.q_het, True, True, cost_bp)
    
    result_oos = {
        'Stock #1': col_a, 'Stock #2': col_b,
        'M1_Return': ret_m1_oos, 'M1_Sharpe': sr_m1_oos,
        'M2_Return': ret_m2_oos, 'M2_Sharpe': sr_m2_oos,
        'Imp_Return': (ret_m2_oos / ret_m1_oos - 1) * 100 if abs(ret_m1_oos) > 1e-6 else 0,
        'Imp_Sharpe': (sr_m2_oos / sr_m1_oos - 1) * 100 if abs(sr_m1_oos) > 1e-6 else 0,
    }
    
    return result_is, result_oos

## 8. Fonctions d'affichage (format Zhang)

In [11]:
def format_zhang_table(df: pd.DataFrame, title: str, note: str = "") -> pd.DataFrame:
    """Format DataFrame in Zhang (2021) style."""
    # Rename columns
    df_display = df.copy()
    df_display = df_display.rename(columns={
        'M1_Return': 'Return',
        'M1_Sharpe': 'Sharpe',
        'M2_Return': 'Return ',
        'M2_Sharpe': 'Sharpe ',
        'Imp_Return': 'Return  ',
        'Imp_Sharpe': 'Sharpe  '
    })
    
    # Add Pair column
    df_display.insert(0, 'Pair', range(1, len(df_display) + 1))
    
    # Add summary rows
    numeric_cols = ['Return', 'Sharpe', 'Return ', 'Sharpe ', 'Return  ', 'Sharpe  ']
    summary = pd.DataFrame({
        'Pair': ['Mean', 'Min', 'Max', 'Median'],
        'Stock #1': ['', '', '', ''],
        'Stock #2': ['', '', '', ''],
    })
    
    for col in numeric_cols:
        if col in df_display.columns:
            summary[col] = [
                df_display[col].mean(),
                df_display[col].min(),
                df_display[col].max(),
                df_display[col].median()
            ]
    
    df_final = pd.concat([df_display, summary], ignore_index=True)
    
    return df_final


def display_zhang_table(df: pd.DataFrame, title: str, page: int = 0, note: str = ""):
    """Display table in Zhang (2021) style."""
    header = f"<h3>{title}</h3>"
    if page > 0:
        header = f"<div style='text-align:right'>G. Zhang &nbsp;&nbsp; {page}</div>" + header
    
    # Create multi-level header
    html = f"""
    {header}
    <table style='border-collapse: collapse; width: 100%; font-size: 12px;'>
        <tr style='border-bottom: 2px solid black;'>
            <th colspan='3'></th>
            <th colspan='2' style='text-align:center;'>Model I + Strategy A</th>
            <th colspan='2' style='text-align:center;'>Model II + Strategy C</th>
            <th colspan='2' style='text-align:center;'>Improvement (in %)</th>
        </tr>
        <tr style='border-bottom: 1px solid black;'>
            <th>Pair</th>
            <th>Stock #1</th>
            <th>Stock #2</th>
            <th>Return</th>
            <th>Sharpe</th>
            <th>Return</th>
            <th>Sharpe</th>
            <th>Return</th>
            <th>Sharpe</th>
        </tr>
    """
    
    for _, row in df.iterrows():
        pair = row.get('Pair', '')
        s1 = row.get('Stock #1', '')
        s2 = row.get('Stock #2', '')
        m1_ret = row.get('M1_Return', row.get('Return', 0))
        m1_sr = row.get('M1_Sharpe', row.get('Sharpe', 0))
        m2_ret = row.get('M2_Return', row.get('Return ', 0))
        m2_sr = row.get('M2_Sharpe', row.get('Sharpe ', 0))
        imp_ret = row.get('Imp_Return', row.get('Return  ', 0))
        imp_sr = row.get('Imp_Sharpe', row.get('Sharpe  ', 0))
        
        style = "border-top: 1px solid black;" if pair in ['Mean', 'Min', 'Max', 'Median'] else ""
        
        html += f"""
        <tr style='{style}'>
            <td>{pair}</td>
            <td>{s1}</td>
            <td>{s2}</td>
            <td style='text-align:right;'>{m1_ret:.4f}</td>
            <td style='text-align:right;'>{m1_sr:.4f}</td>
            <td style='text-align:right;'>{m2_ret:.4f}</td>
            <td style='text-align:right;'>{m2_sr:.4f}</td>
            <td style='text-align:right;'>{imp_ret:.2f}</td>
            <td style='text-align:right;'>{imp_sr:.2f}</td>
        </tr>
        """
    
    html += "</table>"
    
    if note:
        html += f"<p style='font-size:10px;'><i>Note: {note}</i></p>"
    
    display(HTML(html))

## 9. Warm-up JIT Compilation

In [12]:
# Warm-up JIT compilation
if NUMBA_AVAILABLE:
    print("‚è≥ JIT Compilation...")
    dummy = np.random.randn(100)
    _ = kalman_filter_njit(dummy, 0.0, 0.95, 0.001, 0.001)
    _ = qmckf_njit(dummy, 0.0, 0.95, 0.001, 0.1, 0.01, 50)
    print("‚úÖ JIT Compilation done!")

‚è≥ JIT Compilation...
‚úÖ JIT Compilation done!


---
# Tables 2 & 3: Main Pairs (PEP-KO, EWT-EWH)

In [13]:
print("üìä Analyzing Main Pairs...")
main_results = []

for col_a, col_b in MAIN_PAIRS:
    end_date = EWT_EWH_END if col_a == 'EWT' else FULL_SAMPLE_END
    try:
        pair = load_pair_data(DATA_FILE, col_a, col_b, FULL_SAMPLE_START, end_date)
        print(f"  {col_a}-{col_b}: {pair.n_obs} observations")
        result = analyze_pair(pair)
        main_results.append(result)
    except Exception as e:
        print(f"  ‚ùå {col_a}-{col_b}: Error - {e}")

df_main = pd.DataFrame(main_results)
display_zhang_table(format_zhang_table(df_main, ""), 
                    "Table 2 & 3. Performance of Pairs Trading on Main Pairs",
                    note="Return is the annualized return, displayed in decimal. Sharpe is the annualized Sharpe ratio.")

üìä Analyzing Main Pairs...
  PEP-KO: 1884 observations
  EWT-EWH: 1843 observations


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,PEP,KO,0.0456,0.2152,0.1812,1.2807,297.46,495.2
2,EWT,EWH,0.0521,0.3677,0.0562,0.9637,7.86,162.12
Mean,,,0.0488,0.2914,0.1187,1.1222,152.66,328.66
Min,,,0.0456,0.2152,0.0562,0.9637,7.86,162.12
Max,,,0.0521,0.3677,0.1812,1.2807,297.46,495.2
Median,,,0.0488,0.2914,0.1187,1.1222,152.66,328.66


---
# Table A1: Pairs of Large Banks and Small Banks

In [14]:
print("üìä Analyzing Large Banks...")
large_pairs = list(itertools.combinations(LARGE_BANKS, 2))
results_large = []

for s1, s2 in large_pairs:
    try:
        pair = load_pair_data(DATA_FILE, s1, s2, FULL_SAMPLE_START, FULL_SAMPLE_END)
        result = analyze_pair(pair)
        results_large.append(result)
        print(f"  {s1}-{s2}: SR(M1)={result['M1_Sharpe']:.4f}, SR(M2)={result['M2_Sharpe']:.4f}")
    except Exception as e:
        print(f"  ‚ùå {s1}-{s2}: Error - {e}")

df_large = pd.DataFrame(results_large)
display_zhang_table(format_zhang_table(df_large, ""),
                    "Table A1 - Panel A: Pairs of Large Banks", page=1582,
                    note="Return is the annualized return, displayed in decimal. Sharpe is the annualized Sharpe ratio.")

üìä Analyzing Large Banks...
  JPM-BAC: SR(M1)=0.6002, SR(M2)=1.0097
  JPM-WFC: SR(M1)=-0.0829, SR(M2)=0.7803
  JPM-C: SR(M1)=0.7500, SR(M2)=0.8270
  JPM-USB: SR(M1)=0.6991, SR(M2)=1.1117
  BAC-WFC: SR(M1)=0.0283, SR(M2)=1.1061
  BAC-C: SR(M1)=0.6713, SR(M2)=1.5029
  BAC-USB: SR(M1)=0.4735, SR(M2)=1.8097
  WFC-C: SR(M1)=1.0345, SR(M2)=0.9297
  WFC-USB: SR(M1)=0.1099, SR(M2)=0.5579
  C-USB: SR(M1)=0.5278, SR(M2)=1.5589


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,JPM,BAC,0.0823,0.6002,0.0985,1.0097,19.62,68.24
2,JPM,WFC,0.0112,-0.0829,0.0795,0.7803,609.9,-1041.2
3,JPM,C,0.0869,0.75,0.0621,0.827,-28.48,10.27
4,JPM,USB,0.0738,0.6991,0.0831,1.1117,12.54,59.02
5,BAC,WFC,0.0232,0.0283,0.0861,1.1061,271.1,3808.76
6,BAC,C,0.0909,0.6713,0.1891,1.5029,108.0,123.87
7,BAC,USB,0.0923,0.4735,0.2194,1.8097,137.67,282.2
8,WFC,C,0.061,1.0345,0.0722,0.9297,18.27,-10.13
9,WFC,USB,0.0302,0.1099,0.0481,0.5579,58.93,407.56
10,C,USB,0.0618,0.5278,0.1253,1.5589,102.64,195.33


In [15]:
print("üìä Analyzing Small Banks...")
small_pairs = list(itertools.combinations(SMALL_BANKS, 2))
results_small = []

for s1, s2 in small_pairs:
    try:
        pair = load_pair_data(DATA_FILE, s1, s2, FULL_SAMPLE_START, FULL_SAMPLE_END)
        result = analyze_pair(pair)
        results_small.append(result)
        print(f"  {s1}-{s2}: SR(M1)={result['M1_Sharpe']:.4f}, SR(M2)={result['M2_Sharpe']:.4f}")
    except Exception as e:
        print(f"  ‚ùå {s1}-{s2}: Error - {e}")

df_small = pd.DataFrame(results_small)
if not df_small.empty:
    display_zhang_table(format_zhang_table(df_small, ""),
                        "Table A1 - Panel B: Pairs of Small Banks", page=1582,
                        note="Return is the annualized return, displayed in decimal. Sharpe is the annualized Sharpe ratio.")

üìä Analyzing Small Banks...
  CPF-BANC: SR(M1)=0.4346, SR(M2)=1.4414
  CPF-CUBI: SR(M1)=0.2455, SR(M2)=1.5663
  CPF-NBHC: SR(M1)=0.7210, SR(M2)=1.0260
  CPF-FCF: SR(M1)=0.4552, SR(M2)=1.2666
  BANC-CUBI: SR(M1)=0.6502, SR(M2)=1.5147
  BANC-NBHC: SR(M1)=0.3448, SR(M2)=1.2835
  BANC-FCF: SR(M1)=0.4070, SR(M2)=1.3768
  CUBI-NBHC: SR(M1)=0.4980, SR(M2)=1.1451
  CUBI-FCF: SR(M1)=0.3313, SR(M2)=0.9409
  NBHC-FCF: SR(M1)=0.5937, SR(M2)=1.4648


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,CPF,BANC,0.0858,0.4346,0.1217,1.4414,41.82,231.69
2,CPF,CUBI,0.0581,0.2455,0.1761,1.5663,203.24,538.01
3,CPF,NBHC,0.1033,0.721,0.0822,1.026,-20.39,42.31
4,CPF,FCF,0.0762,0.4552,0.11,1.2666,44.41,178.23
5,BANC,CUBI,0.1591,0.6502,0.2369,1.5147,48.86,132.94
6,BANC,NBHC,0.0705,0.3448,0.1635,1.2835,132.13,272.28
7,BANC,FCF,0.085,0.407,0.2017,1.3768,137.36,238.3
8,CUBI,NBHC,0.1199,0.498,0.1368,1.1451,14.09,129.93
9,CUBI,FCF,0.0997,0.3313,0.1463,0.9409,46.75,183.99
10,NBHC,FCF,0.088,0.5937,0.1079,1.4648,22.57,146.74


---
# Table A2: Pairs Between Large and Small Banks

In [16]:
print("üìä Analyzing Large √ó Small Banks...")
cross_pairs = list(itertools.product(LARGE_BANKS, SMALL_BANKS))
results_cross = []

for s1, s2 in cross_pairs:
    try:
        pair = load_pair_data(DATA_FILE, s1, s2, FULL_SAMPLE_START, FULL_SAMPLE_END)
        result = analyze_pair(pair)
        results_cross.append(result)
        print(f"  {s1}-{s2}: SR(M1)={result['M1_Sharpe']:.4f}, SR(M2)={result['M2_Sharpe']:.4f}")
    except Exception as e:
        print(f"  ‚ùå {s1}-{s2}: Error - {e}")

df_cross = pd.DataFrame(results_cross)
if not df_cross.empty:
    display_zhang_table(format_zhang_table(df_cross, ""),
                        "Table A2. Performance of Pairs Trading on Pairs Between Large Banks and Small Banks", 
                        page=1583,
                        note="Return is the annualized return, displayed in decimal. Sharpe is the annualized Sharpe ratio.")

üìä Analyzing Large √ó Small Banks...
  JPM-CPF: SR(M1)=0.4254, SR(M2)=1.2301
  JPM-BANC: SR(M1)=0.6012, SR(M2)=1.0845
  JPM-CUBI: SR(M1)=0.0060, SR(M2)=0.9446
  JPM-NBHC: SR(M1)=1.1577, SR(M2)=1.4952
  JPM-FCF: SR(M1)=0.3168, SR(M2)=1.5338
  BAC-CPF: SR(M1)=0.6365, SR(M2)=1.4427
  BAC-BANC: SR(M1)=0.9263, SR(M2)=0.9850
  BAC-CUBI: SR(M1)=0.4475, SR(M2)=0.9608
  BAC-NBHC: SR(M1)=0.8576, SR(M2)=1.5523
  BAC-FCF: SR(M1)=0.8640, SR(M2)=1.4668
  WFC-CPF: SR(M1)=0.4333, SR(M2)=0.9462
  WFC-BANC: SR(M1)=1.0897, SR(M2)=1.0740
  WFC-CUBI: SR(M1)=0.5341, SR(M2)=1.3322
  WFC-NBHC: SR(M1)=1.1738, SR(M2)=1.1533
  WFC-FCF: SR(M1)=0.3850, SR(M2)=0.7792
  C-CPF: SR(M1)=0.8814, SR(M2)=1.5036
  C-BANC: SR(M1)=0.6291, SR(M2)=0.9894
  C-CUBI: SR(M1)=0.5827, SR(M2)=1.2423
  C-NBHC: SR(M1)=1.0026, SR(M2)=1.4609
  C-FCF: SR(M1)=0.5826, SR(M2)=1.1415
  USB-CPF: SR(M1)=0.6241, SR(M2)=1.1358
  USB-BANC: SR(M1)=0.7781, SR(M2)=0.8892
  USB-CUBI: SR(M1)=0.1251, SR(M2)=1.0571
  USB-NBHC: SR(M1)=0.4597, SR(M2)=0.8

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,JPM,CPF,0.0803,0.4254,0.1218,1.2301,51.67,189.17
2,JPM,BANC,0.0948,0.6012,0.1939,1.0845,104.56,80.4
3,JPM,CUBI,0.0205,0.006,0.1078,0.9446,425.92,15758.39
4,JPM,NBHC,0.1046,1.1577,0.1373,1.4952,31.25,29.14
5,JPM,FCF,0.0701,0.3168,0.1701,1.5338,142.7,384.12
6,BAC,CPF,0.1292,0.6365,0.1485,1.4427,14.89,126.67
7,BAC,BANC,0.1654,0.9263,0.1367,0.985,-17.37,6.34
8,BAC,CUBI,0.098,0.4475,0.112,0.9608,14.28,114.7
9,BAC,NBHC,0.1409,0.8576,0.1952,1.5523,38.53,81.0
10,BAC,FCF,0.1592,0.864,0.1003,1.4668,-36.97,69.77


---
# Table A3: In-Sample and Out-of-Sample Performance on Large Banks

In [17]:
print("üìä Analyzing Large Banks (IS/OOS)...")
results_a3_is, results_a3_oos = [], []

for s1, s2 in large_pairs:
    try:
        r_is, r_oos = analyze_pair_is_oos(DATA_FILE, s1, s2,
                                          IN_SAMPLE_START, IN_SAMPLE_END,
                                          OUT_SAMPLE_START, OUT_SAMPLE_END)
        results_a3_is.append(r_is)
        results_a3_oos.append(r_oos)
        print(f"  {s1}-{s2}: IS SR={r_is['M2_Sharpe']:.4f}, OOS SR={r_oos['M2_Sharpe']:.4f}")
    except Exception as e:
        print(f"  ‚ùå {s1}-{s2}: Error - {e}")

df_a3_is = pd.DataFrame(results_a3_is)
df_a3_oos = pd.DataFrame(results_a3_oos)

if not df_a3_is.empty:
    display_zhang_table(format_zhang_table(df_a3_is, ""),
                        "Table A3 - Panel A: In Sample Performance on Pairs of Large Banks", 
                        page=1584,
                        note=f"The data is from {IN_SAMPLE_START} to {IN_SAMPLE_END}.")

if not df_a3_oos.empty:
    display_zhang_table(format_zhang_table(df_a3_oos, ""),
                        "Table A3 - Panel B: Out of Sample Performance on Pairs of Large Banks", 
                        page=1584,
                        note=f"The data is from {OUT_SAMPLE_START} to {OUT_SAMPLE_END}.")

üìä Analyzing Large Banks (IS/OOS)...
  JPM-BAC: IS SR=0.7943, OOS SR=0.6630
  JPM-WFC: IS SR=0.9286, OOS SR=1.5864
  JPM-C: IS SR=0.4345, OOS SR=2.0927
  JPM-USB: IS SR=1.2805, OOS SR=2.1821
  BAC-WFC: IS SR=1.3250, OOS SR=1.7203
  BAC-C: IS SR=1.3175, OOS SR=2.1218
  BAC-USB: IS SR=1.7244, OOS SR=2.0028
  WFC-C: IS SR=1.2573, OOS SR=1.8825
  WFC-USB: IS SR=-0.0425, OOS SR=0.9730
  C-USB: IS SR=1.2575, OOS SR=1.7026


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,JPM,BAC,0.0556,0.4277,0.0506,0.7943,-8.99,85.73
2,JPM,WFC,0.0371,0.2841,0.0834,0.9286,124.68,226.86
3,JPM,C,0.04,0.2365,0.0311,0.4345,-22.28,83.69
4,JPM,USB,0.0937,0.844,0.1268,1.2805,35.3,51.71
5,BAC,WFC,0.0679,0.739,0.1895,1.325,179.01,79.31
6,BAC,C,0.1007,0.6956,0.1154,1.3175,14.59,89.41
7,BAC,USB,0.0921,0.4238,0.1409,1.7244,53.03,306.87
8,WFC,C,0.0853,1.1464,0.1068,1.2573,25.26,9.68
9,WFC,USB,0.044,0.329,0.0191,-0.0425,-56.59,-112.92
10,C,USB,0.0812,0.6367,0.091,1.2575,12.18,97.5


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,JPM,BAC,0.0215,0.0387,0.0531,0.663,147.27,1611.87
2,JPM,WFC,0.1968,2.0843,0.0622,1.5864,-68.37,-23.89
3,JPM,C,0.1205,1.4722,0.1044,2.0927,-13.33,42.15
4,JPM,USB,0.135,1.4398,0.1782,2.1821,32.05,51.56
5,BAC,WFC,0.1777,1.7154,0.1897,1.7203,6.75,0.29
6,BAC,C,0.1245,0.8512,0.1934,2.1218,55.33,149.28
7,BAC,USB,0.0574,0.7587,0.249,2.0028,334.08,163.99
8,WFC,C,0.1314,1.6754,0.1289,1.8825,-1.88,12.36
9,WFC,USB,0.0922,0.669,0.0469,0.973,-49.11,45.44
10,C,USB,0.0909,0.6258,0.1326,1.7026,45.93,172.07


---
# Table A4: In-Sample and Out-of-Sample Performance on Small Banks

In [18]:
print("üìä Analyzing Small Banks (IS/OOS)...")
results_a4_is, results_a4_oos = [], []

for s1, s2 in small_pairs:
    try:
        r_is, r_oos = analyze_pair_is_oos(DATA_FILE, s1, s2,
                                          IN_SAMPLE_START, IN_SAMPLE_END,
                                          OUT_SAMPLE_START, OUT_SAMPLE_END)
        results_a4_is.append(r_is)
        results_a4_oos.append(r_oos)
        print(f"  {s1}-{s2}: IS SR={r_is['M2_Sharpe']:.4f}, OOS SR={r_oos['M2_Sharpe']:.4f}")
    except Exception as e:
        print(f"  ‚ùå {s1}-{s2}: Error - {e}")

df_a4_is = pd.DataFrame(results_a4_is)
df_a4_oos = pd.DataFrame(results_a4_oos)

if not df_a4_is.empty:
    display_zhang_table(format_zhang_table(df_a4_is, ""),
                        "Table A4 - Panel A: In Sample Performance on Pairs of Small Banks", 
                        page=1585,
                        note=f"The data is from {IN_SAMPLE_START} to {IN_SAMPLE_END}.")

if not df_a4_oos.empty:
    display_zhang_table(format_zhang_table(df_a4_oos, ""),
                        "Table A4 - Panel B: Out of Sample Performance on Pairs of Small Banks", 
                        page=1585,
                        note=f"The data is from {OUT_SAMPLE_START} to {OUT_SAMPLE_END}.")

üìä Analyzing Small Banks (IS/OOS)...
  CPF-BANC: IS SR=1.2735, OOS SR=1.1844
  CPF-CUBI: IS SR=1.7639, OOS SR=1.3682
  CPF-NBHC: IS SR=1.3074, OOS SR=1.2380
  CPF-FCF: IS SR=1.9088, OOS SR=1.2142
  BANC-CUBI: IS SR=1.3778, OOS SR=2.4242
  BANC-NBHC: IS SR=0.5201, OOS SR=1.2105
  BANC-FCF: IS SR=1.6356, OOS SR=1.4546
  CUBI-NBHC: IS SR=0.7800, OOS SR=1.4032
  CUBI-FCF: IS SR=1.2452, OOS SR=1.3488
  NBHC-FCF: IS SR=1.6888, OOS SR=1.3871


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,CPF,BANC,0.1632,0.9392,0.1541,1.2735,-5.58,35.59
2,CPF,CUBI,0.0756,0.3427,0.2171,1.7639,187.34,414.75
3,CPF,NBHC,0.1403,0.8465,0.0958,1.3074,-31.68,54.44
4,CPF,FCF,0.093,0.5547,0.1844,1.9088,98.39,244.11
5,BANC,CUBI,0.0589,0.7208,0.2379,1.3778,303.97,91.16
6,BANC,NBHC,0.125,0.5345,0.0678,0.5201,-45.74,-2.7
7,BANC,FCF,0.1733,0.7378,0.233,1.6356,34.39,121.68
8,CUBI,NBHC,0.1372,0.5407,0.0961,0.78,-29.97,44.26
9,CUBI,FCF,0.1162,0.5346,0.1828,1.2452,57.36,132.91
10,NBHC,FCF,0.103,0.6987,0.127,1.6888,23.26,141.69


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,CPF,BANC,0.1556,0.9692,0.1894,1.1844,21.71,22.21
2,CPF,CUBI,0.176,1.3479,0.1417,1.3682,-19.5,1.51
3,CPF,NBHC,0.1219,1.0708,0.0695,1.238,-42.94,15.62
4,CPF,FCF,0.1042,0.6692,0.157,1.2142,50.74,81.45
5,BANC,CUBI,0.2208,1.4347,0.3303,2.4242,49.56,68.98
6,BANC,NBHC,0.2135,1.1966,0.0867,1.2105,-59.4,1.16
7,BANC,FCF,0.1445,1.1904,0.1125,1.4546,-22.13,22.19
8,CUBI,NBHC,0.1707,1.6279,0.1462,1.4032,-14.38,-13.8
9,CUBI,FCF,0.1145,1.5515,0.0693,1.3488,-39.5,-13.06
10,NBHC,FCF,0.0896,0.5737,0.1738,1.3871,94.02,141.77


---
# Table A5: In-Sample Performance on Large √ó Small Banks

In [19]:
print("üìä Analyzing Large √ó Small Banks (IS)...")
results_a5 = []

for s1, s2 in cross_pairs:
    try:
        r_is, _ = analyze_pair_is_oos(DATA_FILE, s1, s2,
                                      IN_SAMPLE_START, IN_SAMPLE_END,
                                      OUT_SAMPLE_START, OUT_SAMPLE_END)
        results_a5.append(r_is)
        print(f"  {s1}-{s2}: SR(M2)={r_is['M2_Sharpe']:.4f}")
    except Exception as e:
        print(f"  ‚ùå {s1}-{s2}: Error - {e}")

df_a5 = pd.DataFrame(results_a5)
if not df_a5.empty:
    display_zhang_table(format_zhang_table(df_a5, ""),
                        "Table A5. In Sample Performance of Pairs Trading on Pairs Between Large Banks and Small Banks", 
                        page=1586,
                        note=f"The data is from {IN_SAMPLE_START} to {IN_SAMPLE_END}.")

üìä Analyzing Large √ó Small Banks (IS)...
  JPM-CPF: SR(M2)=1.7906
  JPM-BANC: SR(M2)=1.2798
  JPM-CUBI: SR(M2)=1.1355
  JPM-NBHC: SR(M2)=1.3657
  JPM-FCF: SR(M2)=1.9394
  BAC-CPF: SR(M2)=1.4710
  BAC-BANC: SR(M2)=1.0377
  BAC-CUBI: SR(M2)=1.4439
  BAC-NBHC: SR(M2)=1.7457
  BAC-FCF: SR(M2)=1.4311
  WFC-CPF: SR(M2)=0.8442
  WFC-BANC: SR(M2)=1.0421
  WFC-CUBI: SR(M2)=1.2455
  WFC-NBHC: SR(M2)=0.9480
  WFC-FCF: SR(M2)=0.8021
  C-CPF: SR(M2)=1.2969
  C-BANC: SR(M2)=1.0566
  C-CUBI: SR(M2)=1.6023
  C-NBHC: SR(M2)=1.3456
  C-FCF: SR(M2)=1.1307
  USB-CPF: SR(M2)=1.1586
  USB-BANC: SR(M2)=0.9508
  USB-CUBI: SR(M2)=1.2656
  USB-NBHC: SR(M2)=1.0023
  USB-FCF: SR(M2)=1.2090


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,JPM,CPF,0.0692,0.4472,0.1683,1.7906,143.14,300.38
2,JPM,BANC,0.1574,0.8729,0.1898,1.2798,20.64,46.62
3,JPM,CUBI,0.007,-0.077,0.1825,1.1355,2515.01,-1574.09
4,JPM,NBHC,0.1035,1.0124,0.1157,1.3657,11.85,34.9
5,JPM,FCF,0.0859,0.4809,0.1845,1.9394,114.93,303.29
6,BAC,CPF,0.139,1.1161,0.137,1.471,-1.41,31.79
7,BAC,BANC,0.1806,1.0564,0.0677,1.0377,-62.5,-1.77
8,BAC,CUBI,0.0946,0.424,0.1605,1.4439,69.69,240.55
9,BAC,NBHC,0.1403,0.836,0.1169,1.7457,-16.68,108.81
10,BAC,FCF,0.1699,0.7983,0.0944,1.4311,-44.44,79.27


---
# Table A6: Out-of-Sample Performance on Large √ó Small Banks

In [20]:
print("üìä Analyzing Large √ó Small Banks (OOS)...")
results_a6 = []

for s1, s2 in cross_pairs:
    try:
        _, r_oos = analyze_pair_is_oos(DATA_FILE, s1, s2,
                                       IN_SAMPLE_START, IN_SAMPLE_END,
                                       OUT_SAMPLE_START, OUT_SAMPLE_END)
        results_a6.append(r_oos)
        print(f"  {s1}-{s2}: SR(M2)={r_oos['M2_Sharpe']:.4f}")
    except Exception as e:
        print(f"  ‚ùå {s1}-{s2}: Error - {e}")

df_a6 = pd.DataFrame(results_a6)
if not df_a6.empty:
    display_zhang_table(format_zhang_table(df_a6, ""),
                        "Table A6. Out of Sample Performance of Pairs Trading on Pairs Between Large Banks and Small Banks", 
                        page=1587,
                        note=f"The data is from {OUT_SAMPLE_START} to {OUT_SAMPLE_END}.")

üìä Analyzing Large √ó Small Banks (OOS)...
  JPM-CPF: SR(M2)=1.8992
  JPM-BANC: SR(M2)=1.4055
  JPM-CUBI: SR(M2)=1.3906
  JPM-NBHC: SR(M2)=1.6330
  JPM-FCF: SR(M2)=1.0034
  BAC-CPF: SR(M2)=2.0997
  BAC-BANC: SR(M2)=2.0479
  BAC-CUBI: SR(M2)=1.4217
  BAC-NBHC: SR(M2)=2.3696
  BAC-FCF: SR(M2)=0.8585
  WFC-CPF: SR(M2)=1.8138
  WFC-BANC: SR(M2)=2.4310
  WFC-CUBI: SR(M2)=2.0321
  WFC-NBHC: SR(M2)=1.8905
  WFC-FCF: SR(M2)=1.8322
  C-CPF: SR(M2)=2.1518
  C-BANC: SR(M2)=2.0597
  C-CUBI: SR(M2)=1.8881
  C-NBHC: SR(M2)=2.1989
  C-FCF: SR(M2)=1.4219
  USB-CPF: SR(M2)=1.9530
  USB-BANC: SR(M2)=1.6552
  USB-CUBI: SR(M2)=1.8249
  USB-NBHC: SR(M2)=1.9446
  USB-FCF: SR(M2)=1.0290


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model I + Strategy A,Model I + Strategy A,Model II + Strategy C,Model II + Strategy C,Improvement (in %),Improvement (in %)
Pair,Stock #1,Stock #2,Return,Sharpe,Return,Sharpe,Return,Sharpe
1,JPM,CPF,0.0619,0.4293,0.1909,1.8992,208.23,342.41
2,JPM,BANC,0.0755,0.3843,0.1303,1.4055,72.47,265.78
3,JPM,CUBI,0.1103,1.4064,0.1186,1.3906,7.54,-1.12
4,JPM,NBHC,0.0588,0.6707,0.1772,1.633,201.3,143.48
5,JPM,FCF,0.041,0.228,0.1698,1.0034,314.43,340.03
6,BAC,CPF,0.1232,0.8243,0.2571,2.0997,108.62,154.72
7,BAC,BANC,0.0832,0.5906,0.1929,2.0479,131.88,246.73
8,BAC,CUBI,0.092,1.172,0.1033,1.4217,12.33,21.31
9,BAC,NBHC,0.1764,1.2667,0.1393,2.3696,-21.02,87.08
10,BAC,FCF,0.0287,0.0946,0.0802,0.8585,179.17,807.37


---
# R√©sum√© Final

In [21]:
print("\n" + "="*80)
print("R√âSUM√â FINAL")
print("="*80)

all_tables = {
    'Table 2&3 (Main)': df_main,
    'Table A1 (Large Banks)': df_large,
    'Table A1 (Small Banks)': df_small if 'df_small' in dir() and not df_small.empty else pd.DataFrame(),
    'Table A2 (Large√óSmall)': df_cross if 'df_cross' in dir() and not df_cross.empty else pd.DataFrame(),
    'Table A3 IS': df_a3_is if 'df_a3_is' in dir() and not df_a3_is.empty else pd.DataFrame(),
    'Table A3 OOS': df_a3_oos if 'df_a3_oos' in dir() and not df_a3_oos.empty else pd.DataFrame(),
    'Table A4 IS': df_a4_is if 'df_a4_is' in dir() and not df_a4_is.empty else pd.DataFrame(),
    'Table A4 OOS': df_a4_oos if 'df_a4_oos' in dir() and not df_a4_oos.empty else pd.DataFrame(),
    'Table A5 (IS)': df_a5 if 'df_a5' in dir() and not df_a5.empty else pd.DataFrame(),
    'Table A6 (OOS)': df_a6 if 'df_a6' in dir() and not df_a6.empty else pd.DataFrame(),
}

for name, df in all_tables.items():
    if not df.empty:
        print(f"\n{name}:")
        print(f"  Pairs: {len(df)}")
        print(f"  Mean M1 Sharpe: {df['M1_Sharpe'].mean():.4f}")
        print(f"  Mean M2 Sharpe: {df['M2_Sharpe'].mean():.4f}")
    else:
        print(f"\n{name}: No data (tickers not available)")


R√âSUM√â FINAL

Table 2&3 (Main):
  Pairs: 2
  Mean M1 Sharpe: 0.2914
  Mean M2 Sharpe: 1.1222

Table A1 (Large Banks):
  Pairs: 10
  Mean M1 Sharpe: 0.4812
  Mean M2 Sharpe: 1.1194

Table A1 (Small Banks):
  Pairs: 10
  Mean M1 Sharpe: 0.4681
  Mean M2 Sharpe: 1.3026

Table A2 (Large√óSmall):
  Pairs: 25
  Mean M1 Sharpe: 0.6325
  Mean M2 Sharpe: 1.1652

Table A3 IS:
  Pairs: 10
  Mean M1 Sharpe: 0.5763
  Mean M2 Sharpe: 1.0277

Table A3 OOS:
  Pairs: 10
  Mean M1 Sharpe: 1.1330
  Mean M2 Sharpe: 1.6927

Table A4 IS:
  Pairs: 10
  Mean M1 Sharpe: 0.6450
  Mean M2 Sharpe: 1.3501

Table A4 OOS:
  Pairs: 10
  Mean M1 Sharpe: 1.1632
  Mean M2 Sharpe: 1.4233

Table A5 (IS):
  Pairs: 25
  Mean M1 Sharpe: 0.6932
  Mean M2 Sharpe: 1.2616

Table A6 (OOS):
  Pairs: 25
  Mean M1 Sharpe: 1.0181
  Mean M2 Sharpe: 1.7702
