In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller, coint
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.regression.linear_model import OLS
from itertools import combinations
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

In [33]:
def find_cointegrated_pairs(tickers, 
                           correlation_threshold=0.7,
                           adf_pvalue_threshold=0.05,
                           lookback_years=8,
                           end_date=datetime.now(),
                           individual_adf_pvalue=0.05):
    """
    Finds cointegrated pairs among a list of stock tickers.
    
    Cointegration Requirements (Engle-Granger):
    - Each series is individually I(1) - non-stationary in levels
    - Linear combination (spread/residuals) is I(0) - stationary
    - ADF test on residuals from OLS regression
    - Johansen test for validation
    
    Parameters:
    -----------
    tickers : list
        List of ticker symbols (20+)
    correlation_threshold : float
        Minimum correlation coefficient (default: 0.7)
    adf_pvalue_threshold : float
        Maximum p-value for ADF test on residuals (default: 0.05)
    lookback_years : int
        Years of historical data to analyze (default: 10)
    individual_adf_pvalue : float
        Maximum p-value for individual series to be I(1) (default: 0.05)
        Series should FAIL to reject null (p > threshold) to be non-stationary
    
    Returns:
    --------
    list of tuples
        Cointegrated pairs: [(ticker1, ticker2, stats_dict), ...]
    """
    
    print(f"Fetching {lookback_years} years of data for {len(tickers)} tickers...")
    
    # Download historical data
    #end_date = datetime.now() -  timedelta(days=7*365) #train set 2010-2018
    start_date = end_date - timedelta(days=lookback_years*365)
    
    data = yf.download(tickers, start=start_date, end=end_date, progress=False)['Close']
    
    # Handle single ticker case (returns Series instead of DataFrame)
    if isinstance(data, pd.Series):
        data = data.to_frame()
    
    # Drop tickers with insufficient data
    min_data_points = int(lookback_years * 252 * 0.8)  # 80% of expected trading days
    data = data.dropna(axis=1, thresh=min_data_points)
    
    available_tickers = data.columns.tolist()
    print(f"Valid tickers with sufficient data: {len(available_tickers)}")
    
    if len(available_tickers) < 2:
        print("Insufficient tickers with valid data.")
        return []
    
    # Drop rows with any NaN values for pairwise analysis
    data = data.dropna()
    
    print(f"Data shape: {data.shape} (rows: {data.shape[0]}, tickers: {data.shape[1]})")
    
    print("\nStep 1: Testing individual series for non-stationarity I(1)...")
    
    # Test each series individually - they should be I(1) (non-stationary in levels)
    stationary_tickers = []
    non_stationary_tickers = []
    
    for ticker in available_tickers:
        prices = data[ticker].values
        adf_result = adfuller(prices, maxlag=1, regression='c')
        adf_pvalue = adf_result[1]
        
        # For I(1): we want to FAIL to reject null hypothesis (non-stationary)
        # So p-value should be > threshold (e.g., > 0.05)
        if adf_pvalue > individual_adf_pvalue:
            non_stationary_tickers.append(ticker)
            print(f"  ✓ {ticker}: Non-stationary (p={adf_pvalue:.4f}) - I(1)")
        else:
            stationary_tickers.append(ticker)
            print(f"  ✗ {ticker}: Stationary (p={adf_pvalue:.4f}) - I(0), excluding")
    
    print(f"\nValid I(1) tickers: {len(non_stationary_tickers)}")
    
    if len(non_stationary_tickers) < 2:
        print("Insufficient non-stationary tickers for cointegration analysis.")
        return []
    
    # Filter data to only include non-stationary tickers
    data = data[non_stationary_tickers]
    
    print("\nStep 2: Calculating correlation matrix...")
    
    # Calculate correlation matrix
    correlation_matrix = data.corr()
    
    # Find pairs with high correlation (only among non-stationary tickers)
    highly_correlated_pairs = []
    for i, ticker1 in enumerate(non_stationary_tickers):
        for ticker2 in non_stationary_tickers[i+1:]:
            corr = correlation_matrix.loc[ticker1, ticker2]
            if corr > correlation_threshold:
                highly_correlated_pairs.append((ticker1, ticker2, corr))
    
    print(f"Found {len(highly_correlated_pairs)} pairs with correlation > {correlation_threshold}")
    
    if len(highly_correlated_pairs) == 0:
        print("No highly correlated pairs found.")
        return []
    
    print("\nStep 3: Testing for cointegration (Engle-Granger): OLS, ADF on residuals, Johansen test...")
    cointegrated_pairs = []
    
    for ticker1, ticker2, corr in highly_correlated_pairs:
        try:
            prices1 = data[ticker1].values
            prices2 = data[ticker2].values
            
            # Engle-Granger two-step procedure:
            # Step 1: Run OLS regression on price levels: Y = α + β*X + ε
            X = np.column_stack([np.ones(len(prices2)), prices2])
            model = OLS(prices1, X).fit()
            residuals = model.resid  # This is epsilon (the spread/error)
            hedge_ratio = model.params[1]  # Beta coefficient
            
            # Step 2: Test if residuals (epsilon/spread) are stationary I(0)
            # Apply Augmented Dickey-Fuller test to residuals
            adf_result = adfuller(residuals, maxlag=1, regression='c')
            adf_statistic = adf_result[0]
            adf_pvalue = adf_result[1]
            
            # For cointegration: residuals must be stationary
            # We want to REJECT null hypothesis (p-value < threshold)
            if adf_pvalue < adf_pvalue_threshold:
                # Perform Johansen test for additional validation
                # Johansen test requires 2D array of both price series
                price_matrix = np.column_stack([prices1, prices2])
                
                try:
                    johansen_result = coint_johansen(price_matrix, det_order=0, k_ar_diff=1)
                    # Check trace statistic against 95% critical value
                    trace_stat = johansen_result.lr1[0]  # First eigenvalue trace stat
                    critical_value_95 = johansen_result.cvt[0, 1]  # 95% critical value
                    johansen_pass = trace_stat > critical_value_95
                    
                    if johansen_pass:
                        stats = {
                            'correlation': round(corr, 4),
                            'hedge_ratio': round(hedge_ratio, 4),
                            'adf_statistic': round(adf_statistic, 4),
                            'adf_pvalue': round(adf_pvalue, 6),
                            'johansen_trace': round(trace_stat, 4),
                            'johansen_critical_95': round(critical_value_95, 4),
                            'spread_mean': round(np.mean(residuals), 4),
                            'spread_std': round(np.std(residuals), 4)
                        }
                        cointegrated_pairs.append((ticker1, ticker2, stats))
                        print(f"✓ {ticker1} - {ticker2}: COINTEGRATED")
                        print(f"    Correlation: {corr:.4f} | Hedge Ratio: {hedge_ratio:.4f}")
                        print(f"    ADF p-value: {adf_pvalue:.6f} (residuals stationary)")
                        print(f"    Johansen: {trace_stat:.2f} > {critical_value_95:.2f}")
                    else:
                        continue
                        #print(f"✗ {ticker1} - {ticker2}: Failed Johansen test ({trace_stat:.2f} < {critical_value_95:.2f})")
                    
                except Exception as e:
                    # If Johansen test fails, skip this pair
                    print(f"✗ {ticker1} - {ticker2}: Johansen test failed - {str(e)}")
                    continue
            
        except Exception as e:
            print(f"✗ {ticker1} - {ticker2}: Error during testing - {str(e)}")
            continue
    
    print(f"\n{'='*60}")
    print(f"RESULTS: Found {len(cointegrated_pairs)} cointegrated pairs")
    print(f"{'='*60}")
    
    return cointegrated_pairs

In [4]:
tickers = [
    # Energy
    "XOM", "CVX", "BP", "SHEL", "COP", "TOT", "EOG", "SLB", "HAL",
    
    # Financials
    "JPM", "BAC", "WFC", "C", "GS", "MS", "AXP", "USB", "BK",
    
    # Technology
    "AAPL", "MSFT", "GOOG", "META", "NVDA", "ADBE", "ORCL", "INTC", "CSCO",
    
    # Consumer Staples
    "KO", "PEP", "PG", "CL", "KMB", "MDLZ", "KHC", "GIS",
    
    # Consumer Discretionary
    "MCD", "SBUX", "NKE", "HD", "LOW", "TGT", "AMZN", "EBAY",
    
    # Healthcare
    "JNJ", "PFE", "MRK", "ABBV", "BMY", "GILD", "AMGN",
    
    # Industrials
    "CAT", "DE", "GE", "HON", "UPS", "FDX", "MMM",
    
    # Utilities
    "NEE", "DUK", "SO", "D", "AEP", "EXC",
    
    # Telecommunications
    "VZ", "T", "TMUS",
    
    # Materials
    "LIN", "APD", "NUE", "SHW", "DD",
    
    # ETFs / Index Trackers
    "SPY", "VOO", "QQQ", "DIA", "IWM"
]


# Comprehensive economic sector classification
SECTOR_CLASSIFICATION = {
    # Energy
    "XOM": "Energy", "CVX": "Energy", "BP": "Energy", "SHEL": "Energy",
    "COP": "Energy", "TOT": "Energy", "EOG": "Energy", "SLB": "Energy", "HAL": "Energy",
    
    # Financials
    "JPM": "Financials", "BAC": "Financials", "WFC": "Financials", "C": "Financials",
    "GS": "Financials", "MS": "Financials", "AXP": "Financials", "USB": "Financials", "BK": "Financials",
    
    # Technology
    "AAPL": "Technology", "MSFT": "Technology", "GOOG": "Technology", "META": "Technology",
    "NVDA": "Technology", "ADBE": "Technology", "ORCL": "Technology", "INTC": "Technology", "CSCO": "Technology",
    
    # Consumer Staples
    "KO": "Consumer_Staples", "PEP": "Consumer_Staples", "PG": "Consumer_Staples", "CL": "Consumer_Staples",
    "KMB": "Consumer_Staples", "MDLZ": "Consumer_Staples", "KHC": "Consumer_Staples", "GIS": "Consumer_Staples",
    
    # Consumer Discretionary
    "MCD": "Consumer_Discretionary", "SBUX": "Consumer_Discretionary", "NKE": "Consumer_Discretionary",
    "HD": "Consumer_Discretionary", "LOW": "Consumer_Discretionary", "TGT": "Consumer_Discretionary",
    "AMZN": "Consumer_Discretionary", "EBAY": "Consumer_Discretionary",
    
    # Healthcare
    "JNJ": "Healthcare", "PFE": "Healthcare", "MRK": "Healthcare", "ABBV": "Healthcare",
    "BMY": "Healthcare", "GILD": "Healthcare", "AMGN": "Healthcare",
    
    # Industrials
    "CAT": "Industrials", "DE": "Industrials", "GE": "Industrials", "HON": "Industrials",
    "UPS": "Industrials", "FDX": "Industrials", "MMM": "Industrials",
    
    # Utilities
    "NEE": "Utilities", "DUK": "Utilities", "SO": "Utilities", "D": "Utilities",
    "AEP": "Utilities", "EXC": "Utilities",
    
    # Telecommunications
    "VZ": "Telecommunications", "T": "Telecommunications", "TMUS": "Telecommunications",
    
    # Materials
    "LIN": "Materials", "APD": "Materials", "NUE": "Materials", "SHW": "Materials", "DD": "Materials",
    
    # ETFs / Index Trackers
    "SPY": "ETF", "VOO": "ETF", "QQQ": "ETF", "DIA": "ETF", "IWM": "ETF",
    
    # International ADRs
    "RIO": "International", "BHP": "International", "NVS": "International",
    "SONY": "International", "TM": "International", "SAP": "International",
    "UL": "International", "UN": "International", "TD": "International", "RY": "International"
}


In [45]:
def filter_economically_related_pairs(pairs, sector_classification=None):
    """
    Filters cointegrated pairs to keep only those with economic relationships.
    
    Economic relationship criteria:
    - Same sub-sector (strongest relationship)
    - Same broad sector with related business models
    - Supply chain relationships
    - Competitive dynamics
    
    Parameters:
    -----------
    pairs : list
        List of tuples from find_cointegrated_pairs()
        Format: [(ticker1, ticker2, stats_dict), ...]
    sector_classification : dict, optional
        Custom sector mapping. If None, uses default SECTOR_CLASSIFICATION
    
    Returns:
    --------
    list of tuples
        Filtered pairs with economic relationships
        Format: [(ticker1, ticker2, stats_dict, relationship_type), ...]
    """
    
    if sector_classification is None:
        sector_classification = SECTOR_CLASSIFICATION
    
    economically_related_pairs = []
    
    print(f"\nFiltering {len(pairs)} cointegrated pairs for economic relationships...")
    print("=" * 80)
    
    for ticker1, ticker2, stats in pairs:
        # Get sectors for both tickers
        sector1 = sector_classification.get(ticker1, 'Unknown')
        sector2 = sector_classification.get(ticker2, 'Unknown')
        
        # Skip if either ticker not classified
        if sector1 == 'Unknown' or sector2 == 'Unknown':
            print(f"✗ {ticker1} - {ticker2}: Unclassified ticker(s)")
            print(f"  {ticker1}: {sector1}")
            print(f"  {ticker2}: {sector2}")
            continue
        
        # Extract broad sector and sub-sector
        broad1 = sector1.split(' - ')[0]
        broad2 = sector2.split(' - ')[0]
        
        relationship_type = None
        
        # Check for exact sub-sector match (strongest relationship)
        if sector1 == sector2:
            relationship_type = "Same Sub-Sector"
            economically_related_pairs.append((ticker1, ticker2, stats, relationship_type))
            print(f"✓ {ticker1} - {ticker2}: {relationship_type}")
            print(f"  Sector: {sector1}")
            print(f"  Correlation: {stats['correlation']:.4f} | Hedge Ratio: {stats['hedge_ratio']:.4f}")
            
        # Check for same broad sector (related business models)
        elif broad1 == broad2:
            relationship_type = f"Same Broad Sector ({broad1})"
            economically_related_pairs.append((ticker1, ticker2, stats, relationship_type))
            print(f"✓ {ticker1} - {ticker2}: {relationship_type}")
            print(f"  {ticker1}: {sector1}")
            print(f"  {ticker2}: {sector2}")
            print(f"  Correlation: {stats['correlation']:.4f} | Hedge Ratio: {stats['hedge_ratio']:.4f}")
            
        # Check for cross-sector economic relationships
        else:
            # Energy companies and energy-intensive industries
            energy_intensive = ['Industrials - Airlines', 'Consumer - Traditional Automotive']
            energy_producers = ['Energy - Oil & Gas Integrated', 'Energy - Oil & Gas Exploration']
            
            if (sector1 in energy_producers and sector2 in energy_intensive) or \
               (sector2 in energy_producers and sector1 in energy_intensive):
                relationship_type = "Supply Chain (Energy)"
                economically_related_pairs.append((ticker1, ticker2, stats, relationship_type))
                print(f"✓ {ticker1} - {ticker2}: {relationship_type}")
                print(f"  {ticker1}: {sector1}")
                print(f"  {ticker2}: {sector2}")
                
            # Retail and consumer goods
            elif (broad1 == 'Consumer' and broad2 == 'Consumer Staples') or \
                 (broad1 == 'Consumer Staples' and broad2 == 'Consumer'):
                relationship_type = "Related Consumer Sectors"
                economically_related_pairs.append((ticker1, ticker2, stats, relationship_type))
                print(f"✓ {ticker1} - {ticker2}: {relationship_type}")
                print(f"  {ticker1}: {sector1}")
                print(f"  {ticker2}: {sector2}")
                
            # Tech ecosystem relationships
            elif broad1 == 'Tech' and broad2 == 'Tech':
                relationship_type = "Tech Ecosystem"
                economically_related_pairs.append((ticker1, ticker2, stats, relationship_type))
                print(f"✓ {ticker1} - {ticker2}: {relationship_type}")
                print(f"  {ticker1}: {sector1}")
                print(f"  {ticker2}: {sector2}")
                
            else:
                continue
                #print(f"✗ {ticker1} - {ticker2}: No clear economic relationship")
                #print(f"  {ticker1}: {sector1}")
                #print(f"  {ticker2}: {sector2}")
    
    print("\n" + "=" * 80)
    print(f"RESULTS: {len(economically_related_pairs)} pairs with economic relationships")
    print(f"Filtered out: {len(pairs) - len(economically_related_pairs)} spurious pairs")
    print("=" * 80)

    pairs_list = []
    for pair in economically_related_pairs:
        t1, t2 = pair[0], pair[1]   # o: t1, t2, *_ = pair
        pairs_list.append([t1, t2])
    
    return economically_related_pairs, pairs_list

### Encontrar pares cointegrados con relación económica en periodo de train

In [None]:
print(f'Periodo de train: {15*0.6} años')
print(f'Periodo de validation: {15*0.2} años')
print(f'Periodo de test: {15*0.2} años')

Periodo de train: 9.0 años
Periodo de validation: 3.0 años
Periodo de test: 3.0 años


In [34]:
pairs = find_cointegrated_pairs(
        tickers, 
        correlation_threshold=0.7,
        adf_pvalue_threshold=0.05,
        lookback_years=9,
        end_date=datetime.now() -  timedelta(days=7*365)
    )

Fetching 9 years of data for 76 tickers...



1 Failed download:
['TOT']: YFPricesMissingError('possibly delisted; no price data found  (1d 2009-11-17 21:00:37.870221 -> 2018-11-15 21:00:37.870221) (Yahoo error = "Data doesn\'t exist for startDate = 1258509637, endDate = 1542333637")')


Valid tickers with sufficient data: 72
Data shape: (2063, 72) (rows: 2063, tickers: 72)

Step 1: Testing individual series for non-stationarity I(1)...
  ✓ AAPL: Non-stationary (p=0.9377) - I(1)
  ✓ ADBE: Non-stationary (p=0.9974) - I(1)
  ✓ AEP: Non-stationary (p=0.9501) - I(1)
  ✓ AMGN: Non-stationary (p=0.8239) - I(1)
  ✓ AMZN: Non-stationary (p=0.9934) - I(1)
  ✓ APD: Non-stationary (p=0.8478) - I(1)
  ✓ AXP: Non-stationary (p=0.8470) - I(1)
  ✓ BAC: Non-stationary (p=0.9455) - I(1)
  ✓ BK: Non-stationary (p=0.8005) - I(1)
  ✓ BMY: Non-stationary (p=0.4615) - I(1)
  ✓ BP: Non-stationary (p=0.3252) - I(1)
  ✓ C: Non-stationary (p=0.6648) - I(1)
  ✓ CAT: Non-stationary (p=0.6766) - I(1)
  ✓ CL: Non-stationary (p=0.3042) - I(1)
  ✓ COP: Non-stationary (p=0.2968) - I(1)
  ✓ CSCO: Non-stationary (p=0.9886) - I(1)
  ✓ CVX: Non-stationary (p=0.2010) - I(1)
  ✓ D: Non-stationary (p=0.6325) - I(1)
  ✓ DD: Non-stationary (p=0.6464) - I(1)
  ✓ DE: Non-stationary (p=0.8960) - I(1)
  ✓ DIA: Non

In [46]:
# Filter for economic relationships
if pairs:
    print("\n" + "="*80)
    print("FILTERING FOR ECONOMIC RELATIONSHIPS")
    print("="*80)
        
    economically_related, pairs_list = filter_economically_related_pairs(pairs)
        
    # Display final results
    if economically_related:
        print("\n" + "="*80)
        print("FINAL ECONOMICALLY RELATED COINTEGRATED PAIRS")
        print("="*80)
        for ticker1, ticker2, stats, relationship in economically_related:
            print(f"\n{ticker1} <-> {ticker2}")
            print(f"  Relationship:    {relationship}")
            print(f"  Correlation:     {stats['correlation']:.4f}")
            print(f"  Hedge Ratio:     {stats['hedge_ratio']:.4f}")
            print(f"  ADF p-value:     {stats['adf_pvalue']:.6f}")
            print(f"  Spread Mean:     {stats['spread_mean']:.4f}")
            print(f"  Spread Std:      {stats['spread_std']:.4f}")
    else:
        print("\nNo economically related cointegrated pairs found.")
else:
    print("\nNo cointegrated pairs found with the given criteria.")


FILTERING FOR ECONOMIC RELATIONSHIPS

Filtering 129 cointegrated pairs for economic relationships...
✓ ADBE - CSCO: Same Sub-Sector
  Sector: Technology
  Correlation: 0.9747 | Hedge Ratio: 8.5504
✓ ADBE - MSFT: Same Sub-Sector
  Sector: Technology
  Correlation: 0.9920 | Hedge Ratio: 2.7454
✓ BAC - C: Same Sub-Sector
  Sector: Financials
  Correlation: 0.9660 | Hedge Ratio: 0.5624
✓ BP - SHEL: Same Sub-Sector
  Sector: Energy
  Correlation: 0.9497 | Hedge Ratio: 0.5596
✓ CL - KMB: Same Sub-Sector
  Sector: Consumer_Staples
  Correlation: 0.9719 | Hedge Ratio: 0.5155
✓ CSCO - MSFT: Same Sub-Sector
  Sector: Technology
  Correlation: 0.9777 | Hedge Ratio: 0.3085
✓ FDX - MMM: Same Sub-Sector
  Sector: Industrials
  Correlation: 0.9767 | Hedge Ratio: 1.4323
✓ KMB - MDLZ: Same Sub-Sector
  Sector: Consumer_Staples
  Correlation: 0.9712 | Hedge Ratio: 2.5244
✓ LOW - NKE: Same Sub-Sector
  Sector: Consumer_Discretionary
  Correlation: 0.9768 | Hedge Ratio: 1.3404
✓ MMM - UPS: Same Sub-Secto

In [49]:
pairs_list


[['ADBE', 'CSCO'],
 ['ADBE', 'MSFT'],
 ['BAC', 'C'],
 ['BP', 'SHEL'],
 ['CL', 'KMB'],
 ['CSCO', 'MSFT'],
 ['FDX', 'MMM'],
 ['KMB', 'MDLZ'],
 ['LOW', 'NKE'],
 ['MMM', 'UPS'],
 ['SPY', 'VOO']]

### Validación de cointegración en el periodo de test

In [19]:
pairs = find_cointegrated_pairs(
        tickers, 
        correlation_threshold=0.7,
        adf_pvalue_threshold=0.05,
        lookback_years=3, #periods of validation & test
        end_date=datetime.now()
    )

Fetching 3 years of data for 76 tickers...
Valid tickers with sufficient data: 75
Data shape: (752, 75) (rows: 752, tickers: 75)

Step 1: Testing individual series for non-stationarity I(1)...
  ✓ AAPL: Non-stationary (p=0.7732) - I(1)
  ✓ ABBV: Non-stationary (p=0.8993) - I(1)
  ✓ ADBE: Non-stationary (p=0.5539) - I(1)
  ✓ AEP: Non-stationary (p=0.9667) - I(1)
  ✓ AMGN: Non-stationary (p=0.6612) - I(1)
  ✓ AMZN: Non-stationary (p=0.7292) - I(1)
  ✓ APD: Non-stationary (p=0.0926) - I(1)
  ✓ AXP: Non-stationary (p=0.9592) - I(1)
  ✓ BAC: Non-stationary (p=0.9331) - I(1)
  ✓ BK: Non-stationary (p=0.9960) - I(1)
  ✓ BMY: Non-stationary (p=0.2128) - I(1)
  ✗ BP: Stationary (p=0.0267) - I(0), excluding
  ✓ C: Non-stationary (p=0.9848) - I(1)
  ✓ CAT: Non-stationary (p=0.9895) - I(1)
  ✓ CL: Non-stationary (p=0.5109) - I(1)
  ✗ COP: Stationary (p=0.0496) - I(0), excluding
  ✓ CSCO: Non-stationary (p=0.9772) - I(1)
  ✗ CVX: Stationary (p=0.0010) - I(0), excluding
  ✓ D: Non-stationary (p=0.68

In [30]:
# Filter for economic relationships
if pairs:
    print("\n" + "="*80)
    print("FILTERING FOR ECONOMIC RELATIONSHIPS")
    print("="*80)
        
    economically_related = filter_economically_related_pairs(pairs)
        
    # Display final results
    if economically_related:
        print("\n" + "="*80)
        print("FINAL ECONOMICALLY RELATED COINTEGRATED PAIRS")
        print("="*80)
        for ticker1, ticker2, stats, relationship in economically_related:
            print(f"\n{ticker1} <-> {ticker2}")
            print(f"  Relationship:    {relationship}")
            print(f"  Correlation:     {stats['correlation']:.4f}")
            print(f"  Hedge Ratio:     {stats['hedge_ratio']:.4f}")
            print(f"  ADF p-value:     {stats['adf_pvalue']:.6f}")
            print(f"  Spread Mean:     {stats['spread_mean']:.4f}")
            print(f"  Spread Std:      {stats['spread_std']:.4f}")
    else:
        print("\nNo economically related cointegrated pairs found.")
else:
    print("\nNo cointegrated pairs found with the given criteria.")


FILTERING FOR ECONOMIC RELATIONSHIPS

Filtering 129 cointegrated pairs for economic relationships...
✓ ADBE - CSCO: Same Sub-Sector
  Sector: Technology
  Correlation: 0.9747 | Hedge Ratio: 8.5504
✓ ADBE - MSFT: Same Sub-Sector
  Sector: Technology
  Correlation: 0.9920 | Hedge Ratio: 2.7454
✓ BAC - C: Same Sub-Sector
  Sector: Financials
  Correlation: 0.9660 | Hedge Ratio: 0.5624
✓ BP - SHEL: Same Sub-Sector
  Sector: Energy
  Correlation: 0.9497 | Hedge Ratio: 0.5596
✓ CL - KMB: Same Sub-Sector
  Sector: Consumer_Staples
  Correlation: 0.9719 | Hedge Ratio: 0.5155
✓ CSCO - MSFT: Same Sub-Sector
  Sector: Technology
  Correlation: 0.9777 | Hedge Ratio: 0.3085
✓ FDX - MMM: Same Sub-Sector
  Sector: Industrials
  Correlation: 0.9767 | Hedge Ratio: 1.4323
✓ KMB - MDLZ: Same Sub-Sector
  Sector: Consumer_Staples
  Correlation: 0.9712 | Hedge Ratio: 2.5244
✓ LOW - NKE: Same Sub-Sector
  Sector: Consumer_Discretionary
  Correlation: 0.9768 | Hedge Ratio: 1.3404
✓ MMM - UPS: Same Sub-Secto