# Trading Pairs and Signal Exploration

This notebook focuses on:

1. Identifying trading pairs using Granger causality
2. Exploring different trend detection methods
3. Analyzing signal characteristics
4. Testing different parameters

Use this notebook to understand how trading signals are generated and to tune the parameters that drive them.

In [None]:
# Import libraries
# Numerical, plotting, market-data download and YAML-parsing dependencies.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import yaml
import warnings
from pathlib import Path
# NOTE: this silences every warning category for the rest of the kernel
# session, so deprecation/numerical warnings raised by later cells are hidden.
warnings.filterwarnings('ignore')

# Analysis helpers used throughout the notebook.
# NOTE(review): these look like project-local modules — confirm they are on
# the import path of the kernel's working directory.
from volatility_estimators import calculate_volatility_for_assets
from volatility_clustering import cluster_assets_by_volatility
from granger_causality import GrangerCausalityAnalyzer, identify_trading_pairs
from signal_generator import SignalGenerator, SignalAnalyzer

# Apply a seaborn-flavoured matplotlib style sheet to all figures below.
# NOTE(review): the 'seaborn-v0_8-*' style names exist from matplotlib 3.6
# onwards — confirm the environment's matplotlib version.
plt.style.use('seaborn-v0_8-darkgrid')
print("Libraries loaded successfully!")

## Load Configuration and Data

In [None]:
# Read the YAML settings that drive every later cell
config_path = Path('config.yaml')
with config_path.open('r') as config_file:
    config = yaml.safe_load(config_file)

# Echo the data section so the reader can see which universe and period are in use
data_cfg = config['data']
print("Configuration loaded")
print(f"Assets: {data_cfg['assets']}")
print(f"Data period: {data_cfg['start_date']} to {data_cfg['end_date']}")

In [None]:
# Download price history for every configured ticker.
# Result: data_dict maps ticker -> DataFrame returned by yfinance; tickers that
# fail or come back empty are collected in failed_tickers and reported.
tickers = config['data']['assets']
start_date = config['data']['start_date']
end_date = config['data']['end_date']

print(f"Downloading data for {len(tickers)} tickers...\n")

data_dict = {}
failed_tickers = []
for ticker in tickers:
    try:
        df = yf.download(ticker, start=start_date, end=end_date, progress=False)
    except Exception as e:
        # Best effort: one failing ticker must not abort the whole download
        print(f"  {ticker}: Error - {e}")
        failed_tickers.append(ticker)
        continue

    if len(df) > 0:
        data_dict[ticker] = df
        print(f"  {ticker}: {len(df)} days")
    else:
        # An empty frame means no usable data; report it instead of dropping
        # the ticker silently, otherwise the asset just vanishes downstream.
        print(f"  {ticker}: No data returned - skipped")
        failed_tickers.append(ticker)

print(f"\nData downloaded for {len(data_dict)} assets")
if failed_tickers:
    print(f"Skipped tickers: {failed_tickers}")

In [None]:
# Estimate rolling volatility per asset, then drop rows containing any NaN
print("Calculating volatility...")
vol_cfg = config['volatility']
volatility_df = calculate_volatility_for_assets(
    data_dict,
    estimator=vol_cfg['primary_estimator'],
    rolling_window=vol_cfg['rolling_window'],
).dropna()
print(f"Volatility calculated: {len(volatility_df)} data points")

In [None]:
# Cluster the assets on their volatility series and keep the 'mid' cluster members
print("Clustering assets by volatility...")
cluster_cfg = config['clustering']
clustering, mid_cluster_members = cluster_assets_by_volatility(
    volatility_df,
    n_clusters=cluster_cfg['n_clusters'],
    random_state=cluster_cfg['random_state'],
    target_cluster='mid',
)
print(f"\nMid-cluster members: {mid_cluster_members}")

## Granger Causality Testing with Different Lags

Test how different lag values affect the identification of trading pairs.

In [None]:
# Test different lags
# For each candidate lag, count the significant pairs whose optimal lag falls
# within +/- lag_tolerance days of it. Results land in lag_results[lag].
test_lags = [3, 5, 7, 10, 15, 20]
lag_tolerance = 2            # window half-width (days) around each candidate lag
exploration_max_lag = 30     # upper bound handed to the analyzer for this exploration
exploration_alpha = 0.05     # significance level used for this exploration only
lag_results = {}

print("Testing different lag values...\n")

# The pairwise tests take no lag-specific input, so run them once and reuse the
# result for every candidate lag instead of repeating the identical pass.
# NOTE(review): assumes test_all_pairs is deterministic for fixed inputs — confirm.
analyzer = GrangerCausalityAnalyzer(
    max_lag=exploration_max_lag,
    significance_level=exploration_alpha
)
results = analyzer.test_all_pairs(
    volatility_df[mid_cluster_members],
    verbose=False
)

for lag in test_lags:
    print(f"Testing lag = {lag}...")

    if len(results) == 0:
        lag_results[lag] = pd.DataFrame()
        print(f"  No pairs found\n")
        continue

    # Keep pairs whose optimal lag lies in [lag - tolerance, lag + tolerance]
    # (Series.between is inclusive on both ends by default)
    in_window = results['optimal_lag'].between(lag - lag_tolerance, lag + lag_tolerance)

    # Remove circular relationships
    filtered = analyzer.remove_circular_relationships(results[in_window])

    lag_results[lag] = filtered
    print(f"  Found {len(filtered)} pairs at lag {lag} (±{lag_tolerance})\n")

print("Lag testing complete!")

In [None]:
# Bar chart: how many pairs each tested lag produced
n_pairs_by_lag = {lag: len(pairs) for lag, pairs in lag_results.items()}

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(
    list(n_pairs_by_lag.keys()),
    list(n_pairs_by_lag.values()),
    color='steelblue',
    alpha=0.7,
    edgecolor='black',
)
ax.set_xlabel('Lag (days)', fontsize=12)
ax.set_ylabel('Number of Trading Pairs', fontsize=12)
ax.set_title('Trading Pairs Identified by Lag Value', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='y')
fig.tight_layout()
plt.show()

# Report the lag with the most pairs (the first one wins on ties)
print("\nOptimal lag selection:")
if n_pairs_by_lag:
    best_lag, best_count = max(n_pairs_by_lag.items(), key=lambda item: item[1])
    print(f"  Lag {best_lag} yields {best_count} pairs")

In [None]:
# Display pairs for the lag configured in config.yaml
configured_lag = config['granger']['optimal_lag']

# lag_results only contains the lags listed in test_lags, so look the
# configured lag up safely instead of indexing (which raised KeyError
# whenever config.yaml named a lag that was not tested above).
pairs_at_configured_lag = lag_results.get(configured_lag)

if pairs_at_configured_lag is None:
    print(f"\nLag = {configured_lag} was not among the tested lags: {sorted(lag_results)}")
    print("Add it to test_lags or change the lag value in config.yaml")
elif len(pairs_at_configured_lag) > 0:
    print(f"\nTrading Pairs at Lag = {configured_lag}:")
    print("="*80)
    display(pairs_at_configured_lag)
else:
    print(f"\nNo trading pairs found at lag = {configured_lag}")
    print("Try adjusting the lag value or significance level in config.yaml")

## Use the Configured Lag for Trading Pairs

In [None]:
# Run pair identification with the Granger settings from config.yaml
print("Identifying trading pairs with configured parameters...\n")

granger_cfg = config['granger']
trading_pairs, analyzer = identify_trading_pairs(
    volatility_df,
    mid_cluster_members,
    target_lag=granger_cfg['optimal_lag'],
    significance_level=granger_cfg['alpha'],
    max_lag=granger_cfg['max_lag'],
    remove_circular=True,
)

if len(trading_pairs) == 0:
    print("\nNo trading pairs found with current parameters.")
else:
    print(f"\nFound {len(trading_pairs)} trading pairs")

    # Draw the predictor -> target relationships as a network
    analyzer.plot_causality_network(trading_pairs, figsize=(12, 8))

## Compare Different Trend Detection Methods

Test how different trend detection methods affect signal generation.

In [None]:
if len(trading_pairs) > 0:
    # Trend detectors to compare: display name, SignalGenerator method key, parameters
    trend_methods = [
        dict(name='SMA Crossover', method='sma_crossover',
             params=dict(fast_period=5, slow_period=20)),
        dict(name='Linear Regression', method='linear_regression',
             params=dict(window=20)),
        dict(name='MACD', method='macd',
             params=dict(fast=12, slow=26, signal=9)),
        dict(name='Rate of Change', method='rate_of_change',
             params=dict(window=10)),
    ]

    # Signals per method, keyed by display name; later cells read this dict
    all_method_signals = {}

    print("Generating signals with different trend methods...\n")

    for trend_config in trend_methods:
        method_label = trend_config['name']
        print(f"Method: {method_label}")

        generator = SignalGenerator(
            trend_method=trend_config['method'],
            trend_params=trend_config['params'],
        )
        method_signals = generator.generate_signals_for_all_pairs(volatility_df, trading_pairs)
        all_method_signals[method_label] = method_signals

        # Quick per-method summary averaged over all pairs
        method_stats = SignalAnalyzer.get_signal_statistics(method_signals)
        print(f"  Average trades per pair: {method_stats['n_trades'].mean():.1f}")
        print(f"  Average % active: {method_stats['pct_active'].mean():.1f}%\n")

    print("Signal generation complete!")
else:
    print("Cannot generate signals without trading pairs.")

In [None]:
# Summarise and plot signal statistics for every trend method
if len(trading_pairs) > 0:
    # One summary row per method
    comparison_data = []
    for method_name, method_signals in all_method_signals.items():
        method_stats = SignalAnalyzer.get_signal_statistics(method_signals)
        row = {
            'Method': method_name,
            'Avg Trades/Pair': method_stats['n_trades'].mean(),
            'Avg % Active': method_stats['pct_active'].mean(),
            'Total Buy Signals': method_stats['n_buy'].sum(),
            'Total Sell Signals': method_stats['n_sell'].sum(),
        }
        comparison_data.append(row)

    comparison_df = pd.DataFrame(comparison_data)

    print("\nTrend Method Comparison:")
    print("="*80)
    display(comparison_df)

    # Two side-by-side bar charts: (column, colour, title, y-axis label)
    panel_specs = [
        ('Avg Trades/Pair', 'steelblue', 'Average Trades per Pair', 'Number of Trades'),
        ('Avg % Active', 'coral', 'Average % Time in Position', 'Percentage (%)'),
    ]
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    for ax, (column, colour, title, y_label) in zip(axes, panel_specs):
        comparison_df.plot(x='Method', y=column, kind='bar',
                           ax=ax, color=colour, legend=False)
        ax.set_title(title, fontweight='bold')
        ax.set_ylabel(y_label)
        ax.set_xlabel('')
        ax.grid(True, alpha=0.3, axis='y')
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha='right')

    plt.tight_layout()
    plt.show()

## Detailed Signal Analysis for Specific Pair

Deep dive into signals for a specific trading pair.

In [None]:
if len(trading_pairs) > 0:
    # Use the first identified pair as the worked example
    selected_pair = trading_pairs.iloc[0]
    predictor_ticker = selected_pair['predictor']
    target_ticker = selected_pair['target']
    pair_name = f"{predictor_ticker}->{target_ticker}"

    print(f"Analyzing pair: {pair_name}")
    print(f"Predictor: {predictor_ticker}")
    print(f"Target: {target_ticker}")
    print(f"Optimal lag: {selected_pair['optimal_lag']} days")
    print(f"P-value: {selected_pair['p_value']:.4f}\n")

    # One panel for the target price plus one per trend method
    n_panels = len(trend_methods) + 1
    fig, axes = plt.subplots(n_panels, 1, figsize=(14, 4*n_panels))

    # Top panel: closing price of the target asset
    price_ax = axes[0]
    target_price = data_dict[target_ticker]['Close']
    price_ax.plot(target_price.index, target_price.values,
                  color='black', linewidth=2)
    price_ax.set_title(f'{target_ticker} Price', fontsize=12, fontweight='bold')
    price_ax.set_ylabel('Price ($)')
    price_ax.grid(True, alpha=0.3)

    # Remaining panels: the signal series each method produced for this pair
    for ax, trend_config in zip(axes[1:], trend_methods):
        signals_df = all_method_signals[trend_config['name']][pair_name]
        signal = signals_df['signal']

        # Tally observations per signal value (1 = buy, -1 = sell, 0 = hold)
        n_buy = (signal == 1).sum()
        n_sell = (signal == -1).sum()
        n_hold = (signal == 0).sum()

        ax.plot(signals_df.index, signal, linewidth=2, color='purple')
        # Shade positive stretches green and negative stretches red
        ax.fill_between(signals_df.index, 0, signal,
                        where=signal > 0,
                        alpha=0.3, color='green', label='BUY')
        ax.fill_between(signals_df.index, 0, signal,
                        where=signal < 0,
                        alpha=0.3, color='red', label='SELL')
        ax.axhline(y=0, color='black', linestyle='--', alpha=0.5)

        ax.set_title(
            f'{trend_config["name"]} - Buy: {n_buy} | Sell: {n_sell} | Hold: {n_hold}',
            fontsize=12, fontweight='bold'
        )
        ax.set_ylabel('Signal')
        ax.legend(loc='upper right')
        ax.grid(True, alpha=0.3)

    axes[-1].set_xlabel('Date')
    plt.tight_layout()
    plt.show()

## Parameter Sensitivity Analysis

Test how sensitive signals are to parameter changes.

In [None]:
if len(trading_pairs) > 0:
    # Grid of SMA crossover windows to sweep
    fast_periods = [3, 5, 7, 10]
    slow_periods = [15, 20, 25, 30]

    # Only combinations where the fast window is strictly shorter than the slow one
    period_grid = [
        (fast, slow)
        for fast in fast_periods
        for slow in slow_periods
        if fast < slow
    ]

    print("Testing SMA parameter sensitivity...\n")

    sensitivity_results = []
    for fast, slow in period_grid:
        generator = SignalGenerator(
            trend_method='sma_crossover',
            trend_params={'fast_period': fast, 'slow_period': slow},
        )
        grid_signals = generator.generate_signals_for_all_pairs(volatility_df, trading_pairs)
        grid_stats = SignalAnalyzer.get_signal_statistics(grid_signals)

        sensitivity_results.append({
            'Fast': fast,
            'Slow': slow,
            'Avg Trades': grid_stats['n_trades'].mean(),
            'Avg % Active': grid_stats['pct_active'].mean(),
        })

    sensitivity_df = pd.DataFrame(sensitivity_results)

    print("\nSMA Parameter Sensitivity Results:")
    print("="*80)
    display(sensitivity_df)

    # Reshape to Fast x Slow grids, one per metric
    pivot_trades = sensitivity_df.pivot(index='Fast', columns='Slow', values='Avg Trades')
    pivot_active = sensitivity_df.pivot(index='Fast', columns='Slow', values='Avg % Active')

    # Heatmap per metric: (grid, colour map, title)
    heatmap_specs = [
        (pivot_trades, 'YlOrRd', 'Average Trades per Pair'),
        (pivot_active, 'YlGnBu', 'Average % Time Active'),
    ]
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    for ax, (grid, colour_map, title) in zip(axes, heatmap_specs):
        sns.heatmap(grid, annot=True, fmt='.1f', cmap=colour_map, ax=ax)
        ax.set_title(title, fontweight='bold')
        ax.set_xlabel('Slow Period')
        ax.set_ylabel('Fast Period')

    plt.tight_layout()
    plt.show()

## Key Takeaways

### Lag Selection
- Different lag values identify different numbers of trading pairs
- The paper recommended lag=5 as optimal
- Test multiple lags to find robust relationships

### Trend Detection Methods
- **SMA Crossover**: Simple, widely used, generates a moderate number of signals
- **Linear Regression**: Smooth, trend-based, fewer whipsaws
- **MACD**: Popular momentum indicator, responsive to changes
- **Rate of Change**: Direct measure of trend, can be noisy

### Parameter Sensitivity
- Faster parameters (shorter windows) = more trades, more whipsaws
- Slower parameters (longer windows) = fewer trades, smoother signals
- Balance between responsiveness and stability

### Recommendations
1. Test multiple lag values to find stable relationships
2. Start with the Yang-Zhang volatility estimator
3. Use SMA crossover (5/20) as a baseline
4. Consider an ensemble of multiple methods
5. Always backtest before live trading