# Pragmatic Asset Allocation - Signal Analysis

This notebook analyzes the effectiveness and behavior of the six signal types in the Pragmatic Asset Allocation Model.

## Objectives:
- Validate signal generation logic
- Analyze signal distributions and persistence
- Test signal effectiveness in different market regimes
- Evaluate signal interactions and correlations

In [None]:
import sys
import os
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Plot styling applied to every chart produced below
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Read the strategy configuration from the parent directory
with Path('../config.yaml').open('r') as config_file:
    config = yaml.safe_load(config_file)

print("Configuration loaded successfully")
strategy_name = config['strategy']['name']
print(f"Strategy: {strategy_name}")

## 1. Load Signals and Data

In [None]:
# Build `signals_dict` from cached market data.
# On any failure path `signals_dict` is left as an empty dict, so the
# membership checks in the analysis cells below simply skip their work.
try:
    from signal_generation import PragmaticAssetAllocationSignals
    from data_acquisition import PragmaticAssetAllocationData

    # Fetch whatever the acquisition layer has cached
    data_acq = PragmaticAssetAllocationData()
    all_data = data_acq.load_cached_data()

    if not all_data:
        print("No data available. Run data acquisition first.")
        signals_dict = {}
    else:
        # Run every signal generator over the cached data
        signal_gen = PragmaticAssetAllocationSignals()
        signals_dict = signal_gen.generate_all_signals(all_data)

        print("Signals generated successfully")
        signal_names = list(signals_dict.keys())
        print(f"Signal types: {signal_names}")

except ImportError as import_error:
    print(f"Could not import modules: {import_error}")
    signals_dict = {}

## 2. Momentum Signal Analysis

In [None]:
# Momentum signal review: data coverage, per-asset momentum history, ranking and
# selection frequencies, and the correlation of momentum with subsequent returns.
if 'momentum' in signals_dict:
    momentum_signals = signals_dict['momentum']
    print("=== MOMENTUM SIGNAL ANALYSIS ===\n")

    # Signal completeness
    print(f"Momentum signals shape: {momentum_signals.shape}")
    print(f"Date range: {momentum_signals.index.min()} to {momentum_signals.index.max()}")
    print(f"Missing values: {momentum_signals.isnull().sum().sum()}")

    # Per-asset momentum columns carry the ticker as a prefix ('<ticker>_Momentum').
    # Columns that start with 'Momentum_' (e.g. 'Momentum_Rankings',
    # 'Momentum_Selected') are aggregates and must not be plotted as an asset.
    momentum_cols = [
        col for col in momentum_signals.columns
        if 'Momentum' in col and not col.startswith('Momentum_')
    ]
    has_rankings = 'Momentum_Rankings' in momentum_signals.columns

    if momentum_cols or has_rankings:
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        # The first three grid cells (row-major) show the first three assets;
        # the bottom-right cell is reserved for the ranking distribution.
        for ax, col in zip(axes.ravel()[:3], momentum_cols):
            momentum_signals[col].dropna().plot(ax=ax, linewidth=1)
            ax.set_title(f'{col.replace("_Momentum", "")} 12M Momentum')
            ax.set_ylabel('Momentum (%)')
            ax.grid(True, alpha=0.3)

        # Momentum ranking distribution
        if has_rankings:
            rankings = momentum_signals['Momentum_Rankings'].dropna()
            rankings.value_counts().sort_index().plot(kind='bar', ax=axes[1, 1])
            axes[1, 1].set_title('Momentum Ranking Distribution')
            axes[1, 1].set_xlabel('Rank')
            axes[1, 1].set_ylabel('Frequency')
            axes[1, 1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    # Selection analysis: only index the 'Top_N_Asset' columns that actually
    # exist, so a frame without 'Top_2_Asset' does not raise a KeyError.
    if 'Top_1_Asset' in momentum_signals.columns:
        print("\nTop Asset Selections:")
        top_cols = [
            col for col in ('Top_1_Asset', 'Top_2_Asset')
            if col in momentum_signals.columns
        ]
        # Rows are dropped jointly, so percentages share one denominator
        top_assets = momentum_signals[top_cols].dropna()

        for rank, col in enumerate(top_cols, start=1):
            asset_counts = top_assets[col].value_counts()
            print(f"\nTop {rank} Asset Selection Frequency:")
            for asset, count in asset_counts.items():
                pct = count / len(top_assets) * 100
                print(f"  {asset}: {count} times ({pct:.1f}%)")

    # Momentum effectiveness test
    print("\nMomentum Signal Effectiveness:")
    forward_days = 252  # forward-looking horizon in rows of the price data (~1 year)

    if 'risky_assets' in all_data:
        risky_data = all_data['risky_assets']
        # get_level_values reflects the columns actually present, whereas
        # MultiIndex.levels may still list tickers that were sliced away.
        available_tickers = set(risky_data.columns.get_level_values(0))

        momentum_effectiveness = []

        for asset in config['assets']['risky']:
            ticker = asset['ticker']
            momentum_col = f'{ticker}_Momentum'

            if momentum_col not in momentum_signals.columns or ticker not in available_tickers:
                continue

            # Return realised over the NEXT `forward_days` rows, aligned to today
            future_returns = (
                risky_data[ticker]['Adj Close']
                .pct_change(forward_days)
                .shift(-forward_days)
            )

            combined = pd.DataFrame({
                'momentum': momentum_signals[momentum_col],
                'future_return': future_returns
            }).dropna()

            # Require a minimum sample before reporting a correlation
            if len(combined) > 30:
                momentum_effectiveness.append({
                    'asset': ticker,
                    'correlation': combined['momentum'].corr(combined['future_return']),
                    'samples': len(combined)
                })

        if momentum_effectiveness:
            effectiveness_df = pd.DataFrame(momentum_effectiveness)
            print("\nMomentum Predictability (correlation with future 1Y returns):")
            for entry in momentum_effectiveness:
                corr = entry['correlation']
                status = "✅" if corr > 0.1 else "⚠️" if corr > 0 else "❌"
                print(f"  {entry['asset']}: {corr:.3f} ({entry['samples']} samples) {status}")
else:
    print("Momentum signals not available")

## 3. Trend Filter Analysis

In [None]:
# Trend filter review: share of time each asset is in an uptrend, how long
# trend states persist, and forward returns conditional on the trend state.
if 'trend' in signals_dict:
    trend_signals = signals_dict['trend']
    print("=== TREND FILTER ANALYSIS ===\n")

    # Trend signal summary
    trend_cols = [col for col in trend_signals.columns if 'Trend_Up' in col]

    if trend_cols:
        print("Trend Filter Signal Summary:")
        for col in trend_cols:
            signal_data = trend_signals[col].dropna()
            if len(signal_data) > 0:
                true_pct = signal_data.mean()
                asset_name = col.replace('_Trend_Up', '')
                print(f"  {asset_name}: {true_pct:.1%} of days trending up")

        # Trend persistence analysis
        print("\nTrend Persistence Analysis:")
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        # zip stops at the four available panels, so only the first four
        # trend columns are analysed here.
        for ax, col in zip(axes.ravel(), trend_cols):
            asset_name = col.replace("_Trend_Up", "")
            signal = trend_signals[col].dropna().astype(bool)
            if signal.empty:
                continue

            # A new streak starts wherever the state differs from the previous
            # row; the running count of those starts labels each streak.
            streak_id = (signal != signal.shift()).cumsum()
            streak_lengths = signal.groupby(streak_id).size()
            print(f"  {asset_name}: {len(streak_lengths)} streaks, "
                  f"average length {streak_lengths.mean():.1f} periods")

            # Plot trend signal over time
            signal.astype(int).plot(ax=ax, linewidth=1)
            ax.set_title(f'{asset_name} Trend Signal')
            ax.set_ylabel('Trending Up (1=Yes, 0=No)')
            ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Trend effectiveness test
        print("\nTrend Filter Effectiveness:")

        if 'risky_assets' in all_data:
            risky_data = all_data['risky_assets']
            # get_level_values reflects the columns actually present, whereas
            # MultiIndex.levels may still list tickers that were sliced away.
            available_tickers = set(risky_data.columns.get_level_values(0))

            trend_effectiveness = []

            for asset in config['assets']['risky']:
                ticker = asset['ticker']
                trend_col = f'{ticker}_Trend_Up'

                if trend_col not in trend_signals.columns or ticker not in available_tickers:
                    continue

                # Return over the next 21 rows (~1 month), aligned to today
                prices = risky_data[ticker]['Adj Close']
                future_returns = prices.pct_change(21).shift(-21)

                combined = pd.DataFrame({
                    'trend_up': trend_signals[trend_col],
                    'future_return': future_returns
                }).dropna()

                if len(combined) <= 30:
                    continue

                is_up = combined['trend_up'].astype(bool)
                up_returns = combined.loc[is_up, 'future_return']
                down_returns = combined.loc[~is_up, 'future_return']

                # Both regimes must be observed for the comparison to mean anything
                if len(up_returns) > 0 and len(down_returns) > 0:
                    up_avg = up_returns.mean()
                    down_avg = down_returns.mean()

                    trend_effectiveness.append({
                        'asset': ticker,
                        'up_return': up_avg,
                        'down_return': down_avg,
                        'difference': up_avg - down_avg,
                        'samples_up': len(up_returns),
                        'samples_down': len(down_returns)
                    })

            if trend_effectiveness:
                effectiveness_df = pd.DataFrame(trend_effectiveness)
                print("\nTrend Filter Performance (1-month forward returns):")
                for entry in trend_effectiveness:
                    status = "✅" if entry['difference'] > 0 else "❌"
                    print(f"  {entry['asset']}: Up {entry['up_return']:.2%} vs Down {entry['down_return']:.2%} ({entry['difference']:.2%}) {status}")
    else:
        # The signal frame exists but has no '*Trend_Up*' columns
        print("Trend signals not available")
else:
    print("Trend signals not available")

## 4. Market Health Signal Analysis

In [None]:
# Market health review: how often the stress signal fires, the associated cash
# allocation, and whether same-day market returns differ between regimes.
if 'market_health' in signals_dict:
    market_health_signals = signals_dict['market_health']
    print("=== MARKET HEALTH SIGNAL ANALYSIS ===\n")

    # Market health summary
    if 'Market_Stress_Signal' in market_health_signals.columns:
        stress_signal = market_health_signals['Market_Stress_Signal'].dropna()
        stress_pct = stress_signal.mean()

        print(f"Market stress signal frequency: {stress_pct:.1%}")
        print(f"Total stress periods: {stress_signal.sum()}")

        has_cash_alloc = 'Cash_Allocation_Pct' in market_health_signals.columns

        # Cash allocation analysis
        if has_cash_alloc:
            cash_alloc = market_health_signals['Cash_Allocation_Pct'].dropna()
            # NOTE(review): the label below says "during stress", but the mean is
            # taken over every non-null row, not only stress rows - confirm intent.
            avg_cash = cash_alloc.mean()
            max_cash = cash_alloc.max()

            print(f"Average cash allocation during stress: {avg_cash:.1%}")
            print(f"Maximum cash allocation: {max_cash:.1%}")

        # Visualize market health signals
        fig, axes = plt.subplots(2, 1, figsize=(15, 10))

        # Stress signal over time
        stress_signal.astype(int).plot(ax=axes[0], linewidth=1, color='red')
        axes[0].set_title('Market Stress Signal (2+ Risky Assets in Downtrend)')
        axes[0].set_ylabel('Stress (1=Yes, 0=No)')
        axes[0].grid(True, alpha=0.3)

        # Cash allocation over time
        if has_cash_alloc:
            market_health_signals['Cash_Allocation_Pct'].plot(ax=axes[1], linewidth=1, color='blue')
            axes[1].set_title('Cash Allocation Percentage')
            axes[1].set_ylabel('Cash %')
            axes[1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Market health effectiveness
        print("\nMarket Health Signal Effectiveness:")

        if 'risky_assets' in all_data:
            risky_data = all_data['risky_assets']
            # get_level_values reflects the columns actually present, whereas
            # MultiIndex.levels may still list tickers that were sliced away.
            available_tickers = set(risky_data.columns.get_level_values(0))

            # One-period returns per risky asset, built in a single step
            market_returns = pd.DataFrame({
                asset['ticker']: risky_data[asset['ticker']]['Adj Close'].pct_change()
                for asset in config['assets']['risky']
                if asset['ticker'] in available_tickers
            })

            if not market_returns.empty:
                # Equal-weighted average across the risky assets
                market_returns['Average'] = market_returns.mean(axis=1)

                # Combine with stress signal
                combined = pd.DataFrame({
                    'stress': stress_signal,
                    'market_return': market_returns['Average']
                }).dropna()

                in_stress = combined['stress'].astype(bool)
                stress_returns = combined.loc[in_stress, 'market_return']
                normal_returns = combined.loc[~in_stress, 'market_return']

                if len(stress_returns) > 0 and len(normal_returns) > 0:
                    print(f"Market returns during stress: {stress_returns.mean():.2%} daily")
                    print(f"Market returns during normal: {normal_returns.mean():.2%} daily")
                    print(f"Difference: {(normal_returns.mean() - stress_returns.mean()):.2%} daily")

                    # Two-sample t-test on the two return samples
                    from scipy import stats
                    t_stat, p_value = stats.ttest_ind(stress_returns, normal_returns)
                    print(f"T-test: t={t_stat:.2f}, p={p_value:.4f}")

                    if p_value < 0.05:
                        print("✅ Statistically significant difference in returns")
                    else:
                        print("⚠️ No statistically significant difference")
    else:
        # The signal frame exists but lacks the 'Market_Stress_Signal' column
        print("Market health signals not available")
else:
    print("Market health signals not available")

## 5. Yield Curve Signal Analysis

In [None]:
# Yield curve review: inversion frequency, spread history with inverted spans
# shaded, and average risky-asset returns before vs. after inversion dates.
if 'yield_curve' in signals_dict:
    yield_curve_signals = signals_dict['yield_curve']
    print("=== YIELD CURVE SIGNAL ANALYSIS ===\n")

    # Yield curve signal summary
    if 'Yield_Curve_Inverted_Persistent' in yield_curve_signals.columns:
        inversion_signal = yield_curve_signals['Yield_Curve_Inverted_Persistent'].dropna()
        inversion_pct = inversion_signal.mean()

        print(f"Yield curve inversion frequency: {inversion_pct:.1%}")
        print(f"Total inversion periods: {inversion_signal.sum()}")

        # Full cash signal
        if 'Full_Cash_Signal' in yield_curve_signals.columns:
            cash_signal = yield_curve_signals['Full_Cash_Signal'].dropna()
            cash_pct = cash_signal.mean()
            print(f"Full cash signal frequency: {cash_pct:.1%}")

        # Visualize yield curve signals
        fig, axes = plt.subplots(2, 1, figsize=(15, 10))

        # Yield curve spread
        if 'Yield_Curve_Spread' in yield_curve_signals.columns:
            spread = yield_curve_signals['Yield_Curve_Spread']
            spread.plot(ax=axes[0], linewidth=1, color='purple')
            axes[0].axhline(y=0, color='red', linestyle='--', alpha=0.7)
            axes[0].set_title('Yield Curve Spread (10Y - 3M)')
            axes[0].set_ylabel('Spread (bps)')
            axes[0].grid(True, alpha=0.3)

            # Shade inverted spans. Passing the full series with a `where` mask
            # keeps separate inversion episodes separate; feeding only the
            # inverted rows would draw a polygon across the gaps between them.
            is_inverted = (spread < 0).to_numpy()
            if is_inverted.any():
                axes[0].fill_between(spread.index,
                                     spread.to_numpy(dtype=float), 0,
                                     where=is_inverted,
                                     color='red', alpha=0.3)

        # Inversion signal
        inversion_signal.astype(int).plot(ax=axes[1], linewidth=1, color='red')
        axes[1].set_title('Yield Curve Inversion Signal')
        axes[1].set_ylabel('Inverted (1=Yes, 0=No)')
        axes[1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Yield curve effectiveness analysis
        print("\nYield Curve Signal Effectiveness:")

        if 'macroeconomic' in all_data:
            # Dates on which the persistent-inversion signal is set
            inversion_dates = inversion_signal[inversion_signal.astype(bool)].index

            if len(inversion_dates) > 0 and 'risky_assets' in all_data:
                risky_data = all_data['risky_assets']
                # get_level_values reflects the columns actually present, whereas
                # MultiIndex.levels may still list tickers that were sliced away.
                available_tickers = set(risky_data.columns.get_level_values(0))

                prices = pd.DataFrame({
                    asset['ticker']: risky_data[asset['ticker']]['Adj Close']
                    for asset in config['assets']['risky']
                    if asset['ticker'] in available_tickers
                })

                if not prices.empty:
                    # 252 is a trading-day count, so the window is measured in
                    # rows of the price index rather than calendar days; a
                    # calendar offset of 252 days would span only ~8 months and
                    # often land on a non-trading date.
                    # assumes one row per trading day - TODO confirm
                    horizon_days = 252

                    trailing_returns = prices.pct_change(horizon_days)
                    forward_returns = trailing_returns.shift(-horizon_days)

                    # Equal-weighted across assets, sampled at inversion dates;
                    # dates without a full window come back NaN and are dropped.
                    pre_inversion_returns = (
                        trailing_returns.mean(axis=1).reindex(inversion_dates).dropna()
                    )
                    post_inversion_returns = (
                        forward_returns.mean(axis=1).reindex(inversion_dates).dropna()
                    )

                    if not pre_inversion_returns.empty and not post_inversion_returns.empty:
                        pre_avg = pre_inversion_returns.mean()
                        post_avg = post_inversion_returns.mean()

                        print(f"Average market return 1Y before inversion: {pre_avg:.1%}")
                        print(f"Average market return 1Y after inversion: {post_avg:.1%}")
                        print(f"Difference: {post_avg - pre_avg:.1%}")

                        if post_avg < pre_avg:
                            print("✅ Yield curve inversion predicts market downturn")
                        else:
                            print("⚠️ Mixed evidence for yield curve predictive power")
    else:
        # The signal frame exists but lacks 'Yield_Curve_Inverted_Persistent'
        print("Yield curve signals not available")
else:
    print("Yield curve signals not available")

## 6. Signal Interaction Analysis

In [None]:
# Cross-signal review: gather the on/off signals from each generator into one
# frame, then report pairwise correlations, activation rates and co-occurrence.
print("=== SIGNAL INTERACTION ANALYSIS ===\n")

# Combine all signals into one dataframe
all_signals = pd.DataFrame()

# Momentum signals
if 'momentum' in signals_dict:
    momentum_signals = signals_dict['momentum']
    if 'Momentum_Selected' in momentum_signals.columns:
        # Each cell holds a list of selected tickers; expand it into one
        # boolean column per risky asset (non-list cells count as not selected).
        for asset in config['assets']['risky']:
            ticker = asset['ticker']
            all_signals[f'{ticker}_Momentum_Selected'] = momentum_signals['Momentum_Selected'].apply(
                lambda selected, ticker=ticker: ticker in selected if isinstance(selected, list) else False
            )

# Trend signals
if 'trend' in signals_dict:
    trend_signals = signals_dict['trend']
    trend_cols = [col for col in trend_signals.columns if 'Trend_Up' in col]
    for col in trend_cols:
        all_signals[col] = trend_signals[col]

# Market health signals
if 'market_health' in signals_dict:
    market_health_signals = signals_dict['market_health']
    if 'Market_Stress_Signal' in market_health_signals.columns:
        all_signals['Market_Stress'] = market_health_signals['Market_Stress_Signal']

# Yield curve signals
if 'yield_curve' in signals_dict:
    yield_curve_signals = signals_dict['yield_curve']
    if 'Full_Cash_Signal' in yield_curve_signals.columns:
        all_signals['Yield_Curve_Inversion'] = yield_curve_signals['Full_Cash_Signal']

# Analyze signal correlations
if not all_signals.empty:
    # Boolean columns that contain missing values are stored as object dtype,
    # which corr() cannot handle and sum() treats inconsistently. Casting to
    # float gives 1.0 / 0.0 / NaN and keeps every statistic below well-defined.
    numeric_signals = all_signals.astype(float)

    signal_corr = numeric_signals.corr()

    # Plot correlation heatmap (upper triangle masked: the matrix is symmetric)
    plt.figure(figsize=(12, 10))
    mask = np.triu(np.ones_like(signal_corr, dtype=bool))
    sns.heatmap(signal_corr, mask=mask, annot=True, cmap='coolwarm',
                center=0, square=True, linewidths=0.5, cbar_kws={"shrink": 0.8})
    plt.title('Signal Correlation Matrix')
    plt.tight_layout()
    plt.show()

    # Signal frequency analysis
    print("\nSignal Frequency Analysis:")
    for col in numeric_signals.columns:
        signal_data = numeric_signals[col].dropna()
        if len(signal_data) > 0:
            freq = signal_data.mean()
            print(f"  {col}: {freq:.1%} active")

    # Signal co-occurrence analysis
    print("\nSignal Co-occurrence Analysis:")

    # Check how often signals fire together
    if len(numeric_signals.columns) > 1:
        # Missing values are skipped by sum(), so the result is a whole number;
        # the int cast keeps the printed counts and bar labels as integers.
        signal_combinations = numeric_signals.sum(axis=1).astype(int)
        print(f"Average signals active per period: {signal_combinations.mean():.2f}")
        print(f"Maximum signals active: {signal_combinations.max()}")
        print(f"Periods with no signals: {(signal_combinations == 0).sum()}")

        # Plot signal combination distribution
        plt.figure(figsize=(10, 6))
        signal_combinations.value_counts().sort_index().plot(kind='bar')
        plt.title('Signal Combination Frequency')
        plt.xlabel('Number of Active Signals')
        plt.ylabel('Frequency')
        plt.grid(True, alpha=0.3)
        plt.show()
else:
    print("No signals available for interaction analysis")

## 7. Signal Effectiveness Summary

In [None]:
# Summary table with one row per available signal family.
# The 'Frequency' value is measured from the signal data; the 'Effectiveness'
# and 'Status' labels are fixed strings and are not derived from any statistic.
print("=== SIGNAL EFFECTIVENESS SUMMARY ===\n")

summary_columns = ('Signal Type', 'Frequency', 'Effectiveness', 'Status')
summary_rows = []  # each entry: one tuple in `summary_columns` order

# Momentum: share of rows that have a top-ranked asset
if 'momentum' in signals_dict:
    momentum_signals = signals_dict['momentum']
    if 'Top_1_Asset' in momentum_signals.columns:
        selections = momentum_signals['Top_1_Asset'].dropna()
        total_rows = len(momentum_signals)
        freq = len(selections) / total_rows if total_rows > 0 else 0
        summary_rows.append(('Momentum Ranking', f"{freq:.1%}", 'High', '✅ Active'))

# Trend filter: mean of the first QQQ 'Trend_Up' column
if 'trend' in signals_dict:
    trend_signals = signals_dict['trend']
    trend_cols = [col for col in trend_signals.columns if 'Trend_Up' in col and 'QQQ' in col]
    if trend_cols:
        trend_data = trend_signals[trend_cols[0]].dropna()
        freq = trend_data.mean()
        summary_rows.append(('Trend Filter', f"{freq:.1%}", 'Medium', '✅ Active'))

# Market health: mean of the stress signal
if 'market_health' in signals_dict:
    market_health_signals = signals_dict['market_health']
    if 'Market_Stress_Signal' in market_health_signals.columns:
        stress_data = market_health_signals['Market_Stress_Signal'].dropna()
        freq = stress_data.mean()
        summary_rows.append(('Market Health', f"{freq:.1%}", 'High', '✅ Active'))

# Yield curve: mean of the full-cash signal
if 'yield_curve' in signals_dict:
    yield_curve_signals = signals_dict['yield_curve']
    if 'Full_Cash_Signal' in yield_curve_signals.columns:
        yc_data = yield_curve_signals['Full_Cash_Signal'].dropna()
        freq = yc_data.mean()
        summary_rows.append(('Yield Curve', f"{freq:.1%}", 'Medium', '✅ Active'))

# Pivot the collected rows into the column-oriented mapping used for the table
effectiveness_summary = {
    column: [row[position] for row in summary_rows]
    for position, column in enumerate(summary_columns)
}

# Display summary table
if not summary_rows:
    print("No signals available for summary")
else:
    summary_df = pd.DataFrame(effectiveness_summary)
    print(summary_df.to_string(index=False))

    legend_lines = (
        "\nSignal Status Legend:",
        "✅ Active: Signal functioning as designed",
        "⚠️ Warning: Signal may need parameter tuning",
        "❌ Error: Signal not functioning properly",
        "\nEffectiveness Levels:",
        "High: Strong predictive power or risk management",
        "Medium: Moderate effectiveness",
        "Low: Limited effectiveness",
    )
    for legend_line in legend_lines:
        print(legend_line)

closing_lines = (
    "\n=== SIGNAL ANALYSIS COMPLETE ===",
    "\nNext steps:",
    "1. Review signal effectiveness and adjust parameters if needed",
    "2. Run portfolio construction analysis (03_portfolio_construction.ipynb)",
    "3. Proceed with backtest evaluation",
)
for closing_line in closing_lines:
    print(closing_line)