# Copy Congress Strategy - Signal Analysis

This notebook analyzes trading signals generated from Congressional trade data.

## Objectives
1. Generate and analyze Congressional trade signals
2. Evaluate signal quality and predictive power
3. Analyze committee and bipartisan effects
4. Measure signal persistence and test sensitivity to the lookback window

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
import warnings
# NOTE(review): called without a category or module filter, this suppresses
# every warning for the rest of the kernel session, so warnings raised by
# later cells will not be shown — consider narrowing the filter.
warnings.filterwarnings('ignore')

from data_acquisition import CongressionalDataAcquisition
from signal_generator import SignalGenerator

# Plot defaults: seaborn grid theme and the default figure size
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print('Imports complete')

## 1. Load Data

In [None]:
# Load configuration
# Decode explicitly as UTF-8 instead of relying on the platform's default
# locale encoding, so the same file parses identically on every machine.
with open('config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# An empty or non-mapping file makes safe_load return None or a scalar; fail
# here with a clear message rather than on the first config[...] lookup below.
if not isinstance(config, dict):
    raise ValueError(
        f"config.yaml must contain a top-level mapping, got {type(config).__name__}"
    )

print('Configuration loaded')

In [None]:
# Build the acquisition helper from the config, then unpack the five objects
# returned by get_full_dataset()
data_acq = CongressionalDataAcquisition(config)
full_dataset = data_acq.get_full_dataset()
(congressional_trades,
 prices,
 volumes,
 market_caps,
 volatility) = full_dataset

print('Data loaded successfully')

## 2. Generate Signals

In [None]:
# Build the signal generator from the loaded configuration
signal_gen = SignalGenerator(config)

# Rebalance schedule: runs from the first to the last entry of the price
# index, stepped by the frequency string taken from the portfolio config
first_price_date, last_price_date = prices.index[0], prices.index[-1]
rebalance_frequency = config['portfolio']['rebalance_frequency']
rebalance_dates = pd.date_range(
    first_price_date,
    last_price_date,
    freq=rebalance_frequency,
)

print(f"Rebalance frequency: {rebalance_frequency}")
print(f"Number of rebalance dates: {len(rebalance_dates)}")

In [None]:
# Generate the signal history, handing the rebalance dates over as a plain list
rebalance_date_list = rebalance_dates.tolist()
signals_history = signal_gen.generate_signals_timeseries(
    congressional_trades, prices, rebalance_date_list
)

print(f"\nSignals generated for {len(signals_history)} dates")

## 3. Analyze Signal Quality

In [None]:
# Use the entry in the middle of the signal history as a worked example
signal_dates = list(signals_history.keys())
midpoint = len(signals_history) // 2
example_date = signal_dates[midpoint]
example_signals = signals_history[example_date]

print(f"Example signals for {example_date}:")
print(f"Number of signals: {len(example_signals)}")
print("\nTop 10 signals:")
# Last expression of the cell: rendered as the cell output
ranked_signals = example_signals.sort_values(by='signal_weighted', ascending=False)
ranked_signals.head(10)

In [None]:
# Quality metrics computed by the signal generator from the full history
quality_metrics = signal_gen.analyze_signal_quality(signals_history)

# Line chart of the 'n_signals' column against the metrics index
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(quality_metrics.index, quality_metrics['n_signals'], linewidth=2)
ax.set_xlabel('Date')
ax.set_ylabel('Number of Signals')
ax.set_title('Signal Count Over Time')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Net flow analysis: two stacked panels plotted against the metrics index
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 10))
flow_ax, buy_ax = axes
metric_dates = quality_metrics.index

# Top panel: average net flow scaled to millions, with a zero reference line
avg_flow_millions = quality_metrics['avg_net_flow'] / 1e6
flow_ax.plot(metric_dates, avg_flow_millions, linewidth=2)
flow_ax.axhline(y=0, color='black', linestyle='--', alpha=0.3)
flow_ax.set(
    ylabel='Average Net Flow ($ Millions)',
    title='Average Net Congressional Flow per Signal',
)
flow_ax.grid(True, alpha=0.3)

# Bottom panel: buy-signal fraction as a percentage, with a 50% reference line
buy_percentage = quality_metrics['pct_buy_signals'] * 100
buy_ax.plot(metric_dates, buy_percentage, linewidth=2, color='green')
buy_ax.axhline(y=50, color='black', linestyle='--', alpha=0.3)
buy_ax.set(
    xlabel='Date',
    ylabel='Buy Signal %',
    title='Percentage of Buy Signals',
)
buy_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Pool the signals from every date into a single frame
all_signals = pd.concat(list(signals_history.values()))

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
flow_ax, normalized_ax, politicians_ax = axes

# Net flow (in thousands) and normalized signal: 50-bin histograms, each with
# a red dashed marker at zero
zero_marked_panels = [
    (flow_ax, all_signals['net_flow'] / 1000,
     'Net Flow ($1000s)', 'Net Flow Distribution'),
    (normalized_ax, all_signals['signal_normalized'],
     'Normalized Signal', 'Normalized Signal Distribution'),
]
for panel, values, x_label, panel_title in zero_marked_panels:
    panel.hist(values, bins=50, edgecolor='black')
    panel.set_xlabel(x_label)
    panel.set_ylabel('Frequency')
    panel.set_title(panel_title)
    panel.axvline(x=0, color='red', linestyle='--')

# Politician counts: one bin per integer from 1 up to the observed maximum,
# with bars left-aligned on the bin edges
politician_counts = all_signals['n_politicians']
count_bins = range(1, politician_counts.max() + 2)
politicians_ax.hist(politician_counts, bins=count_bins,
                    edgecolor='black', align='left')
politicians_ax.set_xlabel('Number of Politicians')
politicians_ax.set_ylabel('Frequency')
politicians_ax.set_title('Politicians per Signal Distribution')

plt.tight_layout()
plt.show()

# Headline statistics over the pooled signals
total_buys = all_signals['n_buys'].sum()
total_sells = all_signals['n_sells'].sum()

print("\nSignal Statistics:")
print(f"Average politicians per signal: {politician_counts.mean():.2f}")
print(f"Average transactions per signal: {all_signals['n_total'].mean():.2f}")
print(f"Buy/Sell ratio: {total_buys / total_sells:.2f}")

## 4. Committee and Bipartisan Effects

In [None]:
# Committee score analysis (runs only when the config flag is truthy)
if config['feature_engineering']['committee_weighting']:
    print('Committee Weighting Analysis:')
    committee_scores = all_signals['committee_score']
    mean_committee_score = committee_scores.mean()
    print(f"Average committee score: {mean_committee_score:.2f}")
    print(f"Max committee score: {committee_scores.max():.2f}")

    # Histogram of the scores with the mean marked as a red dashed line
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.hist(committee_scores, bins=30, edgecolor='black')
    ax.set_xlabel('Committee Score')
    ax.set_ylabel('Frequency')
    ax.set_title('Distribution of Committee Scores')
    ax.axvline(x=mean_committee_score, color='red', linestyle='--', label='Mean')
    ax.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# Bipartisan score analysis (runs only when the config flag is truthy)
if config['feature_engineering']['bipartisan_filter']:
    print('\nBipartisan Agreement Analysis:')
    bipartisan_scores = all_signals['bipartisan_score']
    print(f"Average bipartisan score: {bipartisan_scores.mean():.2f}")

    # Occurrences of each distinct score, ordered by score value
    bipartisan_counts = bipartisan_scores.value_counts().sort_index()
    score_values = bipartisan_counts.index

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(score_values, bipartisan_counts.values, edgecolor='black')
    ax.set_xlabel('Bipartisan Score')
    ax.set_ylabel('Frequency')
    ax.set_title('Distribution of Bipartisan Scores')
    ax.set_xticks(score_values)
    plt.tight_layout()
    plt.show()

    # Share of pooled signals whose score is strictly above 1.0
    strong_bipartisan = bipartisan_scores.gt(1.0).sum()
    strong_share = strong_bipartisan / len(all_signals)
    print(f"\nStrong bipartisan signals: {strong_share:.1%}")

## 5. Signal Persistence Analysis

In [None]:
# Collect, for every ticker, the dates on which it appears in the signal set
ticker_appearances = {}
for date, signals in signals_history.items():
    for ticker in signals['ticker']:
        ticker_appearances.setdefault(ticker, []).append(date)

# Persistence = number of recorded appearances per ticker, largest first
ticker_persistence = {
    symbol: len(seen_dates) for symbol, seen_dates in ticker_appearances.items()
}
persistence_series = pd.Series(ticker_persistence).sort_values(ascending=False)

print('Signal Persistence Analysis:')
print(f"Average appearances: {persistence_series.mean():.1f}")
print(f"Median appearances: {persistence_series.median():.0f}")
print("\nTop 10 most persistent signals:")
print(persistence_series.head(10))

# Histogram of appearance counts; the y-axis uses a log scale
fig, ax = plt.subplots(figsize=(12, 6))
ax.hist(persistence_series, bins=50, edgecolor='black')
ax.set_xlabel('Number of Appearances')
ax.set_ylabel('Number of Tickers')
ax.set_title('Signal Persistence Distribution')
ax.set_yscale('log')
plt.tight_layout()
plt.show()

## 6. Lookback Window Sensitivity

In [None]:
# Test different lookback windows (in days) on one date from the middle of
# the signal history
lookback_windows = [15, 30, 45, 60, 90]
test_date = list(signals_history.keys())[len(signals_history)//2]

lookback_results = []

for window in lookback_windows:
    # Build a per-window config without touching the shared one. A plain
    # config.copy() is shallow: temp_config['signal'] would be the very same
    # dict as config['signal'], so assigning lookback_days through it would
    # silently overwrite the notebook-wide setting (leaving it at the last
    # window tested). Rebuilding the nested 'signal' mapping avoids that.
    temp_config = {
        **config,
        'signal': {**config['signal'], 'lookback_days': window},
    }

    # Generate signals for the test date with this window's config
    temp_signal_gen = SignalGenerator(temp_config)
    window_signals = temp_signal_gen.aggregate_trade_flows(
        congressional_trades,
        prices,
        test_date
    )

    # Report 0 for the averages when no signals were produced
    has_signals = len(window_signals) > 0
    lookback_results.append({
        'lookback': window,
        'n_signals': len(window_signals),
        'avg_net_flow': window_signals['net_flow'].mean() if has_signals else 0,
        'avg_politicians': window_signals['n_politicians'].mean() if has_signals else 0
    })

lookback_df = pd.DataFrame(lookback_results)

# Plot results: signal count (left) and average net flow in thousands (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

axes[0].plot(lookback_df['lookback'], lookback_df['n_signals'], marker='o', linewidth=2)
axes[0].set_xlabel('Lookback Window (Days)')
axes[0].set_ylabel('Number of Signals')
axes[0].set_title('Signal Count vs Lookback Window')
axes[0].grid(True, alpha=0.3)

axes[1].plot(lookback_df['lookback'], lookback_df['avg_net_flow'] / 1000,
            marker='o', linewidth=2, color='green')
axes[1].set_xlabel('Lookback Window (Days)')
axes[1].set_ylabel('Average Net Flow ($1000s)')
axes[1].set_title('Average Net Flow vs Lookback Window')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print('\nLookback Window Sensitivity:')
print(lookback_df.to_string(index=False))

## Summary

Topics covered by this signal analysis (record the concrete findings for each after running the notebook):
- Signal generation quality and consistency
- Impact of committee weighting
- Bipartisan agreement effects
- Signal persistence characteristics
- Optimal lookback window selection