# Copy Congress Strategy - Data Exploration

This notebook explores Congressional trade data and market data for the Copy Congress trading strategy.

## Objectives
1. Load and inspect Congressional trade disclosures
2. Analyze trade patterns by politician, party, and committee
3. Explore market data for traded securities
4. Visualize trade flows and timing

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
import warnings
warnings.filterwarnings('ignore')

from data_acquisition import CongressionalDataAcquisition

# Plot defaults applied to every figure created later in the notebook
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print('Imports complete')

## 1. Load Configuration and Data

In [None]:
# Read the strategy settings from config.yaml in the working directory
with open('config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

print('Configuration loaded')

# Echo the settings this notebook relies on so a misconfigured run is obvious
period_start, period_end = config['data']['start_date'], config['data']['end_date']
print(f"Period: {period_start} to {period_end}")

lookback_days = config['signal']['lookback_days']
print(f"Lookback window: {lookback_days} days")

In [None]:
# Build the acquisition helper from the loaded config and unpack the five
# objects returned by get_full_dataset()
data_acq = CongressionalDataAcquisition(config)
(
    congressional_trades,
    prices,
    volumes,
    market_caps,
    volatility,
) = data_acq.get_full_dataset()

print('\nData loaded successfully')

## 2. Explore Congressional Trade Data

In [None]:
# Headline figures for the trade disclosures, followed by a preview of the rows
print('Congressional Trades Summary:')
print(f"Total trades: {len(congressional_trades):,}")

filing_dates = congressional_trades['filing_date']
print(f"Date range: {filing_dates.min()} to {filing_dates.max()}")

# Distinct-value counts for the two identifier columns
for label, column in (('tickers', 'ticker'), ('politicians', 'politician')):
    print(f"Unique {label}: {congressional_trades[column].nunique()}")

# Last expression of the cell: rendered as a rich table
congressional_trades.head(10)

In [None]:
# Trade type distribution: bar chart of transaction types next to a histogram
# of transaction amounts, followed by a few summary numbers.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Buy vs Sell
# value_counts() orders labels by frequency, so colors are chosen per label
# instead of by position: 'buy' is green, every other type is red.
trade_counts = congressional_trades['transaction_type'].value_counts()
bar_colors = ['green' if label == 'buy' else 'red' for label in trade_counts.index]
axes[0].bar(trade_counts.index, trade_counts.values, color=bar_colors)
axes[0].set_title('Trade Type Distribution')
axes[0].set_ylabel('Number of Trades')

# Transaction amounts (missing values are dropped because hist cannot bin NaN)
amounts = congressional_trades['amount'].dropna()
axes[1].hist(amounts, bins=50, edgecolor='black')
axes[1].set_title('Transaction Amount Distribution')
axes[1].set_xlabel('Amount ($)')
axes[1].set_ylabel('Frequency')
axes[1].set_yscale('log')

plt.tight_layout()
plt.show()

# .get() avoids a KeyError when the data contains no 'buy' rows at all, and the
# guard avoids dividing by zero on an empty dataset.
total_trades = trade_counts.sum()
buy_trades = trade_counts.get('buy', 0)
buy_ratio = buy_trades / total_trades if total_trades else float('nan')

print(f"\nBuy ratio: {buy_ratio:.1%}")
print(f"Average transaction: ${congressional_trades['amount'].mean():,.0f}")
print(f"Median transaction: ${congressional_trades['amount'].median():,.0f}")

In [None]:
# Most traded tickers: horizontal bar chart of the 20 most frequent tickers
ticker_counts = congressional_trades['ticker'].value_counts().head(20)
bar_positions = range(len(ticker_counts))

fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(bar_positions, ticker_counts.values)
ax.set_yticks(bar_positions)
ax.set_yticklabels(ticker_counts.index)
ax.set_xlabel('Number of Trades')
ax.set_title('Top 20 Most Traded Tickers by Congress')
ax.invert_yaxis()  # most frequent ticker at the top
fig.tight_layout()
plt.show()

In [None]:
# Trades by party (skipped entirely when the dataset has no 'party' column)
if 'party' in congressional_trades.columns:
    # Per-party trade count, total and mean amount, and number of distinct tickers
    party_stats = congressional_trades.groupby('party').agg({
        'amount': ['count', 'sum', 'mean'],
        'ticker': 'nunique'
    }).round(0)

    print('\nTrades by Party:')
    print(party_stats)

    # Visualization
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    party_counts = congressional_trades['party'].value_counts()
    # groupby sorts parties alphabetically while value_counts sorts by
    # frequency; reindexing puts both panels in the same party order so a
    # party keeps the same position and color in each.
    party_amounts = (
        congressional_trades.groupby('party')['amount'].sum()
        .reindex(party_counts.index)
    )

    # Color by label rather than by position.
    # NOTE(review): assumes party labels start with 'D' / 'R' (e.g. 'D',
    # 'Democrat', 'Republican') -- confirm against the data source. Any other
    # label is drawn gray.
    party_palette = {'D': 'blue', 'R': 'red'}
    bar_colors = [
        party_palette.get(str(party).strip()[:1].upper(), 'gray')
        for party in party_counts.index
    ]

    axes[0].bar(party_counts.index, party_counts.values, color=bar_colors)
    axes[0].set_title('Trades by Party')
    axes[0].set_ylabel('Number of Trades')

    axes[1].bar(party_amounts.index, party_amounts.values, color=bar_colors)
    axes[1].set_title('Total Trade Volume by Party')
    axes[1].set_ylabel('Total Amount ($)')

    plt.tight_layout()
    plt.show()

In [None]:
# Trades by committee (skipped entirely when the dataset has no 'committee' column)
if 'committee' in congressional_trades.columns:
    # Per-committee trade count, total and mean amount, and distinct tickers,
    # ordered from the most to the least active committee
    committee_stats = (
        congressional_trades.groupby('committee')
        .agg({
            'amount': ['count', 'sum', 'mean'],
            'ticker': 'nunique'
        })
        .round(0)
        .sort_values(('amount', 'count'), ascending=False)
    )

    print('\nTrades by Committee:')
    print(committee_stats)

    # Visualization
    committee_counts = congressional_trades['committee'].value_counts()
    bar_positions = range(len(committee_counts))

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.barh(bar_positions, committee_counts.values)
    ax.set_yticks(bar_positions)
    ax.set_yticklabels(committee_counts.index)
    ax.set_xlabel('Number of Trades')
    ax.set_title('Trades by Committee')
    ax.invert_yaxis()  # busiest committee at the top
    fig.tight_layout()
    plt.show()

In [None]:
# Filing delay analysis: whole days between the transaction and its filing.
# The result is stored as a new 'filing_delay' column on congressional_trades.
filing_gap = congressional_trades['filing_date'] - congressional_trades['transaction_date']
congressional_trades['filing_delay'] = filing_gap.dt.days

filing_delay = congressional_trades['filing_delay']
median_delay = filing_delay.median()

fig, ax = plt.subplots(figsize=(12, 6))
ax.hist(filing_delay, bins=50, edgecolor='black')
ax.set_xlabel('Filing Delay (Days)')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Filing Delays')
# Dashed marker at the median delay, labelled in the legend
ax.axvline(median_delay, color='red', linestyle='--',
           label=f'Median: {median_delay:.0f} days')
ax.legend()
fig.tight_layout()
plt.show()

print(f"\nFiling Delay Statistics:")
print(f"Mean: {filing_delay.mean():.1f} days")
print(f"Median: {median_delay:.0f} days")
print(f"Min: {filing_delay.min()} days")
print(f"Max: {filing_delay.max()} days")

## 3. Explore Market Data

In [None]:
# Price data summary
print('Market Data Summary:')
print(f"Number of securities: {len(prices.columns)}")
print(f"Date range: {prices.index[0]} to {prices.index[-1]}")
print(f"Trading days: {len(prices)}")

# Sample of price evolution: the first five price columns, each rebased to 100
sample_tickers = prices.columns[:5]
plt.figure(figsize=(12, 6))
for ticker in sample_tickers:
    ticker_prices = prices[ticker]
    observed_prices = ticker_prices.dropna()
    if observed_prices.empty:
        # No price history at all for this ticker: nothing to draw
        continue
    # Rebase on the first *observed* price. Dividing by iloc[0] would turn the
    # whole line into NaN for a ticker whose history starts after the first row.
    normalized_prices = ticker_prices / observed_prices.iloc[0] * 100
    plt.plot(normalized_prices.index, normalized_prices, label=ticker)

plt.xlabel('Date')
plt.ylabel('Normalized Price (Base = 100)')
plt.title('Sample Price Evolution')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Volatility analysis: per-security mean of the volatility frame, highest first.
# Securities whose mean is NaN (no volatility observations) are dropped so the
# histogram below can bin the values.
avg_volatility = volatility.mean().dropna().sort_values(ascending=False)

print('\nVolatility Summary:')
print(f"Average volatility: {avg_volatility.mean():.1%}")
print(f"Median volatility: {avg_volatility.median():.1%}")
# The opening and closing quotes were mismatched here, which made the whole
# cell a SyntaxError.
print("\nTop 10 Most Volatile:")
print(avg_volatility.head(10))

# Histogram (values scaled to percent to match the axis label)
plt.figure(figsize=(12, 6))
plt.hist(avg_volatility * 100, bins=30, edgecolor='black')
plt.xlabel('Annualized Volatility (%)')
plt.ylabel('Frequency')
plt.title('Distribution of Historical Volatility')
plt.axvline(avg_volatility.mean() * 100, color='red', linestyle='--', label='Mean')
plt.legend()
plt.tight_layout()
plt.show()

## 4. Time Series Analysis of Congressional Trades

In [None]:
# Trades over time: number of disclosures filed in each calendar month.
# NOTE(review): newer pandas releases prefer the 'ME' alias over 'M' for
# month-end resampling -- confirm against the pinned pandas version.
trades_by_filing_date = congressional_trades.set_index('filing_date')
trades_by_month = trades_by_filing_date.resample('M').size()

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(trades_by_month.index, trades_by_month.values, linewidth=2)
ax.set_xlabel('Date')
ax.set_ylabel('Number of Trades')
ax.set_title('Congressional Trades Over Time (Monthly)')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

print(f"\nAverage trades per month: {trades_by_month.mean():.1f}")

In [None]:
# Net buy/sell flow over time.
# Signed amount per trade: +amount for 'buy', -amount for every other type.
# Computed column-wise instead of with a row-by-row apply(axis=1); the result
# is stored as a new 'net_flow' column on congressional_trades.
is_buy = congressional_trades['transaction_type'] == 'buy'
trade_amounts = congressional_trades['amount']
congressional_trades['net_flow'] = trade_amounts.where(is_buy, -trade_amounts)

# Sum the signed amounts per calendar month of filing
monthly_flow = congressional_trades.set_index('filing_date').resample('M')['net_flow'].sum()

plt.figure(figsize=(14, 6))
colors = ['green' if x > 0 else 'red' for x in monthly_flow]
# On a date axis the bar width is measured in days; the default of 0.8 would
# draw each monthly bar less than a day wide, so use roughly two thirds of a month.
plt.bar(monthly_flow.index, monthly_flow.values / 1e6, width=20, color=colors, alpha=0.7)
plt.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
plt.xlabel('Date')
plt.ylabel('Net Flow ($ Millions)')
plt.title('Congressional Net Buy/Sell Flow Over Time')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 5. Correlation Analysis

In [None]:
# Sample correlation matrix of daily returns for the most traded tickers.
# Tickers are ranked by trade count, keeping only those with a price column,
# so the prices[...] lookup cannot raise KeyError for an unpriced ticker.
trade_frequency = congressional_trades['ticker'].value_counts()
has_prices = trade_frequency.index.isin(prices.columns)
sample_tickers = trade_frequency[has_prices].head(10).index

sample_prices = prices[sample_tickers]
sample_returns = sample_prices.pct_change().dropna()

correlation_matrix = sample_returns.corr()

if correlation_matrix.empty:
    # Nothing to draw: no traded ticker has price data
    print('No traded tickers with price data; skipping correlation heatmap')
else:
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', center=0,
                square=True, linewidths=1, cbar_kws={'label': 'Correlation'})
    plt.title('Return Correlation Matrix - Top 10 Traded Stocks')
    plt.tight_layout()
    plt.show()

## Summary

This exploration covered the following areas (see each section's output for the specific findings):
- Congressional trade patterns and volumes
- Filing delays between transaction and disclosure
- Most actively traded securities
- Market volatility characteristics
- Time series trends in Congressional trading activity