# Multi-Year District Candidate Analysis - Interactive Exploration

This notebook analyzes district-level candidates (State House/Senate) across 2018-2024,
comparing their performance to statewide (top-of-ticket) candidates, with filters for how competitive each race was.

## Features

1. **Multi-Year Coverage**: Analyze 2018, 2020, 2022, and 2024 elections
2. **Competitiveness Flags**: Filter by opposition quality (strong, moderate, weak, none)
3. **Candidate Tracking**: Follow individual candidates across multiple elections
4. **Crossover Appeal**: Identify candidates who win in unfavorable districts

## Getting Started

Run each cell in order by pressing Shift+Enter. Later cells reuse variables created by earlier ones, so do not skip ahead.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from district_candidate_analyzer_multiyear import MultiYearDistrictCandidateAnalyzer

# Pandas display settings: None lifts the column-count and width limits so
# wide result tables are not truncated; row output is capped at 100.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 100),
    ('display.width', None),
):
    pd.set_option(option_name, option_value)

# Plot appearance: matplotlib style sheet first, then the seaborn colour palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

print("Libraries imported successfully")

## 1. Initialize Multi-Year Analyzer

In [None]:
# Initialize analyzer (loads all years: 2018-2024)
# Every later cell in this notebook calls methods on this `analyzer` object,
# so this cell must run before any of the sections below.
analyzer = MultiYearDistrictCandidateAnalyzer()

print("\nData loaded successfully!")

## 2. Search for Specific Candidates

Track individual candidates across all elections

In [None]:
# Search for James Talarico
# Fetches the State House records that track_candidate_over_time returns for
# 'Talarico', shows them, and prints a short summary. The resulting
# `talarico` frame is reused by the plotting and export cells.
print("=" * 80)
print("JAMES TALARICO - Career Trajectory")
print("=" * 80)

talarico = analyzer.track_candidate_over_time('Talarico', district_level='house')

if not talarico.empty:
    display(talarico)

    # Summary stats
    talarico_years = ', '.join(map(str, sorted(talarico['year'].unique())))
    # The records may span more than one district, so list every distinct
    # district (in order of first appearance) instead of only the first
    # row's. With a single district the output is unchanged.
    talarico_districts = ', '.join(f"HD-{d}" for d in talarico['district'].unique())

    print("\n=== SUMMARY ===")
    print(f"Years served: {talarico_years}")
    print(f"District: {talarico_districts}")
    print(f"Average vs top-of-ticket: {talarico['vs_top_ticket'].mean():.2f} points")
    print(f"Average winning margin: {talarico['winning_margin'].mean():.1f} points")
    print(f"Opposition faced: {list(talarico['opposition_strength'].unique())}")
else:
    print("No records found for Talarico")

In [None]:
# Search for Jasmine Crockett
# Fetches the State House records that track_candidate_over_time returns for
# 'Crockett', shows them, and prints a short summary. The resulting
# `crockett` frame is reused by the plotting and export cells.
print("=" * 80)
print("JASMINE CROCKETT - Career Trajectory")
print("=" * 80)

crockett = analyzer.track_candidate_over_time('Crockett', district_level='house')

if not crockett.empty:
    display(crockett)

    # Summary stats
    crockett_years = ', '.join(map(str, sorted(crockett['year'].unique())))
    # The records may span more than one district, so list every distinct
    # district (in order of first appearance) instead of only the first
    # row's. With a single district the output is unchanged.
    crockett_districts = ', '.join(f"HD-{d}" for d in crockett['district'].unique())

    print("\n=== SUMMARY ===")
    print(f"Years served: {crockett_years}")
    print(f"District: {crockett_districts}")
    print(f"Average vs top-of-ticket: {crockett['vs_top_ticket'].mean():.2f} points")
    print(f"Average winning margin: {crockett['winning_margin'].mean():.1f} points")
    print(f"Opposition faced: {list(crockett['opposition_strength'].unique())}")
else:
    print("No records found for Crockett")

## 3. Visualize Candidate Performance Over Time

In [None]:
# Plot Talarico's performance trajectory
# Left panel: candidate vote share next to the top-of-ticket share.
# Right panel: the gap between the two, coloured by sign.
if not talarico.empty:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Pin the x ticks to the election years actually present; the automatic
    # locator can otherwise label in-between or fractional years, especially
    # when there are only one or two data points.
    election_years = sorted(talarico['year'].unique())

    # Chart 1: Vote percentage over time
    ax1 = axes[0]
    ax1.plot(talarico['year'], talarico['percentage'],
             marker='o', linewidth=2, markersize=10, color='blue', label='Talarico')
    ax1.plot(talarico['year'], talarico['top_ticket_pct'],
             marker='s', linewidth=2, markersize=8, color='lightblue',
             linestyle='--', label='Top-of-Ticket (D)')
    ax1.axhline(y=50, color='black', linestyle='--', alpha=0.3)  # 50% reference line
    ax1.set_xticks(election_years)
    ax1.set_xlabel('Year')
    ax1.set_ylabel('Vote Percentage')
    ax1.set_title('James Talarico - Vote Performance')
    ax1.legend()
    ax1.grid(alpha=0.3)

    # Chart 2: vs Top-of-Ticket
    ax2 = axes[1]
    # Strictly positive gaps are green; zero and negative gaps are red.
    colors = ['green' if x > 0 else 'red' for x in talarico['vs_top_ticket']]
    ax2.bar(talarico['year'], talarico['vs_top_ticket'], color=colors, alpha=0.7)
    ax2.axhline(y=0, color='black', linestyle='-', linewidth=1)
    ax2.set_xticks(election_years)
    ax2.set_xlabel('Year')
    ax2.set_ylabel('Points vs Top-of-Ticket')
    ax2.set_title('Talarico Overperformance (Green) / Underperformance (Red)')
    ax2.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.show()

In [None]:
# Plot Crockett's performance trajectory
# Left panel: candidate vote share next to the top-of-ticket share.
# Right panel: the gap between the two, coloured by sign.
if not crockett.empty:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Pin the x ticks to the election years actually present; the automatic
    # locator can otherwise label in-between or fractional years, especially
    # when there are only one or two data points.
    election_years = sorted(crockett['year'].unique())

    # Chart 1: Vote percentage over time
    ax1 = axes[0]
    ax1.plot(crockett['year'], crockett['percentage'],
             marker='o', linewidth=2, markersize=10, color='blue', label='Crockett')
    ax1.plot(crockett['year'], crockett['top_ticket_pct'],
             marker='s', linewidth=2, markersize=8, color='lightblue',
             linestyle='--', label='Top-of-Ticket (D)')
    ax1.axhline(y=50, color='black', linestyle='--', alpha=0.3)  # 50% reference line
    ax1.set_xticks(election_years)
    ax1.set_xlabel('Year')
    ax1.set_ylabel('Vote Percentage')
    ax1.set_title('Jasmine Crockett - Vote Performance')
    ax1.legend()
    ax1.grid(alpha=0.3)

    # Chart 2: vs Top-of-Ticket
    ax2 = axes[1]
    # Strictly positive gaps are green; zero and negative gaps are red.
    colors = ['green' if x > 0 else 'red' for x in crockett['vs_top_ticket']]
    ax2.bar(crockett['year'], crockett['vs_top_ticket'], color=colors, alpha=0.7)
    ax2.axhline(y=0, color='black', linestyle='-', linewidth=1)
    ax2.set_xticks(election_years)
    ax2.set_xlabel('Year')
    ax2.set_ylabel('Points vs Top-of-Ticket')
    ax2.set_title('Crockett Overperformance (Green) / Underperformance (Red)')
    ax2.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.show()

## 4. Compare Multiple Candidates

Search for and compare any candidates you're interested in

In [None]:
# CUSTOMIZE THIS: Search for any candidate
CANDIDATE_NAME = "Morales"  # Change this to search for different candidates

print(f"Searching for candidates matching '{CANDIDATE_NAME}'...\n")

results = analyzer.track_candidate_over_time(CANDIDATE_NAME, district_level='house')

if results.empty:
    print(f"No candidates found matching '{CANDIDATE_NAME}'")
else:
    print(f"Found {len(results)} election records:\n")
    display(results)

    # One summary per district. The header shows the candidate name from the
    # first row of each district's records, and the partisan lean is likewise
    # taken from that first row.
    for district_id, rows in results.groupby('district'):
        headline_name = rows['candidate'].iloc[0]
        print(f"\n=== HD-{district_id}: {headline_name} ===")

        years_seen = ', '.join(str(y) for y in sorted(rows['year'].unique()))
        print(f"Years: {years_seen}")

        mean_gap = rows['vs_top_ticket'].mean()
        print(f"Average vs top-of-ticket: {mean_gap:.2f} points")

        lean = rows['partisan_lean'].iloc[0]
        print(f"District partisan lean: {lean:.1f} (D+/R-)")

## 5. Top Performers by Year

Find the strongest candidates among those who faced real opposition

In [None]:
# Top Democrats in 2024 (with major party opposition)
# Requests House Democrats for 2024 with a minimum vs-top-ticket threshold of
# 2.0 and a required major-party opponent, then shows the first 15 rows.
print("=" * 80)
print("TOP 15 DEMOCRATS - 2024 (Major Party Opposition Only)")
print("=" * 80)

strong_2024 = analyzer.identify_strong_candidates(
    district_level='house',
    year=2024,
    party='D',
    min_vs_top_ticket=2.0,
    require_major_party_opponent=True
)

if not strong_2024.empty:
    display(strong_2024[[
        'district', 'candidate', 'percentage', 'top_ticket_pct', 'vs_top_ticket',
        'opposition_strength', 'winning_margin', 'partisan_lean'
    ]].head(15))

    # Check if Talarico or Crockett are in top performers.
    # Rank is the row's position in the returned frame, not its index label:
    # the label only equals the position when the index happens to be 0..n-1.
    # NOTE(review): this assumes identify_strong_candidates returns rows
    # ordered best-first (as the head(15) above also presumes) - confirm.
    ranked = strong_2024.reset_index(drop=True)
    for name in ['Talarico', 'Crockett']:
        matches = ranked[ranked['candidate'].str.contains(name, case=False, na=False)]
        if not matches.empty:
            print(f"\n{name} is in top performers! Rank: {matches.index[0] + 1}")
else:
    print("No 2024 candidates matched these filters")

## 6. Crossover Appeal Analysis

Find candidates who won in unfavorable districts

In [None]:
# Democrats with crossover appeal (all years)
# Prints a banner, asks the analyzer for House Democrats with a required
# major-party opponent, and shows a fixed subset of the result's columns.
banner_rule = "=" * 80
print(banner_rule)
print("DEMOCRATS WITH CROSSOVER APPEAL (All Years)")
print("Won R-leaning districts with major party opposition")
print(banner_rule)

crossover_filters = dict(
    district_level='house',
    party='D',
    require_major_party_opponent=True,
)
crossover = analyzer.identify_crossover_appeal_candidates(**crossover_filters)

# Columns to show, in display order
crossover_columns = [
    'year', 'district', 'candidate', 'percentage', 'vs_top_ticket',
    'partisan_lean', 'opposition_strength', 'winning_margin',
]
display(crossover[crossover_columns])

## 7. Custom District Search

Analyze any specific district across all years

In [None]:
# CUSTOMIZE THIS: Analyze a specific district
# Shows every record for one State House district and plots each party's
# vote share by year.
DISTRICT_NUMBER = 50  # Change this to any district number (1-150)

print("=" * 80)
print(f"STATE HOUSE DISTRICT {DISTRICT_NUMBER} - All Years")
print("=" * 80)

# Get all data
all_data = analyzer.calculate_vs_top_ticket(district_level='house')

# Filter to specific district. Both sides are compared as strings so the
# filter matches whether the 'district' column holds integers or strings;
# comparing a numeric column to a string would silently match nothing.
district_mask = all_data['district'].astype(str) == str(DISTRICT_NUMBER)
district_data = all_data[district_mask].sort_values('year')

if not district_data.empty:
    display(district_data[[
        'year', 'candidate', 'party', 'percentage', 'top_ticket_pct', 'vs_top_ticket',
        'partisan_lean', 'has_major_party_opponent', 'opposition_strength'
    ]])

    # Plot district trends: one line per party; any party other than D/R is gray
    party_colors = {'D': 'blue', 'R': 'red'}
    fig, ax = plt.subplots(figsize=(10, 6))

    for party in district_data['party'].unique():
        party_data = district_data[district_data['party'] == party]
        ax.plot(party_data['year'], party_data['percentage'],
                marker='o', linewidth=2, markersize=8, label=f"{party}",
                color=party_colors.get(party, 'gray'))

    ax.axhline(y=50, color='black', linestyle='--', alpha=0.3)  # 50% reference line
    # Pin ticks to the election years present so the axis does not show
    # in-between or fractional years.
    ax.set_xticks(sorted(district_data['year'].unique()))
    ax.set_xlabel('Year')
    ax.set_ylabel('Vote Percentage')
    ax.set_title(f'HD-{DISTRICT_NUMBER} Performance Over Time')
    ax.legend()
    ax.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()
else:
    print(f"No data found for District {DISTRICT_NUMBER}")

## 8. Export Data for Further Analysis

In [None]:
# Export specific candidate data
# Each frame is written only when it has rows, so no empty CSV files are
# produced. Paths are relative, so files are written relative to the
# current working directory. index=False omits the DataFrame index column.
# Requires the `talarico`, `crockett` and `strong_2024` variables created in
# Sections 2 and 5.
if not talarico.empty:
    talarico.to_csv('talarico_analysis.csv', index=False)
    print("Exported: talarico_analysis.csv")

if not crockett.empty:
    crockett.to_csv('crockett_analysis.csv', index=False)
    print("Exported: crockett_analysis.csv")

# Export all strong 2024 candidates
if not strong_2024.empty:
    strong_2024.to_csv('strong_candidates_2024.csv', index=False)
    print("Exported: strong_candidates_2024.csv")

## Next Steps

1. **Search for other candidates**: Modify the CANDIDATE_NAME variable in Section 4
2. **Analyze specific districts**: Change DISTRICT_NUMBER in Section 7
3. **Adjust filters**: Try different years, parties, or competitiveness thresholds
4. **Compare candidates**: Run multiple searches and compare results

For the original single-year analysis, see: `01_candidate_strength_exploration.ipynb`