# Investigation 3: Validate Momentum on 2023 Data

**Question**: Do momentum's +0.8% accuracy improvement and 92.3% high-confidence (HC) accuracy generalize to 2023, or were they specific to 2024?

**Context**: Ablation study on 2024 showed:
- Momentum Features: 67.6% accuracy (+0.8% vs baseline)
- HC Accuracy: 92.3% (best of all tests!)
- BUT: Your Week 10-14 production showed only 55.0% HC accuracy

**Risk**: Momentum may be overfitted to 2024 season characteristics

**Hypothesis**: If momentum is truly predictive, it should also improve performance on 2023 data

**Test**: Re-run Test #3 (Momentum Features) with TEST_YEAR = 2023

**Expected Outcomes**:
- If momentum helps on 2023 → Deploy to Week 16 with confidence ✅
- If momentum hurts on 2023 → Year-specific overfitting, DO NOT deploy ❌
- If momentum is neutral on 2023 → Proceed cautiously, test on 2022 as well ⚠️

## Setup

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.feature_selection import RFECV
from sklearn.metrics import accuracy_score, brier_score_loss, roc_auc_score
from xgboost import XGBClassifier
import warnings
# Silence every warning category for the rest of the notebook run.
# NOTE(review): this is a blanket filter, so any warnings raised by the
# libraries used in the cells below are hidden as well — consider narrowing it
# to specific categories if results look suspicious.
warnings.filterwarnings('ignore')

print("✅ Libraries loaded")

## Load Cached Data

Reuse data from ablation study to save time

In [None]:
# Load the three CSV caches written by the ablation study notebook.
import os

cache_dir = 'ablation_cache'

# All three files are read below, so require all of them up front. Checking
# only the play-by-play file would let a partially written cache fail later
# inside pd.read_csv with a less helpful message.
cache_files = {
    name: f'{cache_dir}/{name}.csv'
    for name in ('pbp_data', 'weekly_data', 'schedule_data')
}
missing_cache_files = [path for path in cache_files.values() if not os.path.isfile(path)]

if missing_cache_files:
    print("❌ Cache not found. Run debug_enhanced_model.ipynb first to generate cache.")
    raise FileNotFoundError(
        f"Ablation study cache required (missing: {', '.join(missing_cache_files)})"
    )

print("Loading cached data from ablation study...")
pbp_data = pd.read_csv(cache_files['pbp_data'])
weekly_data = pd.read_csv(cache_files['weekly_data'])
schedule_data = pd.read_csv(cache_files['schedule_data'])
print(f"✅ Loaded {len(pbp_data):,} play-by-play rows")
print(f"✅ Loaded {len(weekly_data):,} weekly stat rows")
print(f"✅ Loaded {len(schedule_data):,} games")

## Define Momentum Feature Engineering

Extract exact implementation from ablation study

In [None]:
def add_momentum_features(features, weekly_data, team, season, week):
    """
    Add a ``momentum_last3`` entry to ``features`` for one team.

    The value is a scoring-based proxy for recent form: for the team's three
    most recent weeks before ``week`` in ``season``, the rows' ``fantasy_points``
    are summed per week, the three weekly totals are averaged, and the result
    is divided by 30.0. It is not clamped, so it can exceed 1.0.

    Args:
        features: dict that receives the ``momentum_last3`` key (mutated in place).
        weekly_data: DataFrame with ``recent_team``, ``season``, ``week`` and
            ``fantasy_points`` columns.
        team: value matched against ``recent_team``.
        season: season to read stats from.
        week: only rows with ``week`` strictly below this value are used, so
            the game being predicted never contributes.

    Returns:
        The same ``features`` dict, for convenience.
    """
    team_stats = weekly_data[
        (weekly_data['recent_team'] == team) &
        (weekly_data['season'] == season) &
        (weekly_data['week'] < week)
    ]

    # Count distinct weeks, not rows. The per-week groupby/sum below implies
    # several rows per team-week, so a row count would clear the "3 games"
    # threshold after a single game and skip the early-season fallback.
    played_weeks = sorted(team_stats['week'].unique())

    if len(played_weeks) >= 3:
        last_3_weeks = played_weeks[-3:]
        last_3_stats = team_stats[team_stats['week'].isin(last_3_weeks)]
        # Mean weekly fantasy-point total, scaled by 30.0 (scaling constant
        # carried over from the ablation study).
        features['momentum_last3'] = last_3_stats.groupby('week')['fantasy_points'].sum().mean() / 30.0
    else:
        features['momentum_last3'] = 0.5  # Neutral until three weeks of history exist

    return features

def create_game_features_with_momentum(home_team, away_team, season, week, weekly_data):
    """
    Assemble the feature dict for one game, momentum columns included.

    For each side the function derives passing/rushing/total yards per week
    from that team's rows in ``season`` strictly before ``week``, then adds the
    per-team momentum values, their difference (home minus away) and the
    contextual ``season`` / ``week`` / ``is_playoff`` fields.

    Keys are inserted in a fixed order (home yardage, away yardage, momentum,
    context); the order is kept stable because it determines column order when
    the dicts are turned into a DataFrame.
    """
    sides = (('home', home_team), ('away', away_team))
    in_window = (weekly_data['season'] == season) & (weekly_data['week'] < week)

    features = {}

    # Per-week yardage averages (simplified feature set)
    for side, team in sides:
        team_rows = weekly_data[(weekly_data['recent_team'] == team) & in_window]

        if len(team_rows) == 0:
            # No history yet for this team in this season
            passing_ypg = 0
            rushing_ypg = 0
        else:
            weeks_played = len(team_rows['week'].unique())
            passing_ypg = team_rows['passing_yards'].sum() / weeks_played
            rushing_ypg = team_rows['rushing_yards'].sum() / weeks_played

        features[f'{side}_passing_ypg'] = passing_ypg
        features[f'{side}_rushing_ypg'] = rushing_ypg
        features[f'{side}_total_ypg'] = passing_ypg + rushing_ypg

    # Momentum: the helper fills a scratch dict; fall back to neutral 0.5
    for side, team in sides:
        scratch = {}
        add_momentum_features(scratch, weekly_data, team, season, week)
        features[f'{side}_momentum_last3'] = scratch.get('momentum_last3', 0.5)

    features['momentum_advantage'] = (
        features['home_momentum_last3'] - features['away_momentum_last3']
    )

    # Context columns
    # NOTE(review): week 18 is flagged as playoff for every season — confirm
    # this matches the schedule format of all seasons in the data.
    features['season'] = season
    features['week'] = week
    features['is_playoff'] = int(week >= 18)

    return features

# Confirmation that the momentum feature functions in this cell were defined.
print("✅ Momentum feature functions defined")

## Build Datasets for 2023 and 2024 Testing

In [None]:
def build_dataset_for_momentum_test(weekly_data, schedule_data, test_year):
    """
    Build per-game feature tables for seasons 2015 through ``test_year``.

    This function only assembles data; splitting into train (before
    ``test_year``) and test (``test_year``) happens in the caller.

    Args:
        weekly_data: weekly stats DataFrame passed through to
            ``create_game_features_with_momentum``.
        schedule_data: DataFrame with ``season``, ``week``, ``home_team``,
            ``away_team`` and (optionally) ``home_score`` / ``away_score``.
        test_year: last season to include.

    Returns:
        ``(df_baseline, df_with_momentum)`` — the same games, where
        ``df_baseline`` has the three momentum columns removed. Both contain
        ``home_win``, ``season`` and ``week``. Only weeks 1-18 are scanned and
        games with a missing team or missing score are skipped.
    """
    print(f"\nBuilding datasets for TEST_YEAR = {test_year}...")

    games = []

    # Use all seasons from 2015 to test_year
    for season in range(2015, test_year + 1):
        season_schedule = schedule_data[schedule_data['season'] == season]

        for week in range(1, 19):
            week_games = season_schedule[season_schedule['week'] == week]

            for _, game in week_games.iterrows():
                home_team = game['home_team']
                away_team = game['away_team']

                if pd.isna(home_team) or pd.isna(away_team):
                    continue

                # Outcome — checked before feature engineering so that games
                # without a final score do not pay for features that would be
                # thrown away.
                home_score = game.get('home_score', 0)
                away_score = game.get('away_score', 0)

                if pd.isna(home_score) or pd.isna(away_score):
                    continue

                # Create features WITH momentum
                features_with = create_game_features_with_momentum(
                    home_team, away_team, season, week, weekly_data
                )

                # Ties count as a non-win for the home team.
                features_with['home_win'] = 1 if home_score > away_score else 0
                # Set explicitly: downstream splitting relies on these columns.
                features_with['season'] = season
                features_with['week'] = week
                games.append(features_with)

    df_with_momentum = pd.DataFrame(games)

    # Create baseline version (remove momentum columns)
    df_baseline = df_with_momentum.drop(
        columns=['home_momentum_last3', 'away_momentum_last3', 'momentum_advantage'],
        errors='ignore',
    )

    # The "- 3" excludes home_win, season and week from the feature count.
    print(f"✅ Built {len(df_with_momentum)} games")
    print(f"   Baseline features: {len(df_baseline.columns) - 3}")
    print(f"   With momentum features: {len(df_with_momentum.columns) - 3}")

    return df_baseline, df_with_momentum

# Build the feature tables once per hold-out season: 2023 is the validation
# target, 2024 is rebuilt alongside it for comparison.
momentum_datasets = {
    holdout_year: build_dataset_for_momentum_test(weekly_data, schedule_data, holdout_year)
    for holdout_year in (2023, 2024)
}

df_2023_baseline, df_2023_momentum = momentum_datasets[2023]
df_2024_baseline, df_2024_momentum = momentum_datasets[2024]

## Test Momentum on 2023 Data

In [None]:
def test_momentum_on_year(df_baseline, df_momentum, test_year):
    """
    Compare the baseline and momentum feature sets on one hold-out season.

    Rows with ``season < test_year`` train the model and rows with
    ``season == test_year`` form the test set. This is a single fixed split:
    the model is not refit week by week. For each configuration the function
    runs RFECV feature selection, fits an isotonic-calibrated soft-voting
    ensemble (random forest, logistic regression, XGBoost) and prints the
    metrics.

    Args:
        df_baseline: game table without momentum columns; must contain
            ``home_win``, ``season`` and ``week``.
        df_momentum: same games with the momentum columns included.
        test_year: season held out for testing.

    Returns:
        dict keyed by configuration name. Each value holds ``accuracy``,
        ``hc_accuracy`` (None when no prediction reaches the confidence
        threshold), ``brier_score``, ``auc_roc``, ``n_games`` and
        ``n_hc_games``.

    Raises:
        ValueError: if the training or test split is empty.
    """
    print(f"\n{'='*80}")
    print(f"TESTING MOMENTUM ON {test_year} DATA")
    print(f"{'='*80}")

    train_end_year = test_year - 1

    # Split train/test
    train_baseline = df_baseline[df_baseline['season'] < test_year]
    test_baseline = df_baseline[df_baseline['season'] == test_year]

    train_momentum = df_momentum[df_momentum['season'] < test_year]
    test_momentum = df_momentum[df_momentum['season'] == test_year]

    print(f"\nTraining data: 2015-{train_end_year} ({len(train_baseline)} games)")
    print(f"Testing data: {test_year} ({len(test_baseline)} games)")

    # Fail early with a readable message instead of an error from deep inside
    # the feature selector or metric functions.
    if len(train_baseline) == 0 or len(test_baseline) == 0:
        raise ValueError(
            f"Need games both before and in {test_year}: "
            f"got {len(train_baseline)} training and {len(test_baseline)} test games"
        )

    results = {}

    for config_name, train_df, test_df in [
        ("BASELINE (No Momentum)", train_baseline, test_baseline),
        ("WITH MOMENTUM FEATURES", train_momentum, test_momentum)
    ]:
        print(f"\n{'-'*80}")
        print(f"Testing: {config_name}")
        print(f"{'-'*80}")

        # Prepare features (season/week are bookkeeping columns, not inputs)
        X_train = train_df.drop(columns=['home_win', 'season', 'week'])
        y_train = train_df['home_win']
        X_test = test_df.drop(columns=['home_win', 'season', 'week'])
        y_test = test_df['home_win']

        # Handle missing values
        X_train = X_train.fillna(0)
        X_test = X_test.fillna(0)

        # Feature selection (RFE)
        # NOTE(review): the tables built in this notebook have 7 baseline and
        # 10 momentum candidate columns, so min_features_to_select=10
        # presumably leaves no room for elimination — confirm this is intended.
        rf_selector = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
        rfe = RFECV(rf_selector, min_features_to_select=10, cv=3, scoring='accuracy')
        rfe.fit(X_train, y_train)

        X_train_selected = rfe.transform(X_train)
        X_test_selected = rfe.transform(X_test)

        selected_features = X_train.columns[rfe.support_].tolist()
        print(f"   Selected {len(selected_features)} features via RFE")
        # Look at every momentum column (home, away and the advantage), so a
        # selection that kept only the away column is not reported as a miss.
        momentum_selected = [name for name in selected_features if 'momentum' in name]
        if momentum_selected:
            print(f"   ✅ RFE selected momentum features (model considers them useful)")
        elif config_name == "WITH MOMENTUM FEATURES":
            print(f"   ⚠️ RFE did NOT select momentum features (model doesn't find them useful)")

        # Train ensemble (3-model like baseline)
        rf = RandomForestClassifier(n_estimators=200, max_depth=5, random_state=42)
        lr = LogisticRegression(C=1.0, max_iter=1000, random_state=42)
        xgb = XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=200, random_state=42, eval_metric='logloss')

        ensemble = VotingClassifier(
            estimators=[('rf', rf), ('lr', lr), ('xgb', xgb)],
            voting='soft'
        )

        calibrated_model = CalibratedClassifierCV(ensemble, method='isotonic', cv=3)
        calibrated_model.fit(X_train_selected, y_train)

        # Predict
        y_pred = calibrated_model.predict(X_test_selected)
        y_proba = calibrated_model.predict_proba(X_test_selected)[:, 1]

        # Metrics
        accuracy = accuracy_score(y_test, y_pred)
        # Round rather than truncate: accuracy * n can land a hair below the
        # true integer count because of floating-point error.
        n_correct = int(round(accuracy * len(y_test)))

        # High-confidence accuracy: predictions at least 70% sure either way
        hc_mask = (y_proba >= 0.70) | (y_proba <= 0.30)
        if hc_mask.sum() > 0:
            hc_accuracy = accuracy_score(y_test[hc_mask], y_pred[hc_mask])
        else:
            hc_accuracy = None

        brier = brier_score_loss(y_test, y_proba)
        auc = roc_auc_score(y_test, y_proba)

        results[config_name] = {
            'accuracy': accuracy,
            'hc_accuracy': hc_accuracy,
            'brier_score': brier,
            'auc_roc': auc,
            'n_games': len(y_test),
            'n_hc_games': int(hc_mask.sum())
        }

        print(f"\n   Results:")
        print(f"   Overall Accuracy: {accuracy:.1%} ({n_correct}/{len(y_test)} games)")
        if hc_accuracy is not None:
            print(f"   HC Accuracy (≥70%): {hc_accuracy:.1%} ({hc_mask.sum()} games)")
        print(f"   Brier Score: {brier:.3f}")
        print(f"   AUC-ROC: {auc:.3f}")

    return results

# Evaluate baseline vs. momentum with 2023 held out as the test season.
# results_2023 maps each configuration name to its metrics dict.
results_2023 = test_momentum_on_year(df_2023_baseline, df_2023_momentum, 2023)

## Test Momentum on 2024 Data (For Comparison)

In [None]:
# Same evaluation with 2024 held out, as the reference point for the 2023 run.
# results_2024 maps each configuration name to its metrics dict.
results_2024 = test_momentum_on_year(df_2024_baseline, df_2024_momentum, 2024)

## Compare Results Across Years

In [None]:
# Cross-year summary: one table row per hold-out season, then a deployment
# recommendation derived from the accuracy deltas.
print("\n" + "="*80)
print("MOMENTUM FEATURE VALIDATION - CROSS-YEAR COMPARISON")
print("="*80)

comparison_data = []

for year, results in [(2023, results_2023), (2024, results_2024)]:
    baseline = results['BASELINE (No Momentum)']
    momentum = results['WITH MOMENTUM FEATURES']

    delta = momentum['accuracy'] - baseline['accuracy']

    # hc_accuracy is None when no prediction reached the confidence threshold.
    # Compare against None explicitly: a plain truthiness test would also
    # treat a legitimate 0.0 accuracy (or a 0.0 delta) as "missing".
    baseline_hc = baseline['hc_accuracy']
    momentum_hc = momentum['hc_accuracy']
    if baseline_hc is not None and momentum_hc is not None:
        hc_delta = momentum_hc - baseline_hc
    else:
        hc_delta = None

    comparison_data.append({
        'Year': year,
        'Baseline Acc': f"{baseline['accuracy']:.1%}",
        'Momentum Acc': f"{momentum['accuracy']:.1%}",
        'Delta': f"{delta:+.1%}",
        'Baseline HC': f"{baseline_hc:.1%}" if baseline_hc is not None else 'N/A',
        'Momentum HC': f"{momentum_hc:.1%}" if momentum_hc is not None else 'N/A',
        'HC Delta': f"{hc_delta:+.1%}" if hc_delta is not None else 'N/A'
    })

comparison_df = pd.DataFrame(comparison_data)
print("\n" + comparison_df.to_string(index=False))

print("\n" + "="*80)
print("INTERPRETATION & RECOMMENDATION")
print("="*80)

# Calculate average delta
delta_2023 = results_2023['WITH MOMENTUM FEATURES']['accuracy'] - results_2023['BASELINE (No Momentum)']['accuracy']
delta_2024 = results_2024['WITH MOMENTUM FEATURES']['accuracy'] - results_2024['BASELINE (No Momentum)']['accuracy']
avg_delta = (delta_2023 + delta_2024) / 2

print(f"\nAverage Delta Across Both Years: {avg_delta:+.1%}")

# Deploy only if both years improve; reject if either year loses more than
# one percentage point; everything in between is a judgement call.
if delta_2023 > 0 and delta_2024 > 0:
    print("\n✅ RECOMMENDATION: DEPLOY MOMENTUM FEATURES")
    print("   Momentum consistently improves accuracy on both 2023 and 2024")
    print("   This suggests the feature generalizes well and is not year-specific")
    print(f"   Expected improvement: {avg_delta:+.1%}")
elif delta_2023 < -0.01 or delta_2024 < -0.01:
    print("\n❌ RECOMMENDATION: DO NOT DEPLOY MOMENTUM FEATURES")
    print("   Momentum hurts performance on at least one test year")
    print("   Risk of year-specific overfitting or inconsistent signal")
    print("   Stick with baseline model")
else:
    print("\n⚠️ RECOMMENDATION: PROCEED WITH CAUTION")
    print("   Momentum shows mixed results or minimal impact")
    print("   Consider testing on 2022 data for additional validation")
    print("   May deploy if HC accuracy consistently improved")

# Check HC consistency
hc_2023 = results_2023['WITH MOMENTUM FEATURES'].get('hc_accuracy')
hc_2024 = results_2024['WITH MOMENTUM FEATURES'].get('hc_accuracy')

if hc_2023 is not None and hc_2024 is not None:
    if hc_2023 >= 0.70 and hc_2024 >= 0.70:
        print(f"\n✅ HIGH-CONFIDENCE PICKS: Consistently strong (2023: {hc_2023:.1%}, 2024: {hc_2024:.1%})")
    elif hc_2023 < 0.65 or hc_2024 < 0.65:
        print(f"\n❌ HIGH-CONFIDENCE PICKS: Inconsistent or weak (2023: {hc_2023:.1%}, 2024: {hc_2024:.1%})")
    else:
        # Both years are at least 65% but not both at 70%: report it rather
        # than leaving the HC verdict silently blank.
        print(f"\n⚠️ HIGH-CONFIDENCE PICKS: Moderate (2023: {hc_2023:.1%}, 2024: {hc_2024:.1%})")

print("\n" + "="*80)

## Save Results

In [None]:
# Persist the cross-year comparison table, then print the closing checklist.
comparison_df.to_csv('investigation_3_momentum_validation_results.csv', index=False)
print("\n✅ Saved results to investigation_3_momentum_validation_results.csv")

# Banner and follow-up steps, emitted one line per print call.
closing_lines = (
    "\n" + "="*80,
    "INVESTIGATION 3 COMPLETE",
    "="*80,
    "\nNext Steps:",
    "1. Review comparison table above",
    "2. If momentum helps consistently → Proceed to deploy in Week 16",
    "3. If momentum inconsistent → Run investigation_4 to test feature pairs",
    "4. Document findings in feature_experiment_log.csv",
)
for closing_line in closing_lines:
    print(closing_line)