## NFL Picks

NFL match prediction with scores using historical data (1999-Present).

### Import Required Packages

In [None]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category for the rest of the session. This keeps the
# notebook output compact, but it also hides deprecation and chained-assignment
# warnings that might point at real problems in later cells.
warnings.filterwarnings('ignore')

# Confirmation banner so a failed import is obvious when the cell is run
print("Packages imported successfully!")

### Load Historical NFL Data

In [None]:
# Load historical data from 1999 to present.
# This cell creates the notebook-level frames `pbp_data` and `schedules`
# that later cells read.
print("Loading NFL historical data from 1999 to present...")

# Get years from 1999 to current year (inclusive of the current calendar year)
# NOTE(review): early in a calendar year the current season has not started;
# confirm nfl_data_py tolerates a requested year with no published data.
current_year = pd.Timestamp.now().year
years = list(range(1999, current_year + 1))

# Load play-by-play data (this might take a few minutes)
# In the cells visible here, pbp_data is only inspected (shape/head/columns);
# the feature pipeline below is built from `schedules` alone.
pbp_data = nfl.import_pbp_data(years)
print(f"Loaded play-by-play data: {pbp_data.shape}")

# Load schedule data (one row per game; includes gameday, teams and scores)
schedules = nfl.import_schedules(years)
print(f"Loaded schedules: {schedules.shape}")

# We'll derive team stats from the play-by-play and schedule data
print("Data loading complete!")

### Data Exploration and Understanding

In [None]:
# Explore the structure of our datasets (read-only inspection; the only new
# notebook-level name created here is `completed_games`)
print("=== SCHEDULE DATA SAMPLE ===")
print(schedules.head())
print("\nSchedule columns:", schedules.columns.tolist())

print("\n=== PLAY BY PLAY SAMPLE ===")
print(pbp_data.head())
print(f"\nPBP columns count: {len(pbp_data.columns)}")

# Check for missing scores
# The interpolated value is a two-entry Series (one null count per score
# column), so this prints a small table rather than a single number.
print(f"\nGames with missing scores: {schedules[['home_score', 'away_score']].isnull().sum()}")

# Look at score distributions
# Rows missing either score are dropped so the averages only cover games
# that have a recorded result.
completed_games = schedules.dropna(subset=['home_score', 'away_score'])
print(f"\nCompleted games: {len(completed_games)}")
print(f"Average home score: {completed_games['home_score'].mean():.1f}")
print(f"Average away score: {completed_games['away_score'].mean():.1f}")
print(f"Average total score: {(completed_games['home_score'] + completed_games['away_score']).mean():.1f}")

### Create Team Performance Features

In [None]:
# Create rolling team performance metrics
def calculate_team_performance_features(schedules_df):
    """Build one row per team per game with pre-game performance features.

    Every schedule row is expanded into two rows (home side and away side).
    All derived features are shifted by one game so a row only reflects
    games the team played *before* that row's game.

    Args:
        schedules_df: DataFrame with at least the columns ``season``,
            ``week``, ``gameday``, ``home_team``, ``away_team``,
            ``home_score`` and ``away_score``. Unplayed games may carry
            NaN scores. The frame is not modified.

    Returns:
        DataFrame sorted by ``team`` then ``game_date`` with columns
        ``season``, ``week``, ``game_date``, ``team``, ``points_for``,
        ``points_against``, ``is_home`` plus:

        * ``avg_points_for_L4`` / ``avg_points_against_L4``: mean over the
          team's previous (up to) four games.
        * ``win_rate_L4``: share of wins over the previous (up to) four
          games that have a result; unplayed games are ignored.
        * ``season_points_for_avg`` / ``season_points_against_avg``: mean
          over the team's earlier games in the same season (NaN for the
          first game of each season).
    """
    games = schedules_df.copy()
    games['game_date'] = pd.to_datetime(games['gameday'])

    long_columns = ['season', 'week', 'game_date', 'team', 'points_for', 'points_against']

    # Home perspective: the home score is "points for"
    home_games = games[['season', 'week', 'game_date', 'home_team', 'home_score', 'away_score']].copy()
    home_games.columns = long_columns
    home_games['is_home'] = 1

    # Away perspective: the away score is "points for"
    away_games = games[['season', 'week', 'game_date', 'away_team', 'away_score', 'home_score']].copy()
    away_games.columns = long_columns
    away_games['is_home'] = 0

    all_games = pd.concat([home_games, away_games], ignore_index=True)
    all_games = all_games.sort_values(['team', 'game_date']).reset_index(drop=True)

    def _prior_rolling_mean(series):
        # Mean of the last four observations, shifted so the current game is excluded
        return series.rolling(window=4, min_periods=1).mean().shift(1)

    def _prior_expanding_mean(series):
        # Running mean of everything seen so far, shifted to exclude the current game
        return series.expanding().mean().shift(1)

    # Work on float copies so the aggregation dtype is stable; the stored
    # points_for / points_against columns keep their original dtype.
    points_for = all_games['points_for'].astype(float)
    points_against = all_games['points_against'].astype(float)

    # A comparison against NaN is False, which would silently count unplayed
    # games as losses; mask them out so rolling().mean() skips them instead.
    has_result = points_for.notna() & points_against.notna()
    won = (points_for > points_against).astype(float).where(has_result)

    team_key = all_games['team']
    all_games['avg_points_for_L4'] = points_for.groupby(team_key).transform(_prior_rolling_mean)
    all_games['avg_points_against_L4'] = points_against.groupby(team_key).transform(_prior_rolling_mean)
    all_games['win_rate_L4'] = won.groupby(team_key).transform(_prior_rolling_mean)

    # The shift happens inside each (team, season) group so the first game of
    # a season does not inherit the previous season's final average.
    team_season_key = [all_games['team'], all_games['season']]
    all_games['season_points_for_avg'] = (
        points_for.groupby(team_season_key).transform(_prior_expanding_mean)
    )
    all_games['season_points_against_avg'] = (
        points_against.groupby(team_season_key).transform(_prior_expanding_mean)
    )

    return all_games

# Build the long-format (one row per team per game) feature table from the
# schedule and show its size plus a sample for a quick sanity check.
print("Calculating team performance features...")
team_performance = calculate_team_performance_features(schedules)
print(f"Team performance features calculated: {team_performance.shape}")
print(team_performance.head())

### Merge Features with Game Data

In [None]:
# Create the main dataset for modeling
def create_modeling_dataset(schedules_df, team_perf_df):
    """Attach each side's pre-game features to the schedule rows.

    The schedule is copied, given a parsed ``game_date`` column, and then
    left-joined twice against ``team_perf_df``: once with the rows flagged
    ``is_home == 1`` (columns suffixed ``_home``) and once with the rows
    flagged ``is_home == 0`` (columns suffixed ``_away``). The join keys are
    ``season``, ``week`` and the matching team column.
    """
    merged = schedules_df.copy()
    merged['game_date'] = pd.to_datetime(merged['gameday'])

    metric_names = [
        'avg_points_for_L4',
        'avg_points_against_L4',
        'win_rate_L4',
        'season_points_for_avg',
        'season_points_against_avg',
    ]

    # Home side first, then away side, so the resulting column order is stable
    for side, home_flag in (('home', 1), ('away', 0)):
        team_key = f'{side}_team'
        side_mask = team_perf_df['is_home'] == home_flag
        side_rows = team_perf_df.loc[side_mask, ['season', 'week', 'team'] + metric_names]

        # Suffix only the metric columns; the join keys keep the schedule's names
        renames = {name: f'{name}_{side}' for name in metric_names}
        renames['team'] = team_key
        side_rows = side_rows.rename(columns=renames)

        merged = merged.merge(
            side_rows,
            on=['season', 'week', team_key],
            how='left',
        )

    return merged

# Join the per-team features back onto the schedule (one row per game)
print("Creating modeling dataset...")
modeling_data = create_modeling_dataset(schedules, team_performance)

# Remove games without scores (future games)
# dropna returns a filtered frame, so modeling_data now holds only games
# where both scores are present.
modeling_data = modeling_data.dropna(subset=['home_score', 'away_score'])

print(f"Modeling dataset shape: {modeling_data.shape}")
print(f"Date range: {modeling_data['gameday'].min()} to {modeling_data['gameday'].max()}")

### Feature Engineering and Data Preparation

In [None]:
def prepare_features_v2(df):
    """Derive matchup features and return the frame plus the feature list.

    The input frame is left untouched: all derived columns are written to a
    copy, which is what gets returned. (Callers typically pass a filtered
    slice of a larger frame, where in-place writes are unreliable.)

    Args:
        df: Game-level DataFrame containing ``week``, ``season`` and the
            ``*_home`` / ``*_away`` rolling and season-average columns listed
            in ``feature_cols`` below.

    Returns:
        tuple: ``(features_df, feature_cols)`` where ``features_df`` is a copy
        of ``df`` with the derived columns added and NaNs in every feature
        column replaced, and ``feature_cols`` is the ordered list of the 25
        column names to feed to a model.
    """
    # Work on a copy so the caller's DataFrame is never mutated
    df = df.copy()

    # Base features
    feature_cols = [
        'week', 'season',
        'avg_points_for_L4_home', 'avg_points_against_L4_home', 'win_rate_L4_home',
        'avg_points_for_L4_away', 'avg_points_against_L4_away', 'win_rate_L4_away',
        'season_points_for_avg_home', 'season_points_against_avg_home',
        'season_points_for_avg_away', 'season_points_against_avg_away'
    ]

    # Strength metrics; 21 is used as the neutral points value when a team
    # has no prior games to average over
    df['home_off_strength'] = df['avg_points_for_L4_home'].fillna(21)
    df['home_def_strength'] = 21 - df['avg_points_against_L4_home'].fillna(21)  # Lower allowed = stronger D
    df['away_off_strength'] = df['avg_points_for_L4_away'].fillna(21)
    df['away_def_strength'] = 21 - df['avg_points_against_L4_away'].fillna(21)

    # Matchup-specific features
    # NOTE(review): these ADD the opposing defence's strength to the offence,
    # so a stronger opposing defence raises the value; confirm this is intended.
    df['off_vs_def_home'] = df['home_off_strength'] + df['away_def_strength']  # Home O vs Away D
    df['off_vs_def_away'] = df['away_off_strength'] + df['home_def_strength']  # Away O vs Home D
    df['total_matchup_strength'] = df['off_vs_def_home'] + df['off_vs_def_away']

    # Team form indicators (win rate over the last 4 games, 0.5 when unknown)
    df['home_form'] = df['win_rate_L4_home'].fillna(0.5)
    df['away_form'] = df['win_rate_L4_away'].fillna(0.5)
    df['form_advantage'] = df['home_form'] - df['away_form']

    # Constant column: identical for every row, so it carries no signal for
    # the model; kept so the feature list stays unchanged
    df['home_advantage'] = 2.8

    # Early season flag (weeks 1-3 have little or no in-season history)
    df['early_season_uncertainty'] = np.where(df['week'] <= 3, 1, 0)

    # NOTE(review): despite the name, this averages the two sides' matchup
    # terms (a total-like quantity) rather than taking home minus away.
    # Left as-is because changing it alters the trained models' inputs.
    df['expected_home_edge'] = (df['home_off_strength'] - df['away_def_strength'] +
                               df['away_off_strength'] - df['home_def_strength']) / 2

    feature_cols.extend([
        'home_off_strength', 'home_def_strength', 'away_off_strength', 'away_def_strength',
        'off_vs_def_home', 'off_vs_def_away', 'total_matchup_strength',
        'home_form', 'away_form', 'form_advantage', 'home_advantage',
        'early_season_uncertainty', 'expected_home_edge'
    ])

    # Fill remaining NaNs in the raw feature columns with 0; columns absent
    # from the frame are skipped here
    for col in feature_cols:
        if col in df.columns:
            df[col] = df[col].fillna(0)

    return df, feature_cols

### Train Models for Score Prediction

In [None]:
# Apply the enhanced features
modeling_data, feature_columns = prepare_features_v2(modeling_data)

# Prepare training data: one shared feature matrix, three regression targets
X = modeling_data[feature_columns]
y_home = modeling_data['home_score']
y_away = modeling_data['away_score']

# Chronological split by season, so the test games are all later than the
# training games.
# NOTE(review): 'season' is itself a feature, so every test row has a season
# value the models never saw during training - confirm this is acceptable.
train_mask = modeling_data['season'] <= 2022  # Use through 2022 for training
test_mask = modeling_data['season'] >= 2023   # Test on 2023+ 

X_train, X_test = X[train_mask], X[test_mask]
y_home_train, y_home_test = y_home[train_mask], y_home[test_mask]
y_away_train, y_away_test = y_away[train_mask], y_away[test_mask]

print(f"Training set: {len(X_train)} games")
print(f"Test set: {len(X_test)} games")

# Hyper-parameters shared by all three GradientBoostingRegressor models below.
# Setting n_iter_no_change turns on early stopping: validation_fraction of the
# training rows is held out internally and boosting stops once the validation
# score fails to improve by tol for that many iterations.
final_params = {
    'n_estimators': 800,
    'learning_rate': 0.02,
    'max_depth': 10,
    'subsample': 0.9,
    'max_features': 0.7,
    'min_samples_split': 15,
    'min_samples_leaf': 8,
    'random_state': 42,
    'validation_fraction': 0.2,
    'n_iter_no_change': 50,
    'tol': 1e-6
}

# Train separate models per target. All three use the same final_params
# (no loss is specified, so each uses the estimator's default loss).
models = {}

# Primary models
models['home_score'] = GradientBoostingRegressor(**final_params)
models['home_score'].fit(X_train, y_home_train)

models['away_score'] = GradientBoostingRegressor(**final_params) 
models['away_score'].fit(X_train, y_away_train)

# Margin model for better winner prediction (target = home score - away score)
models['score_margin'] = GradientBoostingRegressor(**final_params)
models['score_margin'].fit(X_train, y_home_train - y_away_train)

print("Enhanced models trained with score calibration!")

### Evaluate Model Performance

In [None]:
# Make predictions on the held-out test seasons, keyed by target name
predictions = {}
predictions['home_score'] = models['home_score'].predict(X_test)
predictions['away_score'] = models['away_score'].predict(X_test)
predictions['score_margin'] = models['score_margin'].predict(X_test)

# Calculate total score from individual predictions
# (there is no dedicated total model; this is the sum of the two score models)
predictions['total_score'] = predictions['home_score'] + predictions['away_score']

# Create actual total scores for evaluation
y_total_test = y_home_test + y_away_test

# Calculate metrics
def evaluate_model(y_true, y_pred, model_name):
    """Print and return MAE, RMSE and R² for one set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values aligned with ``y_true``.
        model_name: Label used in the printed heading.

    Returns:
        tuple: ``(mae, rmse, r2)``.
    """
    scores = (
        mean_absolute_error(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),  # RMSE = sqrt of MSE
        r2_score(y_true, y_pred),
    )
    mae, rmse, r2 = scores

    report_lines = [
        f"\n{model_name} Performance:",
        f"MAE: {mae:.2f}",
        f"RMSE: {rmse:.2f}",
        f"R²: {r2:.3f}",
    ]
    for line in report_lines:
        print(line)

    return scores

# Evaluate all models (each call prints its metrics; the returned tuples are
# not stored)
evaluate_model(y_home_test, predictions['home_score'], "Home Score Model")
evaluate_model(y_away_test, predictions['away_score'], "Away Score Model")
# "Total Score Model" is the sum of the home and away predictions
evaluate_model(y_total_test, predictions['total_score'], "Total Score Model")
evaluate_model(y_home_test - y_away_test, predictions['score_margin'], "Score Margin Model")

# Create results dataframe: the test-season rows with prediction columns added
# (the prediction arrays are positionally aligned with modeling_data[test_mask])
results_df = modeling_data[test_mask].copy()
results_df['pred_home_score'] = predictions['home_score']
results_df['pred_away_score'] = predictions['away_score']
results_df['pred_total_score'] = predictions['total_score']
results_df['pred_score_margin'] = predictions['score_margin']

print(f"\nSample predictions:")
print(results_df[['home_team', 'away_team', 'home_score', 'away_score',
                 'pred_home_score', 'pred_away_score', 'pred_total_score']].head())

### Visualize Model Performance

In [None]:
# Create enhanced visualization plots with better analysis
# Layout: a 2x3 grid. Top row = actual-vs-predicted scatter for home score,
# away score and margin. Bottom row = winner-correctness scatter, feature
# importances, and the home-score error histogram.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Make predictions for visualization
# (recomputed here rather than read from the `predictions` dict; the models
# and X_test are the same, so the values match that dict)
pred_home = models['home_score'].predict(X_test)
pred_away = models['away_score'].predict(X_test)
pred_margin = models['score_margin'].predict(X_test)

# Home Score Predictions
axes[0,0].scatter(y_home_test, pred_home, alpha=0.6, c='blue', s=30)
# Dashed red diagonal = perfect prediction (y = x) over the observed range
axes[0,0].plot([y_home_test.min(), y_home_test.max()], [y_home_test.min(), y_home_test.max()], 'r--', lw=2)
axes[0,0].set_xlabel('Actual Home Score')
axes[0,0].set_ylabel('Predicted Home Score')
axes[0,0].set_title(f'Home Score Predictions\nMAE: {mean_absolute_error(y_home_test, pred_home):.1f}')
axes[0,0].grid(True, alpha=0.3)

# Away Score Predictions
axes[0,1].scatter(y_away_test, pred_away, alpha=0.6, c='red', s=30)
axes[0,1].plot([y_away_test.min(), y_away_test.max()], [y_away_test.min(), y_away_test.max()], 'r--', lw=2)
axes[0,1].set_xlabel('Actual Away Score')
axes[0,1].set_ylabel('Predicted Away Score')
axes[0,1].set_title(f'Away Score Predictions\nMAE: {mean_absolute_error(y_away_test, pred_away):.1f}')
axes[0,1].grid(True, alpha=0.3)

# Score Margin Predictions (new)
# Margin is home minus away, so positive values mean the home team won
actual_margin = y_home_test - y_away_test
axes[0,2].scatter(actual_margin, pred_margin, alpha=0.6, c='green', s=30)
axes[0,2].plot([actual_margin.min(), actual_margin.max()], [actual_margin.min(), actual_margin.max()], 'r--', lw=2)
# Zero lines split the plot into quadrants: same-sign quadrants are games
# where the predicted winner matches the actual winner
axes[0,2].axhline(y=0, color='black', linestyle='-', alpha=0.5)
axes[0,2].axvline(x=0, color='black', linestyle='-', alpha=0.5)
axes[0,2].set_xlabel('Actual Score Margin (Home - Away)')
axes[0,2].set_ylabel('Predicted Score Margin')
axes[0,2].set_title(f'Score Margin Predictions\nMAE: {mean_absolute_error(actual_margin, pred_margin):.1f}')
axes[0,2].grid(True, alpha=0.3)

# Winner Prediction Accuracy
# A pick is "correct" when actual and predicted margins agree on "> 0".
# A margin of exactly 0 (tie) is therefore grouped with away wins on both sides.
winner_correct = ((actual_margin > 0) == (pred_margin > 0)).sum()
total_games = len(actual_margin)
winner_accuracy = winner_correct / total_games

# Create winner prediction visualization
# Boolean mask (same test as above) used to colour correct vs wrong picks
correct_winners = (actual_margin > 0) == (pred_margin > 0)
axes[1,0].scatter(actual_margin[correct_winners], pred_margin[correct_winners], 
                 alpha=0.6, c='green', s=30, label='Correct Winner')
axes[1,0].scatter(actual_margin[~correct_winners], pred_margin[~correct_winners], 
                 alpha=0.6, c='red', s=30, label='Wrong Winner')
axes[1,0].plot([actual_margin.min(), actual_margin.max()], [actual_margin.min(), actual_margin.max()], 'k--', lw=2)
axes[1,0].axhline(y=0, color='black', linestyle='-', alpha=0.5)
axes[1,0].axvline(x=0, color='black', linestyle='-', alpha=0.5)
axes[1,0].set_xlabel('Actual Margin')
axes[1,0].set_ylabel('Predicted Margin')
axes[1,0].set_title(f'Winner Prediction Accuracy: {winner_accuracy:.1%}')
axes[1,0].legend()
axes[1,0].grid(True, alpha=0.3)

# Feature Importance (taken from the home-score model only)
importance = models['home_score'].feature_importances_
feature_imp = pd.DataFrame({'feature': feature_columns, 'importance': importance})
# Ascending sort so the most important feature ends up at the top of the barh
feature_imp = feature_imp.sort_values('importance', ascending=True)

# Only show top 10 most important features for readability
top_features = feature_imp.tail(10)
axes[1,1].barh(range(len(top_features)), top_features['importance'])
axes[1,1].set_yticks(range(len(top_features)))
axes[1,1].set_yticklabels(top_features['feature'], fontsize=9)
axes[1,1].set_xlabel('Feature Importance')
axes[1,1].set_title('Top 10 Feature Importance\n(Home Score Model)')
axes[1,1].grid(True, alpha=0.3, axis='x')

# Prediction Error Distribution
# Signed error: positive means the model over-predicted the home score
prediction_errors = pred_home - y_home_test
axes[1,2].hist(prediction_errors, bins=30, alpha=0.7, color='purple', edgecolor='black')
axes[1,2].axvline(x=0, color='red', linestyle='--', lw=2)
axes[1,2].set_xlabel('Prediction Error (Predicted - Actual)')
axes[1,2].set_ylabel('Frequency')
axes[1,2].set_title(f'Home Score Error Distribution\nMean Error: {prediction_errors.mean():.1f}')
axes[1,2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print detailed performance metrics
print("=== ENHANCED MODEL PERFORMANCE ANALYSIS ===")
print(f"Home Score MAE: {mean_absolute_error(y_home_test, pred_home):.2f}")
print(f"Away Score MAE: {mean_absolute_error(y_away_test, pred_away):.2f}")
print(f"Score Margin MAE: {mean_absolute_error(actual_margin, pred_margin):.2f}")
print(f"Winner Prediction Accuracy: {winner_accuracy:.1%}")

# Absolute home-score error, computed once and reused for both thresholds
home_abs_error = abs(pred_home - y_home_test)
for point_threshold in (3, 7):
    within_threshold = home_abs_error <= point_threshold
    print(f"Games within {point_threshold} points (home): {within_threshold.sum()}/{len(y_home_test)} ({within_threshold.mean():.1%})")

# Close game analysis (margin <= 7 points)
# Guarded like the blowout branch below: averaging an empty selection would
# yield NaN and print a meaningless "nan%".
close_games = abs(actual_margin) <= 7
if close_games.sum() > 0:
    close_game_accuracy = ((actual_margin[close_games] > 0) == (pred_margin[close_games] > 0)).mean()
    print(f"Close Games (≤7 pts) Winner Accuracy: {close_game_accuracy:.1%}")
else:
    # Keep the name defined for any later cell that reads it
    close_game_accuracy = float('nan')
    print("Close Games (≤7 pts) Winner Accuracy: n/a (no close games in test set)")

# Blowout game analysis (margin > 14 points) 
blowout_games = abs(actual_margin) > 14
if blowout_games.sum() > 0:
    blowout_accuracy = ((actual_margin[blowout_games] > 0) == (pred_margin[blowout_games] > 0)).mean()
    print(f"Blowout Games (>14 pts) Winner Accuracy: {blowout_accuracy:.1%}")

### Make Predictions for Future Games

In [None]:
class NFLPredictor:
    """Heuristic NFL game predictor driven by a per-team stats table.

    Predictions come from the hand-maintained ``team_stats`` dictionary
    (points per game, points allowed per game, wins, games, form) and a fixed
    formula. The class does not use the gradient-boosting models trained
    elsewhere in this notebook.
    """

    # Neutral stats used by predict_game for teams missing from team_stats
    _DEFAULT_STATS = {'ppg': 21.0, 'papg': 21.0, 'wins': 1, 'games': 2, 'form': 0.5}

    def __init__(self):
        """Seed ``team_stats`` with the 2025 season numbers through Week 2."""
        # Updated 2025 season team performance tracking (through Week 2)
        # Keys per team: ppg = points scored per game, papg = points allowed
        # per game, wins/games = record so far, form = 0..1 momentum score.
        self.team_stats = {
            # AFC East
            'BUF': {'ppg': 31.0, 'papg': 13.5, 'wins': 2, 'games': 2, 'form': 0.85},
            'MIA': {'ppg': 17.5, 'papg': 23.0, 'wins': 1, 'games': 2, 'form': 0.6},
            'NYJ': {'ppg': 18.0, 'papg': 26.5, 'wins': 1, 'games': 2, 'form': 0.5},
            'NE': {'ppg': 16.5, 'papg': 23.5, 'wins': 1, 'games': 2, 'form': 0.4},

            # AFC North
            'BAL': {'ppg': 25.5, 'papg': 23.0, 'wins': 1, 'games': 2, 'form': 0.7},
            'CIN': {'ppg': 25.0, 'papg': 29.0, 'wins': 0, 'games': 2, 'form': 0.3},
            'PIT': {'ppg': 21.5, 'papg': 16.5, 'wins': 2, 'games': 2, 'form': 0.8},
            'CLE': {'ppg': 15.5, 'papg': 30.5, 'wins': 0, 'games': 2, 'form': 0.2},

            # AFC South
            'HOU': {'ppg': 25.5, 'papg': 20.5, 'wins': 2, 'games': 2, 'form': 0.8},
            'IND': {'ppg': 23.0, 'papg': 24.5, 'wins': 1, 'games': 2, 'form': 0.6},
            'JAX': {'ppg': 16.5, 'papg': 23.0, 'wins': 0, 'games': 2, 'form': 0.3},
            'TEN': {'ppg': 17.0, 'papg': 38.0, 'wins': 0, 'games': 2, 'form': 0.1},

            # AFC West
            'KC': {'ppg': 28.5, 'papg': 19.0, 'wins': 2, 'games': 2, 'form': 0.9},
            'LAC': {'ppg': 18.5, 'papg': 17.0, 'wins': 1, 'games': 2, 'form': 0.7},
            'DEN': {'ppg': 23.0, 'papg': 16.5, 'wins': 2, 'games': 2, 'form': 0.8},
            'LV': {'ppg': 16.5, 'papg': 29.0, 'wins': 0, 'games': 2, 'form': 0.2},

            # NFC East
            'PHI': {'ppg': 22.5, 'papg': 21.5, 'wins': 1, 'games': 2, 'form': 0.6},
            'DAL': {'ppg': 26.0, 'papg': 23.5, 'wins': 1, 'games': 2, 'form': 0.6},
            'NYG': {'ppg': 17.0, 'papg': 30.0, 'wins': 0, 'games': 2, 'form': 0.2},
            'WAS': {'ppg': 35.5, 'papg': 20.5, 'wins': 2, 'games': 2, 'form': 0.9},

            # NFC North
            'DET': {'ppg': 26.5, 'papg': 21.0, 'wins': 1, 'games': 2, 'form': 0.7},
            'GB': {'ppg': 27.0, 'papg': 22.5, 'wins': 1, 'games': 2, 'form': 0.7},
            'MIN': {'ppg': 22.0, 'papg': 15.0, 'wins': 2, 'games': 2, 'form': 0.8},
            'CHI': {'ppg': 17.0, 'papg': 24.0, 'wins': 1, 'games': 2, 'form': 0.5},

            # NFC South
            'NO': {'ppg': 32.5, 'papg': 17.5, 'wins': 2, 'games': 2, 'form': 0.9},
            'ATL': {'ppg': 23.5, 'papg': 29.0, 'wins': 0, 'games': 2, 'form': 0.3},
            'TB': {'ppg': 27.0, 'papg': 24.0, 'wins': 1, 'games': 2, 'form': 0.6},
            'CAR': {'ppg': 16.5, 'papg': 34.0, 'wins': 0, 'games': 2, 'form': 0.1},

            # NFC West
            'LAR': {'ppg': 24.0, 'papg': 19.5, 'wins': 1, 'games': 2, 'form': 0.7},
            'SF': {'ppg': 25.5, 'papg': 17.5, 'wins': 2, 'games': 2, 'form': 0.8},
            'SEA': {'ppg': 23.0, 'papg': 19.5, 'wins': 2, 'games': 2, 'form': 0.8},
            'ARI': {'ppg': 25.0, 'papg': 28.5, 'wins': 1, 'games': 2, 'form': 0.5}
        }

    def predict_game(self, home_team, away_team, week=3, season=2025, verbose=True):
        """
        Predict the outcome of an NFL game between specified teams

        Args:
            home_team (str): Home team abbreviation (e.g., 'TB', 'KC')
            away_team (str): Away team abbreviation
            week (int): Week number (default: 3). Weeks 1-4 get an extra
                pull towards the league average.
            season (int): Season year (default: 2025). Only echoed back in
                the result; it does not influence the prediction.
            verbose (bool): Print debug information (default: True)

        Returns:
            dict: Prediction results with keys ``home_team``, ``away_team``,
            ``week``, ``season``, ``predicted_home_score``,
            ``predicted_away_score``, ``predicted_total``,
            ``predicted_winner``, ``predicted_margin``, ``confidence`` and
            ``home_win_probability``. When the rounded scores are equal the
            winner is the side with the higher unrounded projection (the
            away team if those are exactly equal too).
        """

        # Get team stats (use neutral defaults if not found)
        home_stats = self.team_stats.get(home_team, dict(self._DEFAULT_STATS))
        away_stats = self.team_stats.get(away_team, dict(self._DEFAULT_STATS))

        # Calculate offensive and defensive ratings
        home_offense = home_stats['ppg']
        home_defense = 42 - home_stats['papg']  # Higher = better defence
        away_offense = away_stats['ppg']
        away_defense = 42 - away_stats['papg']

        # Base score projection (offence vs defence matchup); 42 - defence
        # recovers points allowed, so each base is the mean of a team's
        # scoring and its opponent's points allowed
        home_base = (home_offense + (42 - away_defense)) / 2
        away_base = (away_offense + (42 - home_defense)) / 2

        # Apply home field advantage
        home_field_advantage = 2.8
        home_base += home_field_advantage

        # Form/momentum adjustment (applied symmetrically to both sides)
        form_diff = home_stats['form'] - away_stats['form']
        momentum_factor = form_diff * 3.5
        home_base += momentum_factor
        away_base -= momentum_factor

        # Early season regression to mean (weeks 1-4)
        if week <= 4:
            regression_factor = 0.25
            league_avg = 21.5
            home_base = home_base * (1 - regression_factor) + league_avg * regression_factor
            # The away side regresses towards a target one point lower
            away_base = away_base * (1 - regression_factor) + (league_avg - 1) * regression_factor

        # Apply realistic bounds, then round to whole numbers
        bounded_home = max(7, min(50, home_base))
        bounded_away = max(7, min(50, away_base))
        predicted_home = round(bounded_home)
        predicted_away = round(bounded_away)

        # Calculate additional metrics
        margin = abs(predicted_home - predicted_away)
        confidence = min(0.95, max(0.55, margin / 20))
        if predicted_home != predicted_away:
            winner = home_team if predicted_home > predicted_away else away_team
        else:
            # Rounding hid the difference: decide on the unrounded projection
            # instead of always handing a rounded tie to the away team.
            winner = home_team if bounded_home > bounded_away else away_team

        # Debug output
        if verbose:
            print(f"\n=== {home_team} vs {away_team} PREDICTION ===")
            print(f"{home_team} (Home): {home_stats['ppg']:.1f} PPG, {home_stats['papg']:.1f} PAPG, Form: {home_stats['form']:.1f}")
            print(f"{away_team} (Away): {away_stats['ppg']:.1f} PPG, {away_stats['papg']:.1f} PAPG, Form: {away_stats['form']:.1f}")
            print(f"Home advantage: +{home_field_advantage}")
            print(f"Momentum factor: {momentum_factor:+.1f} (favours {home_team if momentum_factor > 0 else away_team})")

        return {
            'home_team': home_team,
            'away_team': away_team,
            'week': week,
            'season': season,
            'predicted_home_score': int(predicted_home),
            'predicted_away_score': int(predicted_away),
            'predicted_total': int(predicted_home + predicted_away),
            'predicted_winner': winner,
            'predicted_margin': int(margin),
            'confidence': round(confidence, 2),
            # Share of the projected total scored by the home team; this is a
            # rough proxy, not a calibrated probability
            'home_win_probability': round(predicted_home / (predicted_home + predicted_away), 3) if predicted_home != predicted_away else 0.5
        }

    def update_team_stats(self, team, points_for, points_against):
        """Fold one finished game into a team's running statistics.

        Updates ``ppg``/``papg`` as running means, bumps ``games``, and moves
        ``form`` by +0.1 on a win or -0.1 otherwise (clamped to 0.05..0.95).
        A drawn game takes the non-win branch. Teams not present in
        ``team_stats`` are ignored.
        """
        if team in self.team_stats:
            stats = self.team_stats[team]
            total_games = stats['games']

            # Update averages (running mean over total_games + 1 games)
            stats['ppg'] = (stats['ppg'] * total_games + points_for) / (total_games + 1)
            stats['papg'] = (stats['papg'] * total_games + points_against) / (total_games + 1)

            # Update wins and form
            if points_for > points_against:
                stats['wins'] += 1
                stats['form'] = min(0.95, stats['form'] + 0.1)
            else:
                stats['form'] = max(0.05, stats['form'] - 0.1)

            stats['games'] += 1

    def get_league_standings(self):
        """Print the standings by division, best team first.

        Teams are ordered by wins, then by average point differential
        (``ppg - papg``). The record is shown as wins-nonwins. Nothing is
        returned.
        """
        divisions = {
            'AFC East': ['BUF', 'MIA', 'NYJ', 'NE'],
            'AFC North': ['BAL', 'CIN', 'PIT', 'CLE'],
            'AFC South': ['HOU', 'IND', 'JAX', 'TEN'],
            'AFC West': ['KC', 'LAC', 'DEN', 'LV'],
            'NFC East': ['PHI', 'DAL', 'NYG', 'WAS'],
            'NFC North': ['DET', 'GB', 'MIN', 'CHI'],
            'NFC South': ['NO', 'ATL', 'TB', 'CAR'],
            'NFC West': ['LAR', 'SF', 'SEA', 'ARI']
        }

        # The heading text is fixed; it does not change after update_team_stats
        print("\n=== 2025 NFL STANDINGS (Through Week 2) ===")
        for division, teams in divisions.items():
            print(f"\n{division}:")
            # Sort teams by wins, then by point differential
            sorted_teams = sorted(teams, key=lambda t: (
                self.team_stats[t]['wins'],
                self.team_stats[t]['ppg'] - self.team_stats[t]['papg']
            ), reverse=True)

            for team in sorted_teams:
                stats = self.team_stats[team]
                record = f"{stats['wins']}-{stats['games'] - stats['wins']}"
                diff = stats['ppg'] - stats['papg']
                print(f"  {team}: {record} ({diff:+.1f})")


# Usage example: one shared predictor instance for the demo cells below
predictor = NFLPredictor()


def predict_matchup(home, away, week=3):
    """Predict a single game with the notebook-level ``predictor``.

    The season and verbosity are left at ``predict_game``'s defaults, so the
    debug breakdown is printed on every call.
    """
    prediction = predictor.predict_game(home_team=home, away_team=away, week=week)
    return prediction

# Test the function
print("=== NFL PREDICTION SYSTEM ===")

# Show current standings
predictor.get_league_standings()

# Sample matchups as (home, away, week); the list mixes Week 2 and Week 3 games
test_games = [
    ('HOU', 'TB', 2),
    ('LV', 'LAC', 2),
    ('BUF', 'MIA', 3),
    ('GB', 'CLE', 3),
]

# The header names both weeks because test_games is not Week-3-only;
# each result line below still prints its own week number.
print("\n=== WEEK 2 & 3 PREDICTIONS ===")
for home, away, week in test_games:
    result = predict_matchup(home, away, week)
    print(f"\nWeek {week}: {result['predicted_winner']} wins {result['predicted_home_score']}-{result['predicted_away_score']}")
    print(f"Confidence: {result['confidence']:.0%}, Margin: {result['predicted_margin']} pts")

### Batch Predictions for Multiple Games

In [None]:
# Function to predict multiple games using NFLPredictor class
def predict_multiple_games_updated(games_list, predictor):
    """Run ``predictor.predict_game`` over a list of games and tabulate the results.

    Args:
        games_list: Iterable of ``(home_team, away_team, week, season)`` tuples.
        predictor: Object exposing ``predict_game(home, away, week, season,
            verbose=...)``, e.g. an ``NFLPredictor`` instance.

    Returns:
        pandas.DataFrame with one row per game that could be predicted. A game
        whose prediction raises is reported on stdout and left out.
    """
    collected = []
    for game in games_list:
        home_team, away_team, week, season = game
        try:
            outcome = predictor.predict_game(home_team, away_team, week, season, verbose=False)
            if isinstance(outcome, dict):
                # Normalise scores to whole numbers, then derive margin and
                # total from the rounded values so the row is self-consistent
                home_pts = round(outcome['predicted_home_score'])
                away_pts = round(outcome['predicted_away_score'])
                outcome.update(
                    predicted_home_score=home_pts,
                    predicted_away_score=away_pts,
                    predicted_margin=abs(home_pts - away_pts),
                    predicted_total=home_pts + away_pts,
                )
            collected.append(outcome)
        except Exception as error:
            # Best effort: one failing game must not abort the whole batch
            print(f"Could not predict {home_team} vs {away_team}: {error}")
    return pd.DataFrame(collected)

# Initialize the predictor
# (rebinding the name gives a fresh instance with the seeded Week 2 stats,
# discarding any update_team_stats changes made on the earlier instance)
predictor = NFLPredictor()

# Week 3 games (Sept 19-22, 2025)
# Each entry is (home_team, away_team, week, season)
week_3_games = [
    ('BUF', 'MIA', 3, 2025),  # Thursday Night Football
    ('GB', 'CLE', 3, 2025),
    ('IND', 'TEN', 3, 2025),
    ('CIN', 'MIN', 3, 2025),
    ('NE', 'PIT', 3, 2025),
    ('TB', 'NYJ', 3, 2025),
    ('WAS', 'LAC', 3, 2025),
    ('PHI', 'KC', 3, 2025),   # Super Bowl rematch
    ('SEA', 'CAR', 3, 2025),
    ('DEN', 'HOU', 3, 2025),
    ('LV', 'SF', 3, 2025),
    ('DAL', 'ARI', 3, 2025),
    ('ATL', 'NO', 3, 2025),
    ('DET', 'CHI', 3, 2025),
    ('LAR', 'NYG', 3, 2025),
    ('BAL', 'JAX', 3, 2025)   # Monday Night Football
]

print("=== PREDICTING WEEK 3 GAMES (Sept 19-22, 2025) ===")
week_3_predictions = predict_multiple_games_updated(week_3_games, predictor)

# Only print when at least one game produced a prediction row
if len(week_3_predictions) > 0:
    print("\nWeek 3 Predictions:")
    display_cols = ['home_team', 'away_team', 'predicted_home_score',
                   'predicted_away_score', 'predicted_winner', 'predicted_margin']
    print(week_3_predictions[display_cols].to_string(index=False))

    # Highlight marquee matchups
    # Rows are selected by exact (home, away) pairing: PHI hosting KC and
    # BUF hosting MIA
    print("\nMARQUEE MATCHUPS:")
    marquee_games = week_3_predictions[
        ((week_3_predictions['home_team'] == 'PHI') & (week_3_predictions['away_team'] == 'KC')) |
        ((week_3_predictions['home_team'] == 'BUF') & (week_3_predictions['away_team'] == 'MIA'))
    ]
    for _, game in marquee_games.iterrows():
        print(f"{game['home_team']} vs {game['away_team']}: {game['predicted_home_score']}-{game['predicted_away_score']} (Winner: {game['predicted_winner']})")

# Weekly breakdown function
def weekly_breakdown():
    """Print, for each configured week, the three tightest and three highest-scoring games.

    Reads the notebook-level ``week_3_predictions`` frame; weeks whose frame
    is empty are skipped. Nothing is returned.
    """
    # (week number, predictions frame, headline) - one entry per week to report
    week_entries = [
        (3, week_3_predictions, "Week 3 Features: TNF Bills-Dolphins, Eagles-Chiefs Super Bowl rematch"),
    ]

    for week_num, frame, headline in week_entries:
        if len(frame) == 0:
            continue

        print(f"\n=== WEEK {week_num} ANALYSIS ===")
        print(headline)

        # Smallest predicted margins = closest games
        tightest = frame.nsmallest(3, 'predicted_margin')
        print(f"\nClosest games:")
        for _, row in tightest.iterrows():
            print(f"  {row['home_team']} vs {row['away_team']}: {row['predicted_margin']} pt margin")

        # Largest predicted combined score
        shootouts = frame.nlargest(3, 'predicted_total')
        print(f"\nHighest scoring games:")
        for _, row in shootouts.iterrows():
            print(f"  {row['home_team']} vs {row['away_team']}: {row['predicted_total']} total points")

# Print the Week 3 analysis (requires week_3_predictions from the cell above)
weekly_breakdown()