## NFL Player WAR

Create a SportsIQ score (a WAR-style metric). The score is reported on a 0-100 scale.

This will take into account a player's:
- EPA / Play
- Success Rate
- CPOE (QB)
- YPRR (WR / TE)
- Pass block / run block grades (OL)
- Pressure rate (DL / EDGE)
- win rate (DL / EDGE)
- Coverage success / penalties (DB)
- Special teams snaps
- Field Goal Made / Missed (K)

In [771]:
import nflreadpy as nfl
import pandas as pd
import numpy as np
from abc import ABC, abstractmethod
from typing import Dict, Any
import os
from datetime import datetime, timedelta

In [772]:
def load_df_master_with_cache(cache_file='nfl_player_master_2025.csv', cache_hours=24):
    """
    Return the 2025 player master table, reusing a CSV cache when it is fresh.

    The cache is considered fresh when the file exists and its modification
    time is less than ``cache_hours`` old. Otherwise the table is rebuilt from
    the ``nfl`` loaders (rosters, snap counts, player stats and the four PFR
    advanced-stat tables), cleaned, written back to ``cache_file`` and returned.

    Args:
        cache_file: Path of the CSV file used as the cache.
        cache_hours: Maximum cache age, in hours, before it is rebuilt.

    Returns:
        (df_master, from_cache): the DataFrame, and True when it was read from
        the cache rather than regenerated.
    """
    print(f"\nChecking for cached df_master in '{cache_file}'...")

    # --- Cache lookup -------------------------------------------------------
    if not os.path.exists(cache_file):
        print(f"⊘ Cache file not found, generating new data...")
    else:
        modified_at = datetime.fromtimestamp(os.path.getmtime(cache_file))
        file_age = datetime.now() - modified_at
        age_hours = file_age.total_seconds() / 3600
        if file_age < timedelta(hours=cache_hours):
            print(f"✓ Loading from cache: {cache_file}")
            print(f"  Cache age: {age_hours:.1f} hours")
            return pd.read_csv(cache_file), True
        print(f"⊘ Cache expired ({age_hours:.1f} hours old), regenerating...")

    # --- Rebuild from source data -------------------------------------------
    print("\nLoading data from NFL API...")
    snap_cols = ['offense_snaps', 'defense_snaps', 'st_snaps']

    # 1. Rosters: keep only the identifying columns, and only players that
    #    have both ids, since both are used as merge keys below.
    roster = nfl.load_rosters(seasons=[2025]).to_pandas()
    id_cols = ['full_name', 'position', 'team', 'gsis_id', 'pfr_id', 'depth_chart_position']
    has_both_ids = roster['gsis_id'].notna() & roster['pfr_id'].notna()
    roster_key = roster.loc[has_both_ids, id_cols]
    print("  ✓ Roster loaded")

    # 2. Snap counts: season totals per player plus the number of distinct
    #    weeks in which the player has a snap-count row.
    snaps = nfl.load_snap_counts(seasons=[2025]).to_pandas()
    by_player = snaps.groupby('pfr_player_id')
    snap_totals = by_player[snap_cols].sum().reset_index()
    game_counts = by_player['week'].nunique().rename('games_played').reset_index()
    snaps_season = snap_totals.merge(game_counts, on='pfr_player_id', how='left')
    print("  ✓ Snap counts loaded")

    # 3. Player stats and the PFR advanced tables, each paired with the
    #    suffix applied to its columns when they collide during the merge.
    player_stats = nfl.load_player_stats(seasons=[2025], summary_level='reg').to_pandas()
    advanced_tables = [
        (nfl.load_pfr_advstats(seasons=[2025], summary_level='season', stat_type=stat_type).to_pandas(),
         f'_{stat_type}')
        for stat_type in ('def', 'pass', 'rush', 'rec')
    ]
    print("  ✓ Player stats loaded")

    # 4. Left-join everything onto the roster so every rostered player is kept.
    df_master = roster_key.copy()
    df_master = df_master.merge(snaps_season, left_on='pfr_id', right_on='pfr_player_id', how='left')
    df_master = df_master.merge(player_stats, left_on='gsis_id', right_on='player_id', how='left')
    for advanced, suffix in advanced_tables:
        df_master = df_master.merge(advanced, on='pfr_id', how='left', suffixes=('', suffix))
    print("  ✓ Data merged")

    # 5. One row per player; drop anyone without a positive snap count in at
    #    least one phase, then anyone with a missing snap column.
    df_master = df_master.drop_duplicates(subset=['gsis_id'], keep='first')
    played_any_snap = (df_master[snap_cols] > 0).any(axis=1)
    df_master = df_master[played_any_snap].dropna(subset=snap_cols)
    print("  ✓ Data cleaned")

    # --- Persist for the next run -------------------------------------------
    df_master.to_csv(cache_file, index=False)
    print(f"✓ Data saved to cache: {cache_file}")

    return df_master, False



In [773]:
# Load df_master from the CSV cache when it is fresh, otherwise rebuild it.
# from_cache records which of the two paths was taken.
df_master, from_cache = load_df_master_with_cache()


Checking for cached df_master in 'nfl_player_master_2025.csv'...
✓ Loading from cache: nfl_player_master_2025.csv
  Cache age: 1.1 hours


In [774]:
# 4.b One row per player: gsis_id identifies the player, keep the first row seen.
df_master = df_master.drop_duplicates(subset=['gsis_id'], keep='first')

# Keep players with a positive snap count in at least one phase, then drop
# any row where one of the three snap columns is missing.
snap_cols = ['offense_snaps', 'defense_snaps', 'st_snaps']
played_any_snap = (df_master[snap_cols] > 0).any(axis=1)
df_master = df_master[played_any_snap].dropna(subset=snap_cols)

In [775]:
# Display the (rows, columns) size of df_master after the cleanup above.
df_master.shape

(1780, 231)

In [776]:
# Check for duplicates
# Rows were deduplicated on gsis_id, so any full_name that still appears more
# than once is listed here for manual inspection.
print("Checking for duplicates by full_name:")
duplicate_names = df_master['full_name'].value_counts()
duplicates = duplicate_names[duplicate_names > 1]
print(f"\nFound {len(duplicates)} players with multiple rows:")
print(duplicates)

# Show Joe Flacco example
# Prints the identifying columns for every row whose full_name is 'Joe Flacco'.
print("\n\nJoe Flacco rows:")
print(df_master[df_master['full_name'] == 'Joe Flacco'][['full_name', 'gsis_id', 'pfr_id', 'team', 'depth_chart_position']])

# Dump the full column list so the available stat names can be looked up.
print("\n\nColumn names:")
print(df_master.columns.tolist())

Checking for duplicates by full_name:

Found 3 players with multiple rows:
full_name
Jordan Phillips    2
Jaylon Jones       2
Byron Young        2
Name: count, dtype: int64


Joe Flacco rows:
    full_name     gsis_id    pfr_id team depth_chart_position
5  Joe Flacco  00-0026158  FlacJo00  CIN                   QB


Column names:
['full_name', 'position_x', 'team', 'gsis_id', 'pfr_id', 'depth_chart_position', 'pfr_player_id', 'offense_snaps', 'defense_snaps', 'st_snaps', 'games_played', 'player_id', 'player_name', 'player_display_name', 'position_y', 'position_group', 'headshot_url', 'season', 'season_type', 'recent_team', 'games', 'completions', 'attempts', 'passing_yards', 'passing_tds', 'passing_interceptions', 'sacks_suffered', 'sack_yards_lost', 'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards', 'passing_yards_after_catch', 'passing_first_downs', 'passing_epa', 'passing_cpoe', 'passing_2pt_conversions', 'pacr', 'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles', 

In [777]:
# For each name that still appears more than once, print the identifying
# columns of its rows so they can be compared by eye (different ids / teams
# indicate different people who share a name).
print("Remaining duplicates - checking if they're different players:\n")
identity_cols = ['full_name', 'gsis_id', 'pfr_id', 'team', 'position_x', 'depth_chart_position']
for dup_name in ('Jordan Phillips', 'Jaylon Jones', 'Byron Young'):
    print(f"\n{dup_name}:")
    dup_rows = df_master.loc[df_master['full_name'] == dup_name, identity_cols]
    print(dup_rows)

Remaining duplicates - checking if they're different players:


Jordan Phillips:
            full_name     gsis_id    pfr_id team position_x  \
68    Jordan Phillips  00-0031557  PhilJo01  BUF         DL   
1578  Jordan Phillips  00-0040175  PhilJo02  MIA         DL   

     depth_chart_position  
68                     DT  
1578                   DT  

Jaylon Jones:
         full_name     gsis_id    pfr_id team position_x depth_chart_position
813   Jaylon Jones  00-0037106  JoneJa12  CHI         DB                   CB
1042  Jaylon Jones  00-0038407  JoneJa13  IND         DB                   CB

Byron Young:
        full_name     gsis_id    pfr_id team position_x depth_chart_position
1198  Byron Young  00-0038978  YounBy00  PHI         DL                   DT
1243  Byron Young  00-0039137  YounBy01   LA         LB                  OLB


In [778]:
class PositionScorer(ABC):
    """
    Base class for position-specific player scoring.

    Each metric is turned into a z-score against the qualifying players at the
    same position, multiplied by its weight and direction, and summed into a
    raw score. The raw score is then rescaled to a T-score (mean 50, standard
    deviation 10) and clipped to the range [0, 99].

    Weights do NOT need to sum to 1.

    Subclasses supply three things: which players qualify (filter_players),
    how to read metrics from a player row (compute_metrics), and the weight /
    direction of every metric (get_config).
    """

    def __init__(self, position: str):
        # Compared against the 'depth_chart_position' column in score_all.
        self.position = position

    @abstractmethod
    def filter_players(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Return only the rows that qualify for scoring.

        score_all passes the *metrics* DataFrame here (one column per metric
        named in get_config, indexed like the player rows), not the raw stats
        table, so filters must refer to metric names.
        """
        pass

    @abstractmethod
    def compute_metrics(self, row: pd.Series) -> Dict[str, float]:
        """Extract raw stats from a row."""
        pass

    @abstractmethod
    def get_config(self) -> Dict[str, Dict[str, float]]:
        """
        Configuration for metrics.
        - 'weight': Relative importance (e.g., 10.0 for huge impact, 1.0 for minor).
        - 'direction': 1 (Higher is Better) or -1 (Lower is Better).
        """
        pass

    def _calculate_raw_score(self, row: pd.Series, peer_stats: pd.DataFrame, config: Dict) -> float:
        """
        Compute the weighted z-score sum for a single player.

        Args:
            row: The player's stats row.
            peer_stats: Stats rows of the players to normalise against.
            config: Mapping of metric name to {'weight', 'direction'}.

        Returns:
            Sum over metrics of z_score * weight * direction. A metric
            contributes 0 when there are fewer than two peers or the peers
            have no spread on it.
        """
        # Metrics are computed once per row up front instead of once per
        # (row, metric) pair inside the loop.
        player_metrics = self.compute_metrics(row)
        peer_metrics = [self.compute_metrics(peer) for _, peer in peer_stats.iterrows()]

        raw_score = 0.0
        for metric, conf in config.items():
            val = player_metrics.get(metric, 0)
            peer_vals = pd.Series([m.get(metric, 0) for m in peer_metrics], dtype=float)

            # Population std with NaN skipped, matching score_all.
            std = peer_vals.std(ddof=0) if len(peer_vals) > 1 else 0.0
            if std > 0:
                z_score = (val - peer_vals.mean()) / std
            else:
                z_score = 0.0

            # Example: Bad Throw (Z=2.0) * Weight(5) * Dir(-1) = -10.0 impact
            raw_score += z_score * conf['weight'] * conf['direction']

        return raw_score

    def score_all(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Score every qualifying player at this position.

        Args:
            df: Stats table with a 'depth_chart_position' column plus whatever
                columns compute_metrics reads.

        Returns:
            One row per qualifying player with 'SportsIQ', 'raw_score' and,
            for every metric, its raw value ('metric_*'), z-score ('z_*') and
            weighted contribution ('contrib_*'). 'player_name' and
            'depth_chart_position' are included when present in df. If no
            player is at this position, or none passes filter_players, an
            empty DataFrame is returned without the score columns.
        """
        # Filter for this position
        peers = df[df['depth_chart_position'] == self.position].copy()

        print(f"\nScoring position: {self.position} with {len(peers)} players")

        if peers.empty:
            return peers

        config = self.get_config()
        metric_names = list(config.keys())

        # 1. Compute raw metrics for all peers at once
        metrics_df = peers.apply(lambda r: pd.Series(self.compute_metrics(r)), axis=1)
        metrics_df = metrics_df.reindex(columns=metric_names)

        print(f"Metrics DataFrame shape: {metrics_df.shape}")

        # Remove players that do not meet the position's qualification rule
        metrics_df = self.filter_players(metrics_df)
        if metrics_df.empty:
            return metrics_df

        print(f"Filtered Metrics DataFrame shape: {metrics_df.shape}")

        # Keep the player rows aligned with the filtered metrics. Without this,
        # excluded players stay in the output with NaN scores.
        peers = peers.loc[metrics_df.index].copy()

        # 2. Calculate peer distribution (mean/std) per metric
        means = metrics_df.mean(skipna=True)
        stds = metrics_df.std(ddof=0, skipna=True)  # Population std

        # 3. Compute z-scores. A metric with zero spread, or a missing value,
        #    yields NaN here and is treated as exactly average (z = 0).
        z_scores_df = (metrics_df - means) / stds.replace(0, np.nan)
        z_scores_df = z_scores_df.fillna(0.0)

        # 4. Compute weighted directional contributions per metric
        weights = pd.Series({m: config[m]['weight'] for m in metric_names})
        directions = pd.Series({m: config[m]['direction'] for m in metric_names})
        contrib_df = z_scores_df.multiply(weights, axis=1).multiply(directions, axis=1)

        # 5. Raw score is sum of contributions
        peers['raw_score'] = contrib_df.sum(axis=1)

        # 6. Convert to T-Score (mean 50, std 10)
        raw_mean = peers['raw_score'].mean()
        raw_std = peers['raw_score'].std()

        # The sample std is NaN when only one player qualifies and 0 when all
        # raw scores are equal; in both cases everyone is "average".
        if pd.isna(raw_std) or raw_std == 0:
            peers['SportsIQ'] = 50.0
        else:
            peers['SportsIQ'] = ((peers['raw_score'] - raw_mean) / raw_std) * 10 + 50

        # Scores are capped at 99, not 100.
        peers['SportsIQ'] = peers['SportsIQ'].clip(0, 99)

        # 7. Attach all breakdown columns
        peers = peers.join(metrics_df.add_prefix('metric_'))
        peers = peers.join(z_scores_df.add_prefix('z_'))
        peers = peers.join(contrib_df.add_prefix('contrib_'))

        # Return in logical order: identifiers → scores → breakdown
        result_cols = (
            ['player_name', 'depth_chart_position', 'SportsIQ', 'raw_score']
            + [f'metric_{m}' for m in metric_names]
            + [f'z_{m}' for m in metric_names]
            + [f'contrib_{m}' for m in metric_names]
        )
        return peers[[c for c in result_cols if c in peers.columns]]



In [779]:
# QB Scorer

class QBScorer(PositionScorer):
    """Scores quarterbacks (depth-chart position "QB") on passing efficiency and volume."""

    # Minimum pass attempts required for a QB to be scored at all.
    MIN_ATTEMPTS = 50

    def __init__(self):
        super().__init__("QB")

    def filter_players(self, df):
        """
        Keep only QBs with at least MIN_ATTEMPTS pass attempts.

        Args:
            df: Metrics DataFrame built from compute_metrics; the 'attempts'
                column is the one produced there.

        Returns:
            The qualifying rows of df.
        """
        return df[df['attempts'] >= self.MIN_ATTEMPTS]

    def compute_metrics(self, row):
        """
        Build the QB metrics for one player row.

        Args:
            row: A player's stats row (anything supporting ``.get``).

        Returns:
            Dict of metric name to value, with the keys used in get_config.
        """
        # Denominators are floored at 1 to avoid division by zero. Argument
        # order matters: max(1, nan) evaluates to 1, so a missing value also
        # falls back to 1.
        atts = max(1, row.get('pass_attempts', 1))
        snaps = max(1, row.get('offense_snaps', 1))
        games_played = max(1, row.get('games_played', 1))

        # nflverse documents passing_epa as a season total over pass attempts
        # and sacks, so divide by dropbacks to get a true per-play value.
        sacks = row.get('sacks_suffered', 0)
        dropbacks = atts + (0 if pd.isna(sacks) else sacks)

        return {
            'epa_per_play': row.get('passing_epa', 0) / dropbacks,
            'cpoe': row.get('passing_cpoe', 0),
            'bad_throw_pct': row.get('bad_throws', 0) / atts,
            'sack_pct': row.get('sacks_suffered', 0) / snaps,
            'interception_pct': row.get('passing_interceptions', 0) / atts,
            'air_yards': row.get('intended_air_yards_per_pass_attempt', 0),
            'accuracy': row.get('on_tgt_throws', 0) / atts,
            'attempts': atts,
            'pacr': row.get('pacr', 0),
            'passing_td_per_game': row.get('passing_tds', 0) / games_played,
            'completions_pct': row.get('completions', 0) / atts,
        }

    def get_config(self):
        """Return weight and direction (1 = higher is better, -1 = lower is better) per metric."""
        return {
            'epa_per_play':        {'weight': 9.0, 'direction': 1},
            'pacr':                {'weight': 7.0, 'direction': 1},
            'bad_throw_pct':       {'weight': 7.0, 'direction': -1},
            'air_yards':           {'weight': 6.0, 'direction': 1},
            'passing_td_per_game': {'weight': 5.0, 'direction': 1},
            'cpoe':                {'weight': 5.0, 'direction': 1},
            'attempts':            {'weight': 4.0, 'direction': 1},
            'sack_pct':            {'weight': 3.0, 'direction': -1},
            'completions_pct':     {'weight': 3.0, 'direction': 1},
            'interception_pct':    {'weight': 3.0, 'direction': -1},
            'accuracy':            {'weight': 2.0, 'direction': 1},
        }

In [780]:
# RB Scorer 
    
class RBScorer(PositionScorer):
    """Scores running backs (depth-chart position "RB") on rushing and receiving output."""

    # (metric name, source column) pairs that are copied from the player row
    # unchanged, with 0 used when the column is absent.
    # NOTE(review): the *_epa columns are used exactly as loaded; confirm
    # whether they are season totals or per-play values.
    _DIRECT_METRICS = (
        ('rushing_epa', 'rushing_epa'),
        ('receiving_epa', 'receiving_epa'),
        ('yac_att', 'yac_att'),
        ('rushing_yards', 'rushing_yards'),
        ('receiving_yards', 'receiving_yards'),
        ('rushing_first_downs', 'rushing_first_downs'),
        ('rushing_tds', 'rushing_tds'),
        ('broken_tackles', 'brk_tkl'),
        ('receiving_yards_after_catch', 'receiving_yards_after_catch'),
        ('fumbles', 'rushing_fumbles'),
        ('drop_percent', 'drop_percent'),
    )

    # (metric name, weight, direction); direction 1 = higher is better,
    # -1 = lower is better.
    _WEIGHTS = (
        # Efficiency: EPA for value added, and yac_att, which the scorer
        # treats as the RB's own contribution independent of blocking.
        ('rushing_epa', 8.0, 1),
        ('receiving_epa', 6.0, 1),
        ('yac_att', 7.0, 1),
        # Production and volume, weighted a little below EPA so raw volume
        # alone is not over-rewarded.
        ('rushing_yards', 6.0, 1),
        ('receiving_yards', 4.0, 1),
        ('rushing_first_downs', 5.0, 1),
        ('rushing_tds', 3.0, 1),
        # Playmaking: elusiveness/power and yards created after the catch.
        ('broken_tackles', 4.0, 1),
        ('receiving_yards_after_catch', 3.0, 1),
        # Negatives: ball security and drops.
        ('fumbles', 5.0, -1),
        ('drop_percent', 2.5, -1),
        # 'carries' carries no weight; it is present so filter_players can
        # apply the minimum-carries cut. 'carries_per_game' is a small bonus
        # for high-volume starters.
        ('carries', 0, 1),
        ('carries_per_game', 2.0, 1),
    )

    def __init__(self):
        super().__init__("RB")

    def filter_players(self, df):
        """Keep only backs with at least 50 carries (df is the metrics frame)."""
        enough_carries = df['carries'] >= 50
        return df[enough_carries]

    def compute_metrics(self, row):
        """Return the RB metrics for one player row, keyed as in get_config."""
        # Floors of 1 keep the per-game ratio defined when a value is 0.
        carries = max(1, row.get('carries', 1))
        games_played = max(1, row.get('games_played', 1))

        metrics = {name: row.get(column, 0) for name, column in self._DIRECT_METRICS}
        metrics['carries'] = carries
        metrics['carries_per_game'] = carries / games_played
        return metrics

    def get_config(self):
        """Return a fresh {metric: {'weight', 'direction'}} mapping."""
        return {
            name: {'weight': weight, 'direction': direction}
            for name, weight, direction in self._WEIGHTS
        }


In [781]:
class OLScorer(PositionScorer):
    """
    Scores offensive linemen on blown blocks, penalties and availability.

    NOTE(review): players are selected where depth_chart_position == "OL";
    confirm that value actually occurs in the data (depth charts may use
    more specific labels).
    NOTE(review): 'sacks_allowed', 'hits_allowed' and 'penalties' are read
    with a default of 0; confirm these columns exist in the stats table,
    otherwise every player gets the same value for those metrics.
    """

    def __init__(self):
        super().__init__("OL")

    def filter_players(self, df):
        """
        Return df unchanged: no minimum-usage cut is applied to linemen.

        Required because filter_players is abstract on the base class; without
        it OLScorer() raises TypeError on instantiation.
        """
        return df

    def compute_metrics(self, row):
        """Return the OL metrics for one player row, keyed as in get_config."""
        # Floor at 1 so the per-snap rates are defined for 0 snaps.
        snaps = max(1, row.get('offense_snaps', 1))
        return {
            'blown_block_rate': (row.get('sacks_allowed', 0) + row.get('hits_allowed', 0)) / snaps,
            'penalty_rate': row.get('penalties', 0) / snaps,
            'snap_count': row.get('offense_snaps', 0)
        }

    def get_config(self):
        """Return weight and direction (1 = higher is better, -1 = lower is better) per metric."""
        return {
            # Massive Penalties for failure
            'blown_block_rate': {'weight': 15.0, 'direction': -1},
            'penalty_rate':     {'weight': 8.0,  'direction': -1},

            # Small positive weight just for being a starter (Availability)
            'snap_count':       {'weight': 2.0,  'direction': 1}
        }



In [782]:
class DLScorer(PositionScorer):
    """
    Scores defensive linemen on pressure, sack conversion and missed tackles.

    NOTE(review): players are selected where depth_chart_position == "DL";
    confirm that value actually occurs in the data (depth charts may use
    more specific labels such as "DT").
    NOTE(review): 'pressures', 'sacks' and 'missed_tackles' are read with a
    default of 0; confirm these column names exist in the stats table,
    otherwise every player gets the same value for those metrics.
    """

    def __init__(self):
        super().__init__("DL")

    def filter_players(self, df):
        """
        Return df unchanged: no minimum-usage cut is applied to defensive linemen.

        Required because filter_players is abstract on the base class; without
        it DLScorer() raises TypeError on instantiation.
        """
        return df

    def compute_metrics(self, row):
        """Return the DL metrics for one player row, keyed as in get_config."""
        # Floor at 1 so the per-snap rates are defined for 0 snaps.
        snaps = max(1, row.get('defense_snaps', 1))
        return {
            'pressure_rate': row.get('pressures', 0) / snaps,
            # Share of pressures finished as sacks; denominator floored at 1.
            'sack_conversion': row.get('sacks', 0) / max(1, row.get('pressures', 1)),
            'missed_tackle_rate': row.get('missed_tackles', 0) / snaps,
        }

    def get_config(self):
        """Return weight and direction (1 = higher is better, -1 = lower is better) per metric."""
        return {
            # Pressure is the best predictor of skill
            'pressure_rate':      {'weight': 12.0, 'direction': 1},

            # Finishing is high value but high variance
            'sack_conversion':    {'weight': 5.0,  'direction': 1},

            # Mistakes
            'missed_tackle_rate': {'weight': 4.0,  'direction': -1}
        }


In [783]:

# Usage
# Score the QBs in df_master and write the result, highest SportsIQ first,
# to nfl_qb_sportsiq_2025.csv (index column omitted).
qb_scorer = QBScorer()
scored_df = qb_scorer.score_all(df_master)
scored_df.sort_values('SportsIQ', ascending=False).to_csv('nfl_qb_sportsiq_2025.csv', index=False)



Scoring position: QB with 81 players
Metrics DataFrame shape: (81, 11)
Filtered Metrics DataFrame shape: (56, 11)


In [784]:
# Score the RBs in df_master and write the result, highest SportsIQ first,
# to nfl_rb_sportsiq_2025.csv. Note that scored_df is rebound here, so the
# QB results from the previous cell are no longer held in that variable.
rb_scorer = RBScorer()
scored_df = rb_scorer.score_all(df_master)
scored_df.sort_values('SportsIQ', ascending=False).to_csv('nfl_rb_sportsiq_2025.csv', index=False)


Scoring position: RB with 151 players
Metrics DataFrame shape: (151, 13)
Filtered Metrics DataFrame shape: (65, 13)


In [785]:
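# NOTE: disabled draft kept for reference. It relies on a POSITION_SCORERS
# mapping and a scorer.score_player(row, peer_stats) method, neither of which
# is defined in the cells shown here; the active scorers expose score_all(df).
# def score_all_players(stats_df: pd.DataFrame, position_col: str = 'depth_chart_position') -> pd.DataFrame: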
#     """
#     Score all players using position-specific scorers.
    
#     Args:
#         stats_df: DataFrame with player stats
#         position_col: Name of position column
    
#     Returns:
#         DataFrame with 'sportsiq_score' column added
#     """
#     stats_df = stats_df.copy()
#     stats_df['sportsiq_score'] = 0.0
    
#     for position in stats_df[position_col].unique():
#         if pd.isna(position):
#             continue
        
#         # Get scorer for this position (fallback to generic if not found)
#         scorer = POSITION_SCORERS.get(str(position).upper())
#         if scorer is None:
#             print(f"Warning: No scorer for position {position}, skipping")
#             continue
        
#         # Get all players at this position for peer normalization
#         position_mask = stats_df[position_col] == position
#         peer_stats = stats_df[position_mask]
        
#         # Score each player
#         scores = []
#         for idx, row in peer_stats.iterrows():
#             try:
#                 score = scorer.score_player(row, peer_stats)
#                 scores.append(score)
#             except Exception as e:
#                 print(f"Error scoring {row.get('player_name', 'unknown')}: {e}")
#                 scores.append(np.nan)
        
#         stats_df.loc[position_mask, 'sportsiq_score'] = scores
#         print(f"Scored {len(scores)} {position} players")
    
#     return stats_df

# # Score all players
# data_scored = score_all_players(df_master)
# data_scored[['player_name', 'depth_chart_position', 'sportsiq_score']].head(20)

# data_scored[['player_name', 'depth_chart_position', 'sportsiq_score']].to_csv('nfl_player_war_2025.csv', index=False)