## NFL PLayer WAR

Create a SportsIQ Score (WAR type). This score should range from 0 - 100 scale

This will take into account a players:
- EPA / Play
- Success Rate
- CPOE (QB)
- YPRR (WR / TE)
- Pass block / run block grades (OL)
- Pressure rate (DL / EDGE)
- win rate (DL / EDGE)
- Coverage success / penalties (DB)
- Special teams snaps
- Field Goal Made / Missed (K)

In [231]:
import nflreadpy as nfl
import pandas as pd
import numpy as np
from abc import ABC, abstractmethod
from typing import Dict, Any

In [232]:
# 1. THE SKELETON: Load Rosters (The "Rosetta Stone")
# We use this as the base because it connects 'gsis_id' (NFL) to 'pfr_id' (PFR)
roster = nfl.load_rosters(seasons=[2025]).to_pandas()
# Filter to active players and keep only essential ID mapping columns

roster_key = roster[['full_name', 'position', 'team', 'gsis_id', 'pfr_id', 'depth_chart_position']]

# Remove everyone without a gsis_id
roster_key = roster_key[roster_key['gsis_id'].notna()]
# Remove everyone without a pfr_id
roster_key = roster_key[roster_key['pfr_id'].notna()]

In [233]:
roster_key.head()

Unnamed: 0,full_name,position,team,gsis_id,pfr_id,depth_chart_position
1,Philip Rivers,QB,IND,00-0022942,RivePh00,QB
2,Aaron Rodgers,QB,PIT,00-0023459,RodgAa00,QB
3,Matt Prater,K,BUF,00-0023853,PratMa20,K
4,Marcedes Lewis,TE,DEN,00-0024243,LewiMa00,TE
5,Nick Folk,K,NYJ,00-0025565,FolkNi20,K


In [234]:
# 2. THE MOTOR: Snap Counts (Crucial for Rate Metrics)
# Uses PFR IDs. We filter for 2025.
snaps = nfl.load_snap_counts(seasons=[2025]).to_pandas()
# Group by player ID to get season totals (since load_snap_counts is game-level)
snaps_season = snaps.groupby('pfr_player_id')[['offense_snaps', 'defense_snaps', 'st_snaps']].sum().reset_index()



In [235]:
# 3. THE PRODUCTION: Standard Box Score Stats & Advanced Stats
# Uses GSIS IDs (player_id).
player_stats = nfl.load_player_stats(seasons=[2025], summary_level='reg').to_pandas()
adv_def = nfl.load_pfr_advstats(seasons=[2025], summary_level='season', stat_type='def').to_pandas()
adv_pass = nfl.load_pfr_advstats(seasons=[2025], summary_level='season', stat_type='pass').to_pandas()
adv_rush = nfl.load_pfr_advstats(seasons=[2025], summary_level='season', stat_type='rush').to_pandas()
adv_rec = nfl.load_pfr_advstats(seasons=[2025], summary_level='season', stat_type='rec').to_pandas()


In [236]:
# 4. THE MASTER MERGE (Sequential)

# Start with Roster
df_master = roster_key.copy()

# A. Merge Snaps (PFR ID)
df_master = df_master.merge(snaps_season, left_on='pfr_id', right_on='pfr_player_id', how='left')

# B. Merge Standard Stats (GSIS ID)
df_master = df_master.merge(player_stats, left_on='gsis_id', right_on='player_id', how='left')

# C. Defense & OL (No overlap usually)
df_master = df_master.merge(adv_def, on='pfr_id', how='left', suffixes=('', '_def'))

# D. Passing (QBs)
df_master = df_master.merge(adv_pass, on='pfr_id', how='left', suffixes=('', '_pass'))

# E. Rushing (RBs/QBs)
# Note: 'broken_tackles' exists in both Rush and Rec. We force a suffix.
df_master = df_master.merge(adv_rush, on='pfr_id', how='left', suffixes=('', '_rush'))

# F. Receiving (WRs/TEs/RBs)
df_master = df_master.merge(adv_rec, on='pfr_id', how='left', suffixes=('', '_rec'))

In [237]:
# 4.b Deduplicate by unique player (gsis_id), keeping first occurrence
df_master = df_master.drop_duplicates(subset=['gsis_id'], keep='first')

# Remove players with 0 snaps or null values
df_master = df_master[(df_master['offense_snaps'] > 0) | (df_master['defense_snaps'] > 0) | (df_master['st_snaps'] > 0)]
df_master = df_master.dropna(subset=['offense_snaps', 'defense_snaps', 'st_snaps'])

In [238]:
df_master.shape

df_master.to_csv('nfl_player_master_2025.csv', index=False)

In [239]:
# Check for duplicates
print("Checking for duplicates by full_name:")
duplicate_names = df_master['full_name'].value_counts()
duplicates = duplicate_names[duplicate_names > 1]
print(f"\nFound {len(duplicates)} players with multiple rows:")
print(duplicates)

# Show Joe Flacco example
print("\n\nJoe Flacco rows:")
print(df_master[df_master['full_name'] == 'Joe Flacco'][['full_name', 'gsis_id', 'pfr_id', 'team', 'depth_chart_position']])

print("\n\nColumn names:")
print(df_master.columns.tolist())

Checking for duplicates by full_name:

Found 3 players with multiple rows:
full_name
Jordan Phillips    2
Jaylon Jones       2
Byron Young        2
Name: count, dtype: int64


Joe Flacco rows:
    full_name     gsis_id    pfr_id team depth_chart_position
5  Joe Flacco  00-0026158  FlacJo00  CIN                   QB


Column names:
['full_name', 'position_x', 'team', 'gsis_id', 'pfr_id', 'depth_chart_position', 'pfr_player_id', 'offense_snaps', 'defense_snaps', 'st_snaps', 'player_id', 'player_name', 'player_display_name', 'position_y', 'position_group', 'headshot_url', 'season', 'season_type', 'recent_team', 'games', 'completions', 'attempts', 'passing_yards', 'passing_tds', 'passing_interceptions', 'sacks_suffered', 'sack_yards_lost', 'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards', 'passing_yards_after_catch', 'passing_first_downs', 'passing_epa', 'passing_cpoe', 'passing_2pt_conversions', 'pacr', 'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles', 'rushing_fumbles

In [240]:
# Examine remaining duplicates to understand if they're legitimate
print("Remaining duplicates - checking if they're different players:\n")
for dup_name in ['Jordan Phillips', 'Jaylon Jones', 'Byron Young']:
    print(f"\n{dup_name}:")
    dup_rows = df_master[df_master['full_name'] == dup_name][['full_name', 'gsis_id', 'pfr_id', 'team', 'position_x', 'depth_chart_position']]
    print(dup_rows)

Remaining duplicates - checking if they're different players:


Jordan Phillips:
            full_name     gsis_id    pfr_id team position_x  \
88    Jordan Phillips  00-0031557  PhilJo01  BUF         DL   
2076  Jordan Phillips  00-0040175  PhilJo02  MIA         DL   

     depth_chart_position  
88                     DT  
2076                   DT  

Jaylon Jones:
         full_name     gsis_id    pfr_id team position_x depth_chart_position
1053  Jaylon Jones  00-0037106  JoneJa12  CHI         DB                   CB
1379  Jaylon Jones  00-0038407  JoneJa13  IND         DB                   CB

Byron Young:
        full_name     gsis_id    pfr_id team position_x depth_chart_position
1586  Byron Young  00-0038978  YounBy00  PHI         DL                   DT
1642  Byron Young  00-0039137  YounBy01   LA         LB                  OLB


In [None]:
class PositionScorer(ABC):
    """
    Scores players based on weighted Z-scores, normalized to a 0-100 scale.
    Weights do NOT need to sum to 1.
    """
    
    def __init__(self, position: str):
        self.position = position
        
    @abstractmethod
    def compute_metrics(self, row: pd.Series) -> Dict[str, float]:
        """Extract raw stats from a row."""
        pass
    
    @abstractmethod
    def get_config(self) -> Dict[str, Dict[str, float]]:
        """
        Configuration for metrics.
        - 'weight': Relative importance (e.g., 10.0 for huge impact, 1.0 for minor).
        - 'direction': 1 (Higher is Better) or -1 (Lower is Better).
        """
        pass

    def _calculate_raw_score(self, row: pd.Series, peer_stats: pd.DataFrame, config: Dict) -> float:
        """Internal helper to calculate the raw weighted sum."""
        raw_score = 0.0
        
        for metric, conf in config.items():
            val = self.compute_metrics(row).get(metric, 0)
            weight = conf['weight']
            direction = conf['direction']
            
            # Get peer distribution for this specific metric
            # (In production, cache these means/stds to speed up)
            peer_vals = peer_stats.apply(lambda r: self.compute_metrics(r).get(metric, 0), axis=1)
            
            if len(peer_vals) > 1 and np.std(peer_vals) > 0:
                mean = np.mean(peer_vals)
                std = np.std(peer_vals)
                z_score = (val - mean) / std
            else:
                z_score = 0.0
                
            # Add to total: Z-Score * Weight * Direction
            # Example: Bad Throw (Z=2.0) * Weight(5) * Dir(-1) = -10.0 impact
            raw_score += (z_score * weight * direction)
            
        return raw_score

    def score_all(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Scores all players and returns detailed breakdown of what adds up to raw_score.
        """
        # Filter for this position
        peers = df[df['depth_chart_position'] == self.position].copy()
        
        if peers.empty:
            return peers
        
        config = self.get_config()
        metric_names = list(config.keys())
        
        # 1. Compute raw metrics for all peers at once
        metrics_df = peers.apply(lambda r: pd.Series(self.compute_metrics(r)), axis=1)
        metrics_df = metrics_df.reindex(columns=metric_names)
        
        # 2. Calculate peer distribution (mean/std) per metric
        means = metrics_df.mean(skipna=True)
        stds = metrics_df.std(ddof=0, skipna=True)  # Population std
        
        # 3. Compute z-scores (fill NaN with 0 when std=0)
        z_scores_df = (metrics_df - means) / stds.replace(0, np.nan)
        z_scores_df = z_scores_df.fillna(0.0)
        
        # 4. Compute weighted directional contributions per metric
        weights = pd.Series({m: config[m]['weight'] for m in metric_names})
        directions = pd.Series({m: config[m]['direction'] for m in metric_names})
        contrib_df = z_scores_df.multiply(weights, axis=1).multiply(directions, axis=1)
        
        # 5. Raw score is sum of contributions
        peers['raw_score'] = contrib_df.sum(axis=1)
        
        # 6. Convert to T-Score (0-100 scale)
        raw_mean = peers['raw_score'].mean()
        raw_std = peers['raw_score'].std()
        
        if raw_std == 0:
            peers['SportsIQ'] = 75.0
        else:
            peers['SportsIQ'] = ((peers['raw_score'] - raw_mean) / raw_std) * 10 + 75
        
        peers['SportsIQ'] = peers['SportsIQ'].clip(0, 99)
        
        # 7. Attach all breakdown columns
        peers = peers.join(metrics_df.add_prefix('metric_'))
        peers = peers.join(z_scores_df.add_prefix('z_'))
        peers = peers.join(contrib_df.add_prefix('contrib_'))
        
        # Return in logical order: identifiers → scores → breakdown
        result_cols = (
            ['player_name', 'depth_chart_position', 'SportsIQ', 'raw_score']
            + [f'metric_{m}' for m in metric_names]
            + [f'z_{m}' for m in metric_names]
            + [f'contrib_{m}' for m in metric_names]
        )
        return peers[[c for c in result_cols if c in peers.columns]]

# ---------------------------------------------------------
# Position Configs (No Sum Constraint)
# ---------------------------------------------------------

class QBScorer(PositionScorer):
    def __init__(self): super().__init__("QB")
    
    def compute_metrics(self, row):
        atts = max(1, row.get('passing_attempts', 1))
        snaps = max(1, row.get('offense_snaps', 1))
        return {
            'epa_per_play': row.get('passing_epa', 0), # EPA is already per-play usually
            'cpoe': row.get('passing_cpoe', 0),
            'bad_throw_pct': row.get('bad_throws', 0) / atts,
            'sack_pct': row.get('sacks', 0) / snaps,
            'interception_pct': row.get('interceptions', 0) / atts,
            'big_time_throws': row.get('passing_air_yards', 0) / atts, # Proxy for deep aggression
            'accuracy': row.get('on_tgt_throws', 0) / atts
        }

    def get_config(self):
        return {
            # Huge Weights for Efficiency
            'epa_per_play':    {'weight': 10.0, 'direction': 1}, 
            'cpoe':            {'weight': 8.0,  'direction': 1},
            
            # Medium Weights
            'bad_throw_pct':   {'weight': 5.0,  'direction': -1}, 
            'sack_pct':        {'weight': 4.0,  'direction': -1},
            
            # Small Bonus
            'big_time_throws': {'weight': 2.0,  'direction': 1},
            'interception_pct': {'weight': 3.0, 'direction': -1},
            'accuracy': {'weight': 3.0, 'direction': 1}
        }
    
class RBScorer(PositionScorer):
    def __init__(self): super().__init__("RB")
    
    def compute_metrics(self, row):
        atts = max(1, row.get('carries', 1))
        return {
            'epa_per_play': row.get('rushing_epa', 0), # EPA is already per-play usually
            'fumbles': row.get('rushing_fumbles_lost', 0),
            'broken_tackles': row.get('brk_tkl', 0),
            'yac_att': row.get('yac_att', 0),
            'yards_per_carry': row.get('rushing_yards', 0) / atts,
            'drops': row.get('drops', 0),
            'attempts': row.get('carriers', 0)
        }

    def get_config(self):
        return {
            # Huge Weights for Efficiency
            'epa_per_play':    {'weight': 10.0, 'direction': 1},
            'yards_per_carry': {'weight': 6.0,  'direction': 1},

            # Medium Weights
            'fumbles':   {'weight': 5.0,  'direction': -1}, 
            'broken_tackles': {'weight': 4.0, 'direction': 1},
            'attempts': {'weight': 4.0, 'direction': 1},
            
            # Small Bonus
            'yac_att': {'weight': 3.0, 'direction': 1},
            'drops': {'weight': 2.0, 'direction': -1}
        }    

class OLScorer(PositionScorer):
    def __init__(self): super().__init__("OL")
    
    def compute_metrics(self, row):
        snaps = max(1, row.get('offense_snaps', 1))
        return {
            'blown_block_rate': (row.get('sacks_allowed', 0) + row.get('hits_allowed', 0)) / snaps,
            'penalty_rate': row.get('penalties', 0) / snaps,
            'snap_count': row.get('offense_snaps', 0)
        }

    def get_config(self):
        return {
            # Massive Penalties for failure
            'blown_block_rate': {'weight': 15.0, 'direction': -1}, 
            'penalty_rate':     {'weight': 8.0,  'direction': -1},
            
            # Small positive weight just for being a starter (Availability)
            'snap_count':       {'weight': 2.0,  'direction': 1} 
        }

class DLScorer(PositionScorer):
    def __init__(self): super().__init__("DL")
    
    def compute_metrics(self, row):
        snaps = max(1, row.get('defense_snaps', 1))
        return {
            'pressure_rate': row.get('pressures', 0) / snaps,
            'sack_conversion': row.get('sacks', 0) / max(1, row.get('pressures', 1)), # How often do they finish?
            'missed_tackle_rate': row.get('missed_tackles', 0) / snaps,
        }

    def get_config(self):
        return {
            # Pressure is the best predictor of skill
            'pressure_rate':      {'weight': 12.0, 'direction': 1},
            
            # Finishing is high value but high variance
            'sack_conversion':    {'weight': 5.0,  'direction': 1},
            
            # Mistakes
            'missed_tackle_rate': {'weight': 4.0,  'direction': -1}
        }

# Usage
qb_scorer = QBScorer()
scored_df = qb_scorer.score_all(df_master)
print(scored_df.sort_values('SportsIQ', ascending=False).head())




TypeError: unhashable type: 'Index'

In [None]:
rb_scorer = RBScorer()
scored_df = rb_scorer.score_all(df_master)
print(scored_df.sort_values('SportsIQ', ascending=False).head())

     player_name depth_chart_position   SportsIQ  raw_score
1610    D.Achane                   RB  99.000000  41.871704
1607   R.Johnson                   RB  99.000000  36.282667
703     J.Taylor                   RB  99.000000  36.767648
1163     B.Brown                   RB  96.289221  31.242586
1866     B.Corum                   RB  95.779562  30.511994


In [None]:
def score_all_players(stats_df: pd.DataFrame, position_col: str = 'depth_chart_position') -> pd.DataFrame:
    """
    Score all players using position-specific scorers.
    
    Args:
        stats_df: DataFrame with player stats
        position_col: Name of position column
    
    Returns:
        DataFrame with 'sportsiq_score' column added
    """
    stats_df = stats_df.copy()
    stats_df['sportsiq_score'] = 0.0
    
    for position in stats_df[position_col].unique():
        if pd.isna(position):
            continue
        
        # Get scorer for this position (fallback to generic if not found)
        scorer = POSITION_SCORERS.get(str(position).upper())
        if scorer is None:
            print(f"Warning: No scorer for position {position}, skipping")
            continue
        
        # Get all players at this position for peer normalization
        position_mask = stats_df[position_col] == position
        peer_stats = stats_df[position_mask]
        
        # Score each player
        scores = []
        for idx, row in peer_stats.iterrows():
            try:
                score = scorer.score_player(row, peer_stats)
                scores.append(score)
            except Exception as e:
                print(f"Error scoring {row.get('player_name', 'unknown')}: {e}")
                scores.append(np.nan)
        
        stats_df.loc[position_mask, 'sportsiq_score'] = scores
        print(f"Scored {len(scores)} {position} players")
    
    return stats_df

# Score all players
data_scored = score_all_players(df_master)
data_scored[['player_name', 'depth_chart_position', 'sportsiq_score']].head(20)

data_scored[['player_name', 'depth_chart_position', 'sportsiq_score']].to_csv('nfl_player_war_2025.csv', index=False)

Scored 81 QB players
Scored 142 TE players
Scored 248 WR players
Scored 236 CB players
Scored 1 LB players
