In [1]:
import nflreadpy as nfl
import polars as pl
import pandas as pd
import numpy as np

In [2]:
# Snap-count data begins with the 2012 season, so that is the first year requested.
seasons = [*range(2012, 2025)]

# The loader hands back a Polars frame; convert it once so the rest of the
# notebook can work in pandas.
snaps_pl = nfl.load_snap_counts(seasons)
snaps = snaps_pl.to_pandas()

snaps.head(), snaps.columns

(           game_id   pfr_game_id  season game_type  week          player  \
 0  2013_01_ARI_STL  201309080ram    2013       REG     1  Chris Williams   
 1  2013_01_ARI_STL  201309080ram    2013       REG     1     Harvey Dahl   
 2  2013_01_ARI_STL  201309080ram    2013       REG     1       Jake Long   
 3  2013_01_ARI_STL  201309080ram    2013       REG     1     Scott Wells   
 4  2013_01_ARI_STL  201309080ram    2013       REG     1    Sam Bradford   
 
   pfr_player_id position team opponent  offense_snaps  offense_pct  \
 0      WillCh03        G  STL      ARI           67.0          1.0   
 1      DahlHa20        G  STL      ARI           67.0          1.0   
 2      LongJa20        T  STL      ARI           67.0          1.0   
 3      WellSc20        C  STL      ARI           67.0          1.0   
 4      BradSa00       QB  STL      ARI           67.0          1.0   
 
    defense_snaps  defense_pct  st_snaps  st_pct  
 0            0.0          0.0       5.0    0.17  
 1    

In [4]:
# Collect every column whose name ends in "_snaps"; these are the snap-count
# columns that get summed into a single playing-time figure later on.
snap_cols = [name for name in snaps.columns if name.endswith("_snaps")]
snap_cols

['offense_snaps', 'defense_snaps', 'st_snaps']

In [5]:
# Per-row playing time: add up all the snap columns found above, counting a
# missing value as zero snaps. This writes a new column onto `snaps` itself.
snaps["time_in_role"] = snaps[snap_cols].fillna(0).sum(axis=1)

# Collapse to one row per (player, season, position) with the summed snaps.
# NOTE(review): pandas groupby drops rows whose key is missing by default, so
# rows without a player id or position would not appear here - confirm intended.
role_keys = ["pfr_player_id", "season", "position"]
role_totals = snaps.groupby(role_keys, as_index=False).agg(
    time_in_role=("time_in_role", "sum")
)

# Use a generic id column name and tag every row with the league label.
nfl_roles = role_totals.rename(columns={"pfr_player_id": "player_id"}).assign(
    league="NFL"
)
nfl_roles.head()

Unnamed: 0,player_id,season,position,time_in_role,league
0,AaitIs00,2013,NT,45.0,NFL
1,AbanIs00,2023,RB,82.0,NFL
2,AbbrJa00,2015,WR,220.0,NFL
3,AbbrJa00,2016,WR,62.0,NFL
4,AbbrJa00,2017,WR,208.0,NFL


In [6]:
# A "player-season" is identified by these two columns.
season_keys = ["player_id", "season"]

# Broadcast each player-season's total snaps back onto its position rows, then
# express every position's snaps as a fraction of that total.
nfl_roles["total_time"] = (
    nfl_roles.groupby(season_keys)["time_in_role"].transform("sum")
)
nfl_roles["role_share"] = nfl_roles["time_in_role"] / nfl_roles["total_time"]

# Sanity check: the shares within each player-season should add up to 1.
nfl_roles.groupby(season_keys)["role_share"].sum().describe()

count    2.496300e+04
mean     1.000000e+00
std      1.217114e-18
min      1.000000e+00
25%      1.000000e+00
50%      1.000000e+00
75%      1.000000e+00
max      1.000000e+00
Name: role_share, dtype: float64

In [7]:
def entropy(shares):
    """Return the Shannon entropy (natural log) of a collection of shares.

    Parameters
    ----------
    shares : array-like of float
        Proportions to measure; a pandas Series, NumPy array or plain list.
        The values are used as given and are not re-normalized here.

    Returns
    -------
    numpy.float64
        ``-sum(p * log(p))`` over the strictly positive entries. Zero and NaN
        entries are skipped. The result is ``0.0`` (never ``-0.0``) when there
        is at most one positive share or the input is empty.
    """
    # Coerce up front so lists and tuples work as well as Series/ndarrays.
    shares = np.asarray(shares, dtype=float)
    # `> 0` is False for both 0 and NaN, which keeps log() away from them.
    shares = shares[shares > 0]
    # Negating a zero sum yields IEEE negative zero; adding 0.0 normalizes it
    # so single-role seasons display and export as 0.0 instead of -0.0.
    return -np.sum(shares * np.log(shares)) + 0.0

# One row per player-season: the entropy of its role shares and the number of
# distinct positions recorded for it.
season_groups = nfl_roles.groupby(["player_id", "season"])
utility_entropy = season_groups.agg(
    utility_entropy=("role_share", entropy),
    n_roles=("position", "nunique"),
).reset_index()

# log(n_roles) is the entropy of an even split across n_roles positions, so
# dividing by it rescales the score to the 0..1 range.
utility_entropy["max_entropy"] = np.log(utility_entropy["n_roles"])
raw_score = utility_entropy["utility_entropy"] / utility_entropy["max_entropy"]

# With a single role the denominator is log(1) == 0, so the ratio is undefined;
# those seasons are pinned to 0.0. Any remaining negative values are floored at 0.
is_single_role = utility_entropy["n_roles"] == 1
utility_entropy["utility_score"] = raw_score.mask(is_single_role, 0.0).clip(lower=0)

utility_entropy.head()

Unnamed: 0,player_id,season,utility_entropy,n_roles,max_entropy,utility_score
0,AaitIs00,2013,-0.0,1,0.0,0.0
1,AbanIs00,2023,-0.0,1,0.0,0.0
2,AbbrJa00,2015,-0.0,1,0.0,0.0
3,AbbrJa00,2016,-0.0,1,0.0,0.0
4,AbbrJa00,2017,-0.0,1,0.0,0.0


In [8]:
# Persist both tables as CSV without the index column. `nfl_roles` holds the
# per-position rows (with total_time and role_share); `utility_entropy` holds
# one scored row per player-season.
for frame, csv_path in (
    (nfl_roles, "../data/processed/nfl_roles_with_utility.csv"),
    (utility_entropy, "../data/processed/nfl_season_utility_scores.csv"),
):
    frame.to_csv(csv_path, index=False)

In [9]:
# Report how many rows each table ended up with.
print("Roles rows:", len(nfl_roles))
print("Seasons rows:", len(utility_entropy))

# Show the ten player-seasons with the highest utility score.
ranked = utility_entropy.sort_values(by="utility_score", ascending=False)
ranked.head(10)

Roles rows: 25937
Seasons rows: 24963


Unnamed: 0,player_id,season,utility_entropy,n_roles,max_entropy,utility_score
3001,BucaDe00,2020,0.693147,2,0.693147,1.0
14389,MaydJa01,2020,0.693147,2,0.693147,1.0
16422,NursJo00,2020,0.693147,2,0.693147,1.0
12195,JudoMa00,2020,0.693146,2,0.693147,0.999998
11960,JoneJo06,2022,0.693063,2,0.693147,0.999879
9385,HaywCa00,2020,0.693012,2,0.693147,0.999805
8770,HallTy01,2020,0.692885,2,0.693147,0.999622
18184,ReddCo20,2015,0.692797,2,0.693147,0.999495
15097,MeekQu00,2020,0.692494,2,0.693147,0.999057
17847,PricBi01,2020,0.692481,2,0.693147,0.999039
