In [184]:
from nflreadpy import load_pbp, load_players
import polars as pl

# Load 2017-2023 play-by-play and keep only pass/run plays that carry an EPA value.
pbp = (
    load_pbp(seasons=range(2017, 2024))
    .filter(
        # Multiple predicates passed to filter() are combined with AND.
        pl.col("epa").is_not_null(),
        pl.col("play_type").is_in(["pass", "run"]),
    )
)

In [185]:
# Passing facet: one row per (passer, season) with summed EPA and the number
# of retained plays that have a passer id recorded.
pass_facet = (
    pbp
    .filter(pl.col("passer_player_id").is_not_null())
    .group_by(["passer_player_id", "season"])
    .agg([
        pl.col("epa").sum().alias("epa_pass"),
        # pl.len() replaces the zero-argument pl.count(), deprecated in polars 0.20.5.
        pl.len().alias("dropbacks"),
    ])
    # Use a common key name so the facets can be joined on gsis_id.
    .rename({"passer_player_id": "gsis_id"})
)

(Deprecated in version 0.20.5)
  pl.count().alias("dropbacks")


In [186]:
# Rushing facet: one row per (rusher, season) with summed EPA and the number
# of retained plays that have a rusher id recorded.
rush_facet = (
    pbp
    .filter(pl.col("rusher_player_id").is_not_null())
    .group_by(["rusher_player_id", "season"])
    .agg([
        pl.col("epa").sum().alias("epa_rush"),
        # pl.len() replaces the zero-argument pl.count(), deprecated in polars 0.20.5.
        pl.len().alias("rushes"),
    ])
    # Use a common key name so the facets can be joined on gsis_id.
    .rename({"rusher_player_id": "gsis_id"})
)

(Deprecated in version 0.20.5)
  pl.count().alias("rushes")


In [187]:
# Receiving facet: one row per (receiver, season) with summed EPA and the number
# of retained plays that have a receiver id recorded.
rec_facet = (
    pbp
    .filter(pl.col("receiver_player_id").is_not_null())
    .group_by(["receiver_player_id", "season"])
    .agg([
        pl.col("epa").sum().alias("epa_rec"),
        # pl.len() replaces the zero-argument pl.count(), deprecated in polars 0.20.5.
        pl.len().alias("targets"),
    ])
    # Use a common key name so the facets can be joined on gsis_id.
    .rename({"receiver_player_id": "gsis_id"})
)

(Deprecated in version 0.20.5)
  pl.count().alias("targets")


In [188]:
# Player metadata used to label each gsis_id with a name and position.
players = load_players().select(["gsis_id", "display_name", "position", "position_group"])

# Wide table: one row per (gsis_id, season) carrying all three facets.
off_facet = (
    pass_facet
    # how="full" is the current spelling of how="outer", deprecated in polars 0.20.29.
    # coalesce=True merges the join keys into a single gsis_id/season pair.
    .join(rush_facet, on=["gsis_id", "season"], how="full", coalesce=True)
    .join(rec_facet, on=["gsis_id", "season"], how="full", coalesce=True)
    .with_columns([
        # A player-season absent from a facet has no EPA or opportunities there.
        pl.col("epa_pass", "epa_rush", "epa_rec").fill_null(0.0),
        pl.col("dropbacks", "rushes", "targets").fill_null(0),
    ])
    # NOTE(review): assumes gsis_id is unique in `players`; duplicates would
    # multiply player-season rows here - confirm.
    .join(players, on="gsis_id", how="left")
)

(Deprecated in version 0.20.29)
  .join(rush_facet, on=["gsis_id", "season"], how="outer", coalesce=True)
(Deprecated in version 0.20.29)
  .join(rec_facet,  on=["gsis_id", "season"], how="outer", coalesce=True)


In [189]:
# may be helpful later - long facet rows format
# One row per (player, season, facet); facets with zero opportunities are dropped.
off_long = pl.concat(
    [
        off_facet.select(
            "gsis_id", "season", "display_name", "position", "position_group",
            pl.lit(facet_name).alias("facet"),
            pl.col(epa_col).alias("epa"),
            pl.col(opps_col).alias("opps"),
        )
        # (facet label, EPA column, opportunity-count column), stacked in this order
        for facet_name, epa_col, opps_col in (
            ("pass", "epa_pass", "dropbacks"),
            ("rush", "epa_rush", "rushes"),
            ("rec", "epa_rec", "targets"),
        )
    ],
    how="vertical",
).filter(pl.col("opps") > 0)

In [190]:
# Preview the first 15 long-format rows to sanity-check the schema.
off_long.head(15)

gsis_id,season,display_name,position,position_group,facet,epa,opps
str,i32,str,str,str,str,f64,u32
"""00-0036913""",2023,"""Kadarius Toney""","""WR""","""WR""","""pass""",-0.446093,1
"""00-0029000""",2020,"""Cole Beasley""","""WR""","""WR""","""pass""",2.913771,1
"""00-0031431""",2017,"""Marqise Lee""","""WR""","""WR""","""pass""",-0.373399,1
"""00-0031325""",2019,"""Sammy Watkins""","""WR""","""WR""","""pass""",-0.870342,1
"""00-0027150""",2019,"""Julian Edelman""","""WR""","""WR""","""pass""",5.536067,2
…,…,…,…,…,…,…,…
"""00-0031382""",2017,"""Jarvis Landry""","""WR""","""WR""","""pass""",-0.427772,1
"""00-0034960""",2021,"""Jakobi Meyers""","""WR""","""WR""","""pass""",1.786917,2
"""00-0034928""",2019,"""Steven Sims""","""WR""","""WR""","""pass""",-0.359818,1
"""00-0038977""",2023,"""Tank Dell""","""WR""","""WR""","""pass""",-0.777417,1


In [221]:
# WAR model assumptions:
# - WAR is defined relative to replacement-level performance.
# - Replacement level is specific to position and facet.
# - EPA is summed over plays.
# - The replacement rate (EPA per opportunity) is scaled by a player's opportunities.
# - The EPA -> wins conversion constant is fixed.

# Divisor that converts marginal EPA into wins.
epa_per_win = 45.0
# Minimum opportunities for a player-season-facet row to count toward the opportunity cutoff.
min_opportunities = 20
# Quantile of opportunities (among qualifying rows) used as the replacement-pool cutoff.
replacement_base = 0.25

In [222]:
def _replacement_level(long_df, keys, suffix, min_opps, quantile):
    """Compute replacement-level EPA per opportunity for each group in `keys`.

    Steps:
      1. Among rows with at least `min_opps` opportunities (and non-null keys),
         take the `quantile` of `opps` per group as the cutoff `opps_cut`.
      2. The replacement pool is every row of `long_df` in that group whose
         `opps` is at or below the cutoff.
      3. The replacement rate is the pool's total EPA divided by its total opps.

    Parameters:
        long_df: long-format frame with `epa`, `opps` and the key columns.
        keys: list of column names defining the grouping.
        suffix: appended to the output column names.
        min_opps: minimum opportunities to qualify for the cutoff computation.
        quantile: quantile of `opps` used as the cutoff.

    Returns:
        Frame with `keys`, `rep_epa_per_opp_<suffix>` and `rep_total_opps_<suffix>`.
    """
    cuts = (
        long_df
        # Rows with a null key can never match in the inner join below, so
        # dropping them up front does not change the result.
        .drop_nulls(subset=keys)
        .filter(pl.col("opps") >= min_opps)
        .group_by(keys)
        .agg([pl.quantile("opps", quantile).alias("opps_cut")])
    )
    # NOTE(review): the pool is drawn from all of long_df, so it also includes
    # rows below min_opps - confirm this is intended.
    pool = (
        cuts
        .join(long_df, on=keys, how="inner", coalesce=True)
        .filter(pl.col("opps") <= pl.col("opps_cut"))
    )
    return (
        pool
        .group_by(keys)
        .agg([
            (pl.col("epa").sum() / pl.col("opps").sum()).alias(f"rep_epa_per_opp_{suffix}"),
            pl.col("opps").sum().alias(f"rep_total_opps_{suffix}"),
        ])
    )


# Most specific level: per season, facet and position group.
rep_pos = _replacement_level(
    off_long,
    ["season", "facet", "position_group"],
    "pos",
    min_opps=min_opportunities,
    quantile=replacement_base,
)

# Fallback level: per season and facet, across all positions.
rep_facet = _replacement_level(
    off_long,
    ["season", "facet"],
    "facet",
    min_opps=min_opportunities,
    quantile=replacement_base,
)

# Last-resort level: per facet, pooled over all seasons and positions.
rep_pooled = _replacement_level(
    off_long,
    ["facet"],
    "pooled",
    min_opps=min_opportunities,
    quantile=replacement_base,
)

In [223]:
# Attach the three replacement rates to every player-season-facet row, pick the
# most specific one available, and convert marginal EPA into wins.
off_war_facet = (
    off_long
    .join(
        rep_pos.select(["season", "facet", "position_group", "rep_epa_per_opp_pos"]),
        on=["season", "facet", "position_group"],
        how="left",
    )
    .join(
        rep_facet.select(["season", "facet", "rep_epa_per_opp_facet"]),
        on=["season", "facet"],
        how="left",
    )
    .join(
        rep_pooled.select(["facet", "rep_epa_per_opp_pooled"]),
        on=["facet"],
        how="left",
    )
    # Fallback order: position-specific -> season/facet -> pooled facet.
    .with_columns(
        rep_epa_per_opp=pl.coalesce(
            pl.col("rep_epa_per_opp_pos"),
            pl.col("rep_epa_per_opp_facet"),
            pl.col("rep_epa_per_opp_pooled"),
        )
    )
    # Marginal EPA: actual EPA minus the replacement rate applied to the same opportunities.
    .with_columns(
        mepa=pl.col("epa") - pl.col("rep_epa_per_opp") * pl.col("opps")
    )
    .with_columns(
        war_facet=pl.col("mepa") / epa_per_win
    )
)

In [224]:
# Collapse facets to one row per player-season: total offensive WAR plus the
# opportunity count contributed by each facet.
off_war_player = (
    off_war_facet
    .group_by(["gsis_id", "season", "display_name", "position", "position_group"])
    .agg(
        pl.col("war_facet").sum().alias("war_off"),
        *[
            # Count opps only from rows of the matching facet; other rows add 0.
            pl.when(pl.col("facet") == facet_name)
            .then(pl.col("opps"))
            .otherwise(0)
            .sum()
            .alias(count_col)
            for facet_name, count_col in (
                ("pass", "dropbacks"),
                ("rush", "rushes"),
                ("rec", "targets"),
            )
        ],
    )
    .with_columns(
        total_opps=pl.col("dropbacks") + pl.col("rushes") + pl.col("targets")
    )
)

In [225]:
# Ten QB seasons with the most dropbacks, shown with offensive WAR and opportunity counts.
(
    off_war_player
    .filter(pl.col("position_group") == "QB")
    .select(["display_name", "season", "war_off", "dropbacks", "rushes", "targets", "total_opps"])
    .sort("dropbacks", descending=True)
    .head(10)
)

display_name,season,war_off,dropbacks,rushes,targets,total_opps
str,i32,f64,u32,u32,u32,u32
"""Tom Brady""",2021,5.289435,838,18,0,856
"""Tom Brady""",2022,5.507069,828,12,1,841
"""Patrick Mahomes""",2021,6.248869,818,65,0,883
"""Justin Herbert""",2022,5.416452,782,39,2,823
"""Matthew Stafford""",2021,5.508244,780,30,1,811
"""Patrick Mahomes""",2022,10.183907,780,62,1,843
"""Patrick Mahomes""",2023,5.313923,776,70,0,846
"""Tom Brady""",2020,6.995725,773,16,0,789
"""Joe Burrow""",2022,7.395526,770,65,0,835
"""Tom Brady""",2017,9.488783,759,18,1,778


In [226]:
# Twenty player-seasons with the highest offensive WAR across all positions.
top_war = off_war_player.sort("war_off", descending=True).head(20)

top_war

gsis_id,season,display_name,position,position_group,war_off,dropbacks,rushes,targets,total_opps
str,i32,str,str,str,f64,u32,u32,u32,u32
"""00-0033873""",2022,"""Patrick Mahomes""","""QB""","""QB""",10.183907,780,62,1,843
"""00-0019596""",2017,"""Tom Brady""","""QB""","""QB""",9.488783,759,18,1,778
"""00-0033873""",2018,"""Patrick Mahomes""","""QB""","""QB""",9.269045,686,54,0,740
"""00-0033873""",2019,"""Patrick Mahomes""","""QB""","""QB""",7.88479,620,47,0,667
"""00-0033873""",2020,"""Patrick Mahomes""","""QB""","""QB""",7.846389,732,56,2,790
…,…,…,…,…,…,…,…,…,…
"""00-0019596""",2018,"""Tom Brady""","""QB""","""QB""",6.945196,716,13,1,730
"""00-0022942""",2018,"""Philip Rivers""","""QB""","""QB""",6.733334,636,13,0,649
"""00-0022942""",2017,"""Philip Rivers""","""QB""","""QB""",6.73049,592,7,0,599
"""00-0036389""",2022,"""Jalen Hurts""","""QB""","""QB""",6.558512,590,182,0,772


In [227]:
# Summary statistics of offensive WAR over all player-seasons.
off_war_player.select("war_off").describe()

statistic,war_off
str,f64
"""count""",4282.0
"""null_count""",0.0
"""mean""",0.287939
"""std""",0.95366
"""min""",-1.446341
"""25%""",-0.051721
"""50%""",0.030011
"""75%""",0.253653
"""max""",10.183907


In [237]:
# Spot check: per-facet rows for one player, including which replacement rate was used.
# NOTE(review): filtering on display_name assumes the name is unique; gsis_id would be safer.
off_war_facet.filter(pl.col("display_name") == "Alvin Kamara").sort(["season","facet"])

gsis_id,season,display_name,position,position_group,facet,epa,opps,rep_epa_per_opp_pos,rep_epa_per_opp_facet,rep_epa_per_opp_pooled,rep_epa_per_opp,mepa,war_facet
str,i32,str,str,str,str,f64,u32,f64,f64,f64,f64,f64,f64
"""00-0033906""",2017,"""Alvin Kamara""","""RB""","""RB""","""rec""",39.521551,110,-0.00131,0.030806,0.055766,-0.00131,39.665613,0.881458
"""00-0033906""",2017,"""Alvin Kamara""","""RB""","""RB""","""rush""",27.482429,142,-0.143608,-0.045064,-0.002694,-0.143608,47.874812,1.063885
"""00-0033906""",2018,"""Alvin Kamara""","""RB""","""RB""","""rec""",24.47837,122,-0.079585,0.069438,0.055766,-0.079585,34.187693,0.759727
"""00-0033906""",2018,"""Alvin Kamara""","""RB""","""RB""","""rush""",10.131596,221,-0.159117,0.038358,-0.002694,-0.159117,45.296466,1.006588
"""00-0033906""",2019,"""Alvin Kamara""","""RB""","""RB""","""pass""",0.964815,1,,-0.221938,-0.186873,-0.221938,1.186753,0.026372
…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""00-0033906""",2021,"""Alvin Kamara""","""RB""","""RB""","""rush""",-40.469831,241,-0.107646,0.033683,-0.002694,-0.107646,-14.527084,-0.322824
"""00-0033906""",2022,"""Alvin Kamara""","""RB""","""RB""","""rec""",-3.401088,77,-0.045301,0.034777,0.055766,-0.045301,0.087123,0.001936
"""00-0033906""",2022,"""Alvin Kamara""","""RB""","""RB""","""rush""",-41.050238,223,-0.156783,-0.05391,-0.002694,-0.156783,-6.08768,-0.135282
"""00-0033906""",2023,"""Alvin Kamara""","""RB""","""RB""","""rec""",3.35124,87,-0.092547,0.005315,0.055766,-0.092547,11.402786,0.253395


In [238]:
# Spot check: season-level offensive WAR and opportunity counts for the same player.
# NOTE(review): filtering on display_name assumes the name is unique; gsis_id would be safer.
off_war_player.filter(pl.col("display_name") == "Alvin Kamara").sort("season")

gsis_id,season,display_name,position,position_group,war_off,dropbacks,rushes,targets,total_opps
str,i32,str,str,str,f64,u32,u32,u32,u32
"""00-0033906""",2017,"""Alvin Kamara""","""RB""","""RB""",1.945343,0,142,110,252
"""00-0033906""",2018,"""Alvin Kamara""","""RB""","""RB""",1.766315,0,221,122,343
"""00-0033906""",2019,"""Alvin Kamara""","""RB""","""RB""",0.064615,1,178,106,285
"""00-0033906""",2020,"""Alvin Kamara""","""RB""","""RB""",0.762836,0,228,115,343
"""00-0033906""",2021,"""Alvin Kamara""","""RB""","""RB""",-0.092198,0,241,67,308
"""00-0033906""",2022,"""Alvin Kamara""","""RB""","""RB""",-0.133346,0,223,77,300
"""00-0033906""",2023,"""Alvin Kamara""","""RB""","""RB""",0.882352,0,182,87,269
