In [99]:
from nflreadpy import load_pbp, load_players
import polars as pl

pbp = load_pbp(seasons=range(2018, 2024))

pbp = pbp.filter(
    pl.col("epa").is_not_null() &
    pl.col("play_type").is_in(["pass", "run"])
)

In [100]:
pass_facet = (
    pbp
    .filter(pl.col("passer_player_id").is_not_null())
    .group_by(["passer_player_id", "season"])
    .agg([
        pl.col("epa").sum().alias("epa_pass"),
        pl.count().alias("dropbacks")
    ])
    .rename({"passer_player_id": "gsis_id"})
)

(Deprecated in version 0.20.5)
  pl.count().alias("dropbacks")


In [101]:
rush_facet = (
    pbp
    .filter(pl.col("rusher_player_id").is_not_null())
    .group_by(["rusher_player_id", "season"])
    .agg([
        pl.col("epa").sum().alias("epa_rush"),
        pl.count().alias("rushes")
    ])
    .rename({"rusher_player_id": "gsis_id"})
)

(Deprecated in version 0.20.5)
  pl.count().alias("rushes")


In [102]:
rec_facet = (
    pbp
    .filter(pl.col("receiver_player_id").is_not_null())
    .group_by(["receiver_player_id", "season"])
    .agg([
        pl.col("epa").sum().alias("epa_rec"),
        pl.count().alias("targets")
    ])
    .rename({"receiver_player_id": "gsis_id"})
)

(Deprecated in version 0.20.5)
  pl.count().alias("targets")


In [104]:
off_facet = (
    pass_facet
    .join(rush_facet, on=["gsis_id", "season"], how="outer", coalesce=True)
    .join(rec_facet,  on=["gsis_id", "season"], how="outer", coalesce=True)
    .with_columns([
        pl.col("epa_pass").fill_null(0.0),
        pl.col("dropbacks").fill_null(0),
        pl.col("epa_rush").fill_null(0.0),
        pl.col("rushes").fill_null(0),
        pl.col("epa_rec").fill_null(0.0),
        pl.col("targets").fill_null(0),
    ])
)

(Deprecated in version 0.20.29)
  .join(rush_facet, on=["gsis_id", "season"], how="outer", coalesce=True)
(Deprecated in version 0.20.29)
  .join(rec_facet,  on=["gsis_id", "season"], how="outer", coalesce=True)


In [105]:
players = load_players().select(["gsis_id", "display_name", "position", "position_group"])

off_facet = off_facet.join(players, on="gsis_id", how="left")

In [107]:
off_facet.filter(pl.col("display_name").str.contains("Taysom", literal=False))

gsis_id,season,epa_pass,dropbacks,epa_rush,rushes,epa_rec,targets,display_name,position,position_group
str,i32,f64,u32,f64,u32,f64,u32,str,str,str
"""00-0033357""",2023,1.458407,13,7.237412,81,10.079289,40,"""Taysom Hill""","""TE""","""TE"""
"""00-0033357""",2018,-1.202234,9,10.223195,41,-4.253988,9,"""Taysom Hill""","""TE""","""TE"""
"""00-0033357""",2022,12.586182,21,18.037264,96,2.595402,13,"""Taysom Hill""","""TE""","""TE"""
"""00-0033357""",2019,-0.334989,8,9.938473,32,23.359712,24,"""Taysom Hill""","""TE""","""TE"""
"""00-0033357""",2021,-11.237716,141,17.6377,68,-2.829543,6,"""Taysom Hill""","""TE""","""TE"""
"""00-0033357""",2020,-2.806256,136,13.45165,91,-0.575805,15,"""Taysom Hill""","""TE""","""TE"""


In [109]:
# may be helpful later - long facet rows format
off_long = (
    off_facet.select([
        "gsis_id", "season", "display_name", "position", "position_group",
        pl.lit("pass").alias("facet"), pl.col("epa_pass").alias("epa"), pl.col("dropbacks").alias("opps")
    ])
    .vstack(off_facet.select([
        "gsis_id", "season", "display_name", "position", "position_group",
        pl.lit("rush").alias("facet"), pl.col("epa_rush").alias("epa"), pl.col("rushes").alias("opps")
    ]))
    .vstack(off_facet.select([
        "gsis_id", "season", "display_name", "position", "position_group",
        pl.lit("rec").alias("facet"), pl.col("epa_rec").alias("epa"), pl.col("targets").alias("opps")
    ]))
    .filter(pl.col("opps") > 0)
)