In [2]:
%load_ext autoreload
%autoreload 2

import polars as pl

In [3]:
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
# Load the per-player ORPSP predictions; "NA" and empty cells are read as null.
orpsp_predictions = pl.read_csv(
    "../data/orpsp_predictions.csv",
    null_values=["NA", ""],
)

In [5]:
# Load the player-by-play table; "NA" and empty cells are read as null.
player_play = pl.read_csv(
    "../data/player_play.csv",
    null_values=["NA", ""],
)

In [6]:
# Load the player roster table; "NA" and empty cells are read as null.
players = pl.read_csv(
    "../data/players.csv",
    null_values=["NA", ""],
)

In [7]:
# Build one row per (game, play, quarterback): attach each player's name and
# position to the player-play rows, then keep only rows whose position is "QB".
qb_rows = (
    player_play.select(["gameId", "playId", "nflId"])
    .join(
        players.select(["nflId", "displayName", "position"]),
        on=["nflId"],
        how="inner",
    )
    .filter(pl.col("position") == "QB")
)

play_key = ["gameId", "playId"]

# Flag plays that appear more than once, i.e. plays listing more than one QB.
qb_rows = qb_rows.with_columns(
    qb_rows.select(play_key).is_duplicated().alias("duplicated")
)

# On those multi-QB plays, drop the Taysom Hill / Malik Willis rows
# (presumably so the play is attributed to the other quarterback -- confirm).
is_dropped_qb = pl.col("duplicated") & pl.col("displayName").is_in(
    ["Taysom Hill", "Malik Willis"]
)
qb_rows = qb_rows.filter(~is_dropped_qb)

# Recompute the flag after the removal. NOTE(review): rows still flagged here are
# not removed, so such plays would match more than once in a later join on
# (gameId, playId).
plays_qb = qb_rows.with_columns(
    qb_rows.select(play_key).is_duplicated().alias("duplicated")
)

In [8]:
# Enrich every prediction row with the player's team and target/reception flags
# for that play. A left join keeps predictions that have no player_play match.
player_play_columns = [
    "gameId",
    "playId",
    "nflId",
    "teamAbbr",
    "wasTargettedReceiver",
    "hadPassReception",
]
data = orpsp_predictions.join(
    player_play.select(player_play_columns),
    on=["gameId", "playId", "nflId"],
    how="left",
)

In [9]:
# Keep only the rows where the player was the targeted receiver on the play.
data_targetted = data.filter(
    pl.col("wasTargettedReceiver").eq(1)
)

In [10]:
# Load the game table; "NA" and empty cells are read as null.
games = pl.read_csv(
    "../data/games.csv",
    null_values=["NA", ""],
)

In [11]:
# Add boolean win flags for both sides. Strict comparisons mean that a game
# with equal final scores is a win for neither team.
games = games.with_columns(
    home_win=pl.col("homeFinalScore").gt(pl.col("visitorFinalScore")),
    visitor_win=pl.col("homeFinalScore").lt(pl.col("visitorFinalScore")),
)

In [12]:
# Reshape games into one (teamAbbr, win) row per team per game, stacking the
# home rows on top of the visitor rows, then average the win flag per team to
# get each team's win rate.
home_results = games.select(
    pl.col("homeTeamAbbr").alias("teamAbbr"),
    pl.col("home_win").alias("win"),
)
visitor_results = games.select(
    pl.col("visitorTeamAbbr").alias("teamAbbr"),
    pl.col("visitor_win").alias("win"),
)
team_win = (
    pl.concat([home_results, visitor_results])
    .group_by("teamAbbr")
    .agg(pl.col("win").mean())
)

In [None]:
# Show the teams ordered by ascending win rate.
team_win.sort(by="win")

In [14]:
# Per team: mean ORPSP of the targeted receivers and the share of targets that
# were caught (mean of the hadPassReception flag).
team_stats = (
    data_targetted
    .group_by("teamAbbr")
    .agg(pl.col("orpsp", "hadPassReception").mean())
)

In [15]:
# Attach the quarterback's name to every targeted-receiver row via the play key.
qb_names_by_play = plays_qb.select(["gameId", "playId", "displayName"])
qb_targetted = data_targetted.join(
    qb_names_by_play,
    on=["gameId", "playId"],
    how="inner",
)

# Per quarterback: mean ORPSP of the targets, completion rate, and number of
# targeted rows (after aggregation `wasTargettedReceiver` holds that count).
# Quarterbacks with fewer than 50 such rows are dropped.
min_qb_throws = 50
qb_stats = (
    qb_targetted
    .group_by("displayName")
    .agg(
        pl.col("orpsp", "hadPassReception").mean(),
        pl.col("wasTargettedReceiver").count(),
    )
    .filter(pl.col("wasTargettedReceiver") >= min_qb_throws)
)

In [16]:
# Per receiver (nflId): mean ORPSP when targeted, catch rate, and number of
# targets (after aggregation `wasTargettedReceiver` holds that count). Names and
# positions are joined in, then receivers with fewer than 10 targets are dropped.
min_receiver_targets = 10
player_stats = (
    data_targetted
    .group_by("nflId")
    .agg(
        pl.col("orpsp", "hadPassReception").mean(),
        pl.col("wasTargettedReceiver").count(),
    )
    .join(
        players.select(["nflId", "displayName", "position"]),
        on=["nflId"],
        how="inner",
    )
    .filter(pl.col("wasTargettedReceiver") >= min_receiver_targets)
)

In [None]:
# Team level: correlation matrix between mean ORPSP and completion rate.
team_stats.select("orpsp", "hadPassReception").corr()

In [None]:
# Team-level figure: mean ORPSP of targeted receivers against completion rate,
# with a fitted regression line and each point labelled by team abbreviation.
fig, ax = plt.subplots(figsize=(12, 8))
# Draw on the axes created above explicitly instead of relying on the
# implicit "current axes".
sns.regplot(
    x=team_stats["orpsp"].to_numpy(),
    y=team_stats["hadPassReception"].to_numpy(),
    ax=ax,
)
for row in team_stats.rows(named=True):
    ax.text(row["orpsp"], row["hadPassReception"], row["teamAbbr"], fontsize=9, ha='right', va='bottom')
# Label the axes so the figure can be read on its own, using the same wording
# as the quarterback figure.
ax.set_xlabel("Targeted Receiver Average ORPSP")
ax.set_ylabel("Completion Rate")

In [None]:
# Quarterback level: correlation matrix between mean ORPSP and completion rate.
qb_stats.select("orpsp", "hadPassReception").corr()

In [None]:
# Show the stats of the two quarterbacks highlighted in the figure below.
qb_stats.filter(
    pl.col("displayName").is_in(["Geno Smith", "Matt Ryan"])
)

In [None]:
# Quarterback figure: mean ORPSP of targeted receivers against completion rate.
# Points are coloured by number of throws, a regression line is overlaid, and
# two quarterbacks are highlighted in red.
fig, ax = plt.subplots(figsize=(16, 8))

qb_orpsp = qb_stats["orpsp"].to_numpy()
qb_completion = qb_stats["hadPassReception"].to_numpy()
qb_throws = qb_stats["wasTargettedReceiver"].to_numpy()

# Regression line only; the points are drawn separately so they can carry the
# colour scale.
sns.regplot(x=qb_orpsp, y=qb_completion, scatter=False, ax=ax)
scatter = ax.scatter(x=qb_orpsp, y=qb_completion, c=qb_throws, cmap="viridis")
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label("Throws")

highlighted_qbs = ("Geno Smith", "Matt Ryan")
for qb in qb_stats.rows(named=True):
    x_pos, y_pos, qb_name = qb["orpsp"], qb["hadPassReception"], qb["displayName"]
    if qb_name not in highlighted_qbs:
        # Regular quarterbacks get a small plain label.
        ax.text(x_pos, y_pos, qb_name, fontsize=7, ha="right", va="bottom")
        continue
    # Highlighted quarterbacks get a red marker and a larger boxed red label.
    ax.scatter(x=x_pos, y=y_pos, c="red", s=5)
    ax.text(
        x_pos,
        y_pos,
        qb_name,
        fontsize=10,
        ha="right",
        va="bottom",
        color="red",
        bbox={"facecolor": "red", "alpha": 0.1},
    )

ax.set_xlabel("Targeted Receiver Average ORPSP")
ax.set_ylabel("Completion Rate")
plt.savefig("../reports/figures/quarterback_performances", bbox_inches="tight")

In [None]:
# Receiver level: correlation matrix between mean ORPSP and catch rate.
player_stats.select("orpsp", "hadPassReception").corr()

In [None]:
# Show the stats of the three receivers highlighted in the figure below.
player_stats.filter(
    pl.col("displayName").is_in(["Amari Cooper", "Austin Ekeler", "JuJu Smith-Schuster"])
)

In [None]:
# Receiver figure: mean ORPSP when targeted against catch rate. Points are
# coloured by number of targets, a regression line is overlaid, and three
# receivers are highlighted in red.
fig, ax = plt.subplots(figsize=(16, 8))

receiver_orpsp = player_stats["orpsp"].to_numpy()
receiver_catch_rate = player_stats["hadPassReception"].to_numpy()
receiver_targets = player_stats["wasTargettedReceiver"].to_numpy()

# Regression line only; the points are drawn separately so they can carry the
# colour scale.
sns.regplot(x=receiver_orpsp, y=receiver_catch_rate, scatter=False, ax=ax)
scatter = ax.scatter(x=receiver_orpsp, y=receiver_catch_rate, c=receiver_targets, cmap="viridis")
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label("Targeted Count")

highlighted_receivers = ["Amari Cooper", "Austin Ekeler", "JuJu Smith-Schuster"]
for receiver_name in highlighted_receivers:
    # Take the first row with this display name; this raises if the receiver is
    # not present in player_stats.
    receiver = player_stats.filter(pl.col("displayName") == receiver_name).row(0, named=True)
    x_pos, y_pos = receiver["orpsp"], receiver["hadPassReception"]
    ax.scatter(x=x_pos, y=y_pos, c="red", s=5)
    # Only this one label is anchored on the left (presumably to avoid
    # overlapping a neighbouring label -- confirm).
    label_align = "left" if receiver_name == "JuJu Smith-Schuster" else "right"
    ax.text(
        x_pos,
        y_pos,
        receiver["displayName"],
        fontsize=10,
        ha=label_align,
        va="bottom",
        color="red",
        bbox={"facecolor": "red", "alpha": 0.1},
    )

ax.set_xlabel("Targeted Average ORPSP")
ax.set_ylabel("Catch Rate")
plt.savefig("../reports/figures/receiver_performances", bbox_inches="tight")