In [2]:
from pathlib import Path
import pandas as pd


In [3]:
projectRoot = Path.cwd()
trackingDf = pd.read_csv(f'{projectRoot}/data/train/input_2023_w01.csv')

In [4]:
# unique players to predict per play
cand = (
    trackingDf.loc[trackingDf["player_to_predict"]]
    .dropna(subset=["nfl_id"])  # optional, if nfl_id can be NaN
    .drop_duplicates(subset=["game_id", "play_id", "nfl_id"])
)

# count unique nfl_id per (game_id, play_id)
per_play_unique = cand.groupby(["game_id", "play_id"])["nfl_id"].nunique()

# 1) Check if EVERY play has exactly 3
all_exactly_three = (per_play_unique == 3).all()

# 2) Plays that are NOT exactly 3 (to inspect)
not_three = per_play_unique[per_play_unique == 1].reset_index(name="unique_nfl_ids")

# 3) Quick summary
summary = per_play_unique.value_counts().sort_index()
print("All plays have exactly 3?", all_exactly_three)
print("Counts distribution (unique nfl_id per play):")
print(summary)
print("Examples not equal to 3:")
print(not_three.head())


All plays have exactly 3? False
Counts distribution (unique nfl_id per play):
nfl_id
1     66
2    167
3    267
4    177
5     92
6     41
7      8
8      1
Name: count, dtype: int64
Examples not equal to 3:
      game_id  play_id  unique_nfl_ids
0  2023090700     1300               1
1  2023090700     1837               1
2  2023090700     2435               1
3  2023090700     2825               1
4  2023090700     2906               1


In [5]:
trackingDf.loc[trackingDf["player_to_predict"]].copy().drop_duplicates(subset=["game_id", "play_id", "nfl_id"])['player_role'].unique()

array(['Defensive Coverage', 'Targeted Receiver'], dtype=object)