In [1]:
from pathlib import Path
import pandas as pd


In [2]:
projectRoot = Path.cwd()
trackingDf = pd.read_csv(f'{projectRoot}/data/train/input_2023_w01.csv')

In [13]:
# unique players to predict per play
cand = (
    trackingDf.loc[trackingDf["player_to_predict"]]
    .dropna(subset=["nfl_id"])  # optional, if nfl_id can be NaN
    .drop_duplicates(subset=["game_id", "play_id", "nfl_id"])
)

# count unique nfl_id per (game_id, play_id)
per_play_unique = cand.groupby(["game_id", "play_id"])["nfl_id"].nunique()

# 1) Check if EVERY play has exactly 3
all_exactly_three = (per_play_unique == 3).all()

# 2) Plays that are NOT exactly 3 (to inspect)
not_three = per_play_unique[per_play_unique == 1].reset_index(name="unique_nfl_ids")

# 3) Quick summary
summary = per_play_unique.value_counts().sort_index()
print("All plays have exactly 3?", all_exactly_three)
print("Counts distribution (unique nfl_id per play):")
print(summary)
print("Examples not equal to 3:")
print(not_three.head())


All plays have exactly 3? False
Counts distribution (unique nfl_id per play):
nfl_id
1     66
2    167
3    267
4    177
5     92
6     41
7      8
8      1
Name: count, dtype: int64
Examples not equal to 3:
      game_id  play_id  unique_nfl_ids
0  2023090700     1300               1
1  2023090700     1837               1
2  2023090700     2435               1
3  2023090700     2825               1
4  2023090700     2906               1


In [None]:
trackingDf.loc[trackingDf["player_to_predict"]].copy().drop_duplicates(subset=["game_id", "play_id", "nfl_id"])['player_role'].unique()

Unnamed: 0,game_id,play_id,player_to_predict,nfl_id,frame_id,play_direction,absolute_yardline_number,player_name,player_height,player_weight,...,player_role,x,y,s,a,dir,o,num_frames_output,ball_land_x,ball_land_y
26,2023090700,101,True,46137,1,right,42,Justin Reid,6-1,204,...,Defensive Coverage,51.32,20.69,0.31,0.49,79.43,267.68,21,63.259998,-0.220000
52,2023090700,101,True,52546,1,right,42,L'Jarius Sneed,6-1,193,...,Defensive Coverage,44.42,11.69,0.98,1.31,86.89,66.54,21,63.259998,-0.220000
208,2023090700,101,True,44930,1,right,42,Josh Reynolds,6-3,196,...,Targeted Receiver,41.03,12.17,0.00,0.00,156.35,80.97,21,63.259998,-0.220000
234,2023090700,194,True,44888,1,left,89,Alex Anzalone,6-3,240,...,Defensive Coverage,84.92,32.43,0.00,0.00,93.22,100.58,9,84.940002,21.750000
266,2023090700,194,True,55910,1,left,89,Brian Branch,6-0,193,...,Defensive Coverage,86.70,21.16,1.31,1.84,95.06,76.97,9,84.940002,21.750000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285101,2023091100,3942,True,54469,1,left,90,Sauce Gardner,6-3,200,...,Defensive Coverage,86.07,37.37,0.71,0.49,318.69,109.78,9,78.239998,43.419998
285262,2023091100,3942,True,47879,1,left,90,Dawson Knox,6-4,254,...,Targeted Receiver,91.06,35.05,0.03,0.63,277.78,267.86,9,78.239998,43.419998
285285,2023091100,3987,True,41243,1,left,87,C.J. Mosley,6-2,250,...,Defensive Coverage,81.51,25.32,0.40,0.69,147.51,80.83,11,73.870003,11.480000
285318,2023091100,3987,True,46211,1,left,87,D.J. Reed,5-9,188,...,Defensive Coverage,79.30,12.73,0.29,0.94,352.08,69.77,11,73.870003,11.480000
