In [1]:
import pandas as pd

In [2]:
# Stack the weekly tracking exports (files 1 through 9) into a single frame.
# Row labels are kept as read from each file, exactly as pd.concat does by default.
tracking = pd.concat(
    pd.read_csv(f"../data/tracking_week_{i}.csv") for i in range(1, 10)
)

In [3]:
# Collapse the tracking rows to one row per (gameId, playId, frameId), keeping
# the first occurrence, and retain only the key columns plus the frame's event.
plays_frames = (
    tracking
    .drop_duplicates(subset=["gameId", "playId", "frameId"])
    .loc[:, ["gameId", "playId", "frameId", "event"]]
)

In [4]:
# For every play, collect the distinct non-null event names in order of first
# appearance; the result is a Series indexed by (gameId, playId).
plays_events = (
    plays_frames[plays_frames["event"].notna()]
    .groupby(["gameId", "playId"])["event"]
    .unique()
)

In [5]:
# Preview the per-play event sequences (pandas truncates the middle rows).
plays_events

gameId      playId
2022090800  56              [pass_arrived, pass_outcome_caught, tackle]
            80                             [run, first_contact, tackle]
            101             [ball_snap, handoff, first_contact, tackle]
            122       [pass_arrived, pass_outcome_caught, first_cont...
            146             [ball_snap, handoff, first_contact, tackle]
                                            ...                        
2022110700  3658            [ball_snap, handoff, first_contact, tackle]
            3686            [ball_snap, handoff, first_contact, tackle]
            3707            [ball_snap, handoff, first_contact, tackle]
            3740            [ball_snap, handoff, first_contact, tackle]
            3787            [ball_snap, handoff, first_contact, tackle]
Name: event, Length: 12486, dtype: object

In [6]:
# Count how often each event sequence (compared via its string form) occurs.
events_sequences_counts = plays_events.astype(str).value_counts()
# Keep only sequences seen on more than one play.
events_sequences = pd.Series(
    events_sequences_counts.loc[events_sequences_counts.gt(1)].index
)
# Drop sequences that contain "pass_outcome_caught" without "pass_arrived".
events_sequences = events_sequences[
    events_sequences.apply(
        lambda sequence: "pass_outcome_caught" not in sequence or "pass_arrived" in sequence
    )
]

In [7]:
# Keep the plays whose event sequence (string form) is one of the retained sequences.
valid_plays = plays_events.loc[
    plays_events.astype(str).isin(events_sequences.to_numpy())
]

In [8]:
# Preview the plays that survived the sequence filter (display is truncated by pandas).
valid_plays

gameId      playId
2022090800  56              [pass_arrived, pass_outcome_caught, tackle]
            80                             [run, first_contact, tackle]
            101             [ball_snap, handoff, first_contact, tackle]
            122       [pass_arrived, pass_outcome_caught, first_cont...
            146             [ball_snap, handoff, first_contact, tackle]
                                            ...                        
2022110700  3658            [ball_snap, handoff, first_contact, tackle]
            3686            [ball_snap, handoff, first_contact, tackle]
            3707            [ball_snap, handoff, first_contact, tackle]
            3740            [ball_snap, handoff, first_contact, tackle]
            3787            [ball_snap, handoff, first_contact, tackle]
Name: event, Length: 11670, dtype: object

In [9]:
# Index the per-frame table by (gameId, playId) and select the frames that
# belong to the valid plays, in the order of valid_plays' index.
plays_frames_valid = (
    plays_frames
    .set_index(["gameId", "playId"])
    .loc[valid_plays.index]
)

In [10]:
# Preview the per-frame rows of the valid plays (display is truncated by pandas).
plays_frames_valid

Unnamed: 0_level_0,Unnamed: 1_level_0,frameId,event
gameId,playId,Unnamed: 2_level_1,Unnamed: 3_level_1
2022090800,56,1,
2022090800,56,2,pass_arrived
2022090800,56,3,
2022090800,56,4,
2022090800,56,5,
...,...,...,...
2022110700,3787,40,tackle
2022110700,3787,41,
2022110700,3787,42,
2022110700,3787,43,


In [11]:
# Event names that mark the last frame labelled "ball_carrier" in a play.
possible_last_event = ["tackle", "out_of_bounds", "touchdown", "fumble", "qb_slide", "safety"]
# Event names whose frame starts the "ball_carrier" label on a run play.
run_event = ["handoff", "run"]
# Event names whose frame is labelled "qb" on a run play.
ball_snap_event = ["ball_snap", "snap_direct", "autoevent_ballsnap"]


def _fill_between_known_labels(labels):
    """Forward-fill ``labels`` only between its first and last non-null entries.

    Entries before the first and after the last non-null value are returned
    unchanged. The span is located by position, so any index works.
    """
    known = labels.notna().to_numpy()
    if not known.any():
        return labels
    first = int(known.argmax())
    stop = len(known) - int(known[::-1].argmax())  # exclusive end position
    filled = labels.copy()
    # The slice starts on a non-null entry, so a forward fill closes every gap.
    filled.iloc[first:stop] = labels.iloc[first:stop].ffill().to_numpy()
    return filled


def get_ball_carrier_from_event(group, plays_events):
    """Label each frame of one play with who holds the ball.

    The ``ball_carrier`` column is set to ``"qb"``, ``"ball_carrier"`` or left
    null, based on the play's events:

    * Run play (any of ``run_event`` present): run-event frames and
      ``possible_last_event`` frames become ``"ball_carrier"``; the frame just
      before the first run event becomes ``"qb"``. If a ``ball_snap_event`` is
      present its frame becomes ``"qb"`` and gaps are filled only between the
      first and last labelled frames; otherwise the whole column is back-filled.
    * Pass play (``"pass_arrived"`` present, no run event): the
      ``"pass_arrived"`` frame and ``possible_last_event`` frames become
      ``"ball_carrier"`` and gaps between the first and last labelled frames
      are filled.
    * Any other play: labels are left as they were.

    Parameters
    ----------
    group : pandas.DataFrame
        Frames of a single play, in frame order, with ``gameId``, ``playId``
        and ``event`` columns. An existing ``ball_carrier`` column is used as
        the starting labels; if absent it is created as all-null.
    plays_events : pandas.Series
        Indexed by ``(gameId, playId)``; each value is the collection of event
        names of that play.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with the ``ball_carrier`` column filled in. The
        input frame is not modified and no rows are added or removed.
    """
    # Work on a copy: mutating the frame handed over by groupby.apply is unsafe.
    group = group.copy()
    if "ball_carrier" in group.columns:
        carrier = group["ball_carrier"].astype(object)
    else:
        carrier = pd.Series([None] * len(group), index=group.index, dtype=object)

    if group.empty:
        group["ball_carrier"] = carrier.to_numpy()
        return group

    play_key = (group["gameId"].iloc[0], group["playId"].iloc[0])
    play_events = set(plays_events.loc[play_key])
    frame_event = group["event"]
    is_last = frame_event.isin(possible_last_event).to_numpy()
    has_last = not play_events.isdisjoint(possible_last_event)

    if not play_events.isdisjoint(run_event):
        is_run = frame_event.isin(run_event).to_numpy()
        carrier.loc[is_run] = "ball_carrier"
        if has_last:
            carrier.loc[is_last] = "ball_carrier"

        # Locate the first run event by POSITION. Label arithmetic
        # (``idxmax() - 1``) breaks as soon as the index does not start at 0
        # and can even append a spurious row through setting-with-enlargement.
        if is_run.any():
            first_run = int(is_run.argmax())
            if first_run > 0:
                carrier.iloc[first_run - 1] = "qb"

        if not play_events.isdisjoint(ball_snap_event):
            carrier.loc[frame_event.isin(ball_snap_event).to_numpy()] = "qb"
            carrier = _fill_between_known_labels(carrier)
        else:
            # No snap recorded: everything before a labelled frame inherits it.
            carrier = carrier.bfill()

    elif "pass_arrived" in play_events:
        carrier.loc[(frame_event == "pass_arrived").to_numpy()] = "ball_carrier"
        if has_last:
            carrier.loc[is_last] = "ball_carrier"
        carrier = _fill_between_known_labels(carrier)

    # Assign positionally so duplicate or unusual index labels cannot misalign.
    group["ball_carrier"] = carrier.to_numpy()
    return group

In [12]:
# Start with an all-None label column on every frame of the valid plays.
# NOTE(review): this mutates plays_frames_valid in place, so the preview shown
# in the earlier cell no longer reflects its columns.
plays_frames_valid["ball_carrier"] = None
# Try the labelling on a single play (game 2022090800, play 101) and display the result.
plays_frames_valid.loc[(2022090800,101)].reset_index().groupby(["gameId","playId"]).apply(lambda x: get_ball_carrier_from_event(x, plays_events))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gameId,playId,frameId,event,ball_carrier
gameId,playId,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022090800,101,0,2022090800,101,1,,
2022090800,101,1,2022090800,101,2,,
2022090800,101,2,2022090800,101,3,,
2022090800,101,3,2022090800,101,4,,
2022090800,101,4,2022090800,101,5,,
2022090800,101,5,2022090800,101,6,ball_snap,qb
2022090800,101,6,2022090800,101,7,,qb
2022090800,101,7,2022090800,101,8,,qb
2022090800,101,8,2022090800,101,9,,qb
2022090800,101,9,2022090800,101,10,,qb


In [13]:
# Play-level and player-level lookup tables.
plays = pd.read_csv("../data/plays.csv")
players = pd.read_csv("../data/players.csv")

# Distinct (game, play, player) triples seen in tracking, with each player's
# position attached; rows without an nflId are dropped before the join.
players_positions = (
    tracking[["gameId", "playId", "nflId"]]
    .drop_duplicates()
    .dropna()
    .merge(players[["nflId", "position"]], on="nflId")
)

# Players listed as "QB" per play, exposed as qbId.
# NOTE(review): a play may have zero or several QB rows here — confirm before
# joining this table on (gameId, playId) for all plays.
qb_players = (
    players_positions.loc[players_positions["position"].eq("QB")]
    .drop(columns="position")
    .rename(columns={"nflId": "qbId"})
)

In [14]:
def _get_ball_carrier_id(x):
    if x["ball_carrier"]=="qb":
        res = x["qbId"]
    elif x["ball_carrier"]=="ball_carrier":
        res = x["ballCarrierId"]
    else:
        res = None
    return res

In [16]:
# Label the frames of one play (game 2022090800, play 101), then attach the
# play-level context columns and the play's QB id.
test_play = (
    plays_frames_valid.loc[(2022090800,101)]
    .reset_index()
    .groupby(["gameId","playId"])
    .apply(lambda play_frames: get_ball_carrier_from_event(play_frames, plays_events))
    .reset_index(drop=True)
    .merge(
        plays[["gameId", "playId", "ballCarrierId", "defensiveTeam", "absoluteYardlineNumber", "yardsToGo"]],
        on=["gameId","playId"],
    )
    .merge(qb_players, on=["gameId", "playId"])
)
# Resolve the per-frame label ("qb" / "ball_carrier") into a concrete player id.
test_play["ball_carrier_id"] = test_play.apply(_get_ball_carrier_id, axis=1)

In [17]:
# Inspect the labelled frames of the test play with their resolved carrier ids.
test_play

Unnamed: 0,gameId,playId,frameId,event,ball_carrier,ballCarrierId,defensiveTeam,absoluteYardlineNumber,yardsToGo,qbId,ball_carrier_id
0,2022090800,101,1,,,47857,LA,72,10,46076.0,
1,2022090800,101,2,,,47857,LA,72,10,46076.0,
2,2022090800,101,3,,,47857,LA,72,10,46076.0,
3,2022090800,101,4,,,47857,LA,72,10,46076.0,
4,2022090800,101,5,,,47857,LA,72,10,46076.0,
5,2022090800,101,6,ball_snap,qb,47857,LA,72,10,46076.0,46076.0
6,2022090800,101,7,,qb,47857,LA,72,10,46076.0,46076.0
7,2022090800,101,8,,qb,47857,LA,72,10,46076.0,46076.0
8,2022090800,101,9,,qb,47857,LA,72,10,46076.0,46076.0
9,2022090800,101,10,,qb,47857,LA,72,10,46076.0,46076.0


In [18]:
# Join every tracked row of the test play to its frame-level labels, then flag
# which rows belong to the defensive club and which row is the current carrier.
test_play_tracking = (
    tracking[["gameId", "playId","nflId","frameId", "club","x","y", "playDirection"]]
    .merge(test_play, how="inner", on=["gameId", "playId","frameId"])
    .assign(
        is_defense=lambda rows: rows["club"] == rows["defensiveTeam"],
        is_ball_carrying=lambda rows: rows["nflId"] == rows["ball_carrier_id"],
    )
)

In [19]:
# Inspect the per-player, per-frame table for the test play (display is truncated by pandas).
test_play_tracking

Unnamed: 0,gameId,playId,nflId,frameId,club,x,y,playDirection,event,ball_carrier,ballCarrierId,defensiveTeam,absoluteYardlineNumber,yardsToGo,qbId,ball_carrier_id,is_defense,is_ball_carrying
0,2022090800,101,35472.0,1,BUF,73.130000,27.640000,left,,,47857,LA,72,10,46076.0,,False,False
1,2022090800,101,38577.0,1,LA,67.320000,29.830000,left,,,47857,LA,72,10,46076.0,,True,False
2,2022090800,101,41239.0,1,LA,71.280000,26.410000,left,,,47857,LA,72,10,46076.0,,True,False
3,2022090800,101,42392.0,1,BUF,72.500000,29.460000,left,,,47857,LA,72,10,46076.0,,False,False
4,2022090800,101,42816.0,1,LA,64.670000,40.760000,left,,,47857,LA,72,10,46076.0,,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1122,2022090800,101,48512.0,49,BUF,73.340000,39.510000,left,,,47857,LA,72,10,46076.0,,False,False
1123,2022090800,101,52536.0,49,BUF,60.050000,34.310000,left,,,47857,LA,72,10,46076.0,,False,False
1124,2022090800,101,53079.0,49,BUF,66.780000,46.350000,left,,,47857,LA,72,10,46076.0,,False,False
1125,2022090800,101,53522.0,49,BUF,67.180000,44.930000,left,,,47857,LA,72,10,46076.0,,False,False


In [20]:
# Persist the test play's tracking table as CSV, without writing the row index.
test_play_tracking.to_csv(
    path_or_buf="../data/test_play_tracking.csv",
    index=False,
)