In [2]:
import pandas as pd
import torch
from tqdm import tqdm
import numpy as np

In [3]:
## Reading in Data

# Load the three source tables in one pass: play-level rows, player rows,
# and the per-player-per-play rows.
play_data, player_data, player_play_data = (
    pd.read_csv(csv_path)
    for csv_path in ("data/plays.csv", "data/players.csv", "data/player_play.csv")
)

# Keep only plays whose passResult is "C", "I" or "IN"
# (presumably complete / incomplete / intercepted -- confirm against the data dictionary).
passing_plays = play_data.loc[play_data["passResult"].isin(["C", "I", "IN"])]

In [26]:
## Helper Functions

# Calculate distance between nodes
def get_edge_weights(edges, play, tracking_data):
    """Return the Euclidean distance between the two players of every edge.

    Parameters
    ----------
    edges : sequence of two equal-length sequences
        ``edges[0][i]`` and ``edges[1][i]`` are the nflIds joined by edge ``i``.
    play : object with ``gameId`` and ``playId`` attributes
        Identifies the play whose tracking rows are used.
    tracking_data : pandas.DataFrame
        Must contain the columns ``gameId``, ``playId``, ``nflId``, ``x``, ``y``.

    Returns
    -------
    list
        One distance per edge, in edge order. An edge whose endpoint has no
        tracking row for this play gets ``nan`` (previously this silently
        produced ``0.0``).

    Notes
    -----
    If a player has several rows for the play, only the first one is used,
    so the result is always a single point-to-point distance.
    """
    # The play filter does not depend on the edge, so apply it once.
    in_play = tracking_data[
        (tracking_data["gameId"] == play.gameId)
        & (tracking_data["playId"] == play.playId)
    ]

    # One (x, y) point per player: first row wins when a player appears twice.
    first_rows = in_play.drop_duplicates(subset="nflId", keep="first")
    positions = {
        player_id: np.array([x, y], dtype=float)
        for player_id, x, y in zip(first_rows["nflId"], first_rows["x"], first_rows["y"])
    }

    weights = []
    for source_id, target_id in zip(edges[0], edges[1]):
        source = positions.get(source_id)
        target = positions.get(target_id)

        if source is None or target is None:
            # No position known for one endpoint: flag it instead of faking 0.
            weights.append(float("nan"))
            continue

        weights.append(np.linalg.norm(target - source))

    return weights

# Finding targeted node
def get_targeted_receiver(Recs, nflIDs):
    """Return the node index of the targeted receiver.

    Parameters
    ----------
    Recs : pandas.DataFrame
        Receiver rows; must contain ``nflId`` and ``wasTargettedReceiver``.
    nflIDs : array-like
        nflIds in node order. Lists are accepted as well as numpy arrays.

    Returns
    -------
    numpy integer or None
        Position in ``nflIDs`` of the first row of ``Recs`` whose
        ``wasTargettedReceiver`` flag is set, or ``None`` when no row is
        flagged.

    Raises
    ------
    IndexError
        If the flagged receiver's nflId does not occur in ``nflIDs``.
    """
    # A plain list compared with a scalar yields a single bool, not a mask.
    node_ids = np.asarray(nflIDs)

    for receiver in Recs.itertuples(index=False):
        flag = receiver.wasTargettedReceiver
        # NaN is truthy in Python; a missing flag must not count as targeted.
        if pd.isna(flag) or not flag:
            continue

        matches = np.where(node_ids == receiver.nflId)[0]
        if len(matches) == 0:
            raise IndexError(
                f"targeted receiver {receiver.nflId} is not present in nflIDs"
            )
        return matches[0]

    return None

In [36]:

# Getting tracking data at moment of pass
week_data = pd.read_csv("data/tracking_week_1.csv")
week_data = week_data[week_data["event"].isin(["pass_forward", "pass_shovel"])]

# Getting list of completions by gameId and playId
completions = passing_plays[passing_plays["passResult"] == "C"]
playIds = completions[["gameId", "playId"]].drop_duplicates()

# Filtering tracking data to only completion plays
week_data_completions = week_data.merge(playIds, on=["gameId", "playId"])

# Removing plays that have no pass-event tracking frame in this week's file.
# The match is on the (gameId, playId) pair, not on gameId alone: a play from
# one of this week's games without such a frame would otherwise stay in the
# list and fail below on an empty lookup.
tracked_plays = week_data_completions[["gameId", "playId"]].drop_duplicates()
playIds = playIds.merge(tracked_plays, on=["gameId", "playId"])

# Tracking columns that make up a node's feature vector
# Current Vector = [ x, y, speed, acceleration, orientation, direction of motion ]
NODE_FEATURE_COLUMNS = ["x", "y", "s", "a", "o", "dir"]


def _node_features(players, play_tracking):
    """Build one feature vector per row of ``players``.

    ``players`` needs an ``nflId`` column; ``play_tracking`` holds the tracking
    rows of a single play. For each player the first matching tracking row is
    used. Raises IndexError if a player has no tracking row in ``play_tracking``.
    """
    features = []
    for player in players.itertuples(index=False):
        player_rows = play_tracking[play_tracking["nflId"] == player.nflId]
        features.append([player_rows[column].iloc[0] for column in NODE_FEATURE_COLUMNS])
    return features


graph_data = []

for row in tqdm(playIds.itertuples(index=False), total=len(playIds), desc="Processing Plays", unit="play"):

    # Per-play record. Deliberately NOT named play_data, so the plays
    # DataFrame loaded in the first data cell is not overwritten.
    play_graph = {}

    # Slicing both tables down to this play once, instead of once per player / edge
    play_players = player_play_data[(player_play_data["gameId"] == row.gameId) & (player_play_data["playId"] == row.playId)]
    play_tracking = week_data_completions[(week_data_completions["gameId"] == row.gameId) & (week_data_completions["playId"] == row.playId)]

    # Getting list of eligible receivers and defenders in coverage
    Recs = play_players[play_players["wasRunningRoute"] == True]
    Defs = play_players[play_players["pff_defensiveCoverageAssignment"].notna()]

    # Creating edges array:  size = [2, len(Receivers) * len(Defenders)]
    # Receivers take node indices 0..len(Recs)-1, defenders follow directly after.
    R_nodes = np.arange(len(Recs))
    D_nodes = np.arange(len(Recs), len(Defs) + len(Recs))
    play_graph["edges"] = np.vstack([np.repeat(R_nodes, len(Defs)), np.tile(D_nodes, len(Recs))])

    # Creating the same edges array with nflIds (same ordering as the index version)
    receiver_ids = Recs["nflId"].values
    defender_ids = Defs["nflId"].values
    nflIDs = np.concatenate([receiver_ids, defender_ids])
    edgeIDs = np.vstack([np.repeat(receiver_ids, len(Defs)), np.tile(defender_ids, len(Recs))])

    # Getting distance from each offensive player to each defensive player
    play_graph["edge_weights"] = get_edge_weights(edgeIDs, row, play_tracking)

    # Creating vectors of node data for each player
    play_graph["receivers"] = _node_features(Recs, play_tracking)
    play_graph["defenders"] = _node_features(Defs, play_tracking)

    # Node index of the targeted receiver.
    # NOTE(review): this is None when no route runner is flagged as targeted;
    # confirm downstream code handles that label.
    play_graph["y"] = get_targeted_receiver(Recs, nflIDs)

    graph_data.append(play_graph)

Processing Plays: 100%|██████████| 696/696 [00:32<00:00, 21.71play/s]
