In [3]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch_geometric.data import Data,DataLoader
from torch_geometric.nn import GCNConv
from torch_geometric.utils import to_dense_batch
from tqdm import tqdm
import numpy as np
import json

In [4]:
## Reading in Data

plays_data = pd.read_csv("data/plays.csv")
player_data = pd.read_csv("data/players.csv")
player_play_data = pd.read_csv("data/player_play.csv")
game_data = pd.read_csv("data/games.csv")

passing_plays = plays_data[plays_data["passResult"].isin(["C", "I", "IN"])]

In [49]:
## Helper Functions

def get_edge_weights(edges, play, tracking_data, passing):
    # Only keep necessary events
    if passing:
        event_type = ["pass_forward", "pass_shovel"]
    else:
        event_type = ["pass_arrived"]

    # Filter to relevant tracking data for this play
    play_tracking = tracking_data[
        (tracking_data["gameId"] == play.gameId) &
        (tracking_data["playId"] == play.playId) &
        (tracking_data["event"].isin(event_type))
    ]

    # Create a MultiIndex for faster lookup
    play_tracking_indexed = play_tracking.set_index(["nflId", "event"])

    weights = []
    for p1, p2 in zip(edges[0], edges[1]):
        try:
            P1 = play_tracking_indexed.loc[(p1, event_type[0])]
            P2 = play_tracking_indexed.loc[(p2, event_type[0])]

            point1 = np.array([P1["x"], P1["y"]])
            point2 = np.array([P2["x"], P2["y"]])

            dist = np.linalg.norm(point2 - point1)
            weights.append(dist)
        except KeyError:
            weights.append(np.nan)  # Or some fallback distance
            continue

    return weights

# Finding targeted node
def get_targeted_receiver(Recs, nflIDs):
    
    for WR in Recs.itertuples(index=True):
        if (WR.wasTargettedReceiver):
            return np.where(nflIDs == WR.nflId)[0][0]
        
def adjust_position(row, game, play):

    if play["possessionTeam"].iloc[0] == game["homeTeamAbbr"].iloc[0]:
        if play["yardlineSide"].iloc[0] == game["homeTeamAbbr"].iloc[0]:
            absYard = play["yardlineNumber"].iloc[0] + 10
        elif play["yardlineSide"].iloc[0] == game["visitorTeamAbbr"].iloc[0]:
            absYard = 110 - play["yardlineNumber"].iloc[0]
        else:
            absYard = 60

        row.loc["x"] -= absYard
        row.loc["o"] += 90
        row.loc["dir"] += 90
        if row["o"] > 360: row["o"] -= 360
        if row["dir"] > 360: row["dir"] -= 360

    elif play["possessionTeam"].iloc[0] == game["visitorTeamAbbr"].iloc[0]:
        if play["yardlineSide"].iloc[0] == game["visitorTeamAbbr"].iloc[0]:
            absYard = 110 - play["yardlineNumber"].iloc[0]
        elif play["yardlineSide"].iloc[0] == game["homeTeamAbbr"].iloc[0]:
            absYard = play["yardlineNumber"].iloc[0] + 10
        else:
            absYard = 60

        row.loc["x"] = absYard - row["x"]
        row.loc["y"] = 53.3 - row["y"]
        row.loc["o"] += 270
        row.loc["dir"] += 270
        if row["o"] > 360: row["o"] -= 360
        if row["dir"] > 360: row["dir"] -= 360

    else:
        print("SOMETHING WENT WRONG")

    return row

# Getting closest distance to each receiver
def get_closest_defender(edgeIDs, row, week_data_plays, num_recs, num_defs):
    # Getting edge weights
    edge_weights = get_edge_weights(edgeIDs, row, week_data_plays, 0)

    output = []
    index = 0
    for i in range(num_recs):
        curr_closest = 99999

        for j in range(num_defs):
            if edge_weights[index] < curr_closest:
                curr_closest = edge_weights[index]
            index += 1
        
        output.append(curr_closest)

    return output


In [50]:

# Getting tracking data at moment of pass
week_data = pd.read_csv("data/tracking_week_1.csv")
week_data_passed = week_data[week_data["event"].isin(["pass_forward", "pass_shovel"])]
week_data_arrived = week_data[week_data["event"].isin(["pass_arrived"])]

# Getting list of gameId and playId for pass arrived plays
arrived_play_keys = set(zip(week_data_arrived["gameId"], week_data_arrived["playId"]))

# Getting list of completions, picks, and incompletions by gameId and playId
completions = passing_plays[passing_plays["passResult"].isin(["C","I","IN"])]
playIds = completions[["gameId", "playId"]].drop_duplicates()

# Combining passed and arrived plays into one dataframe
week_data_plays_temp = pd.concat([week_data_passed, week_data_arrived], ignore_index=True)

# Sorting by plays with completions, incompletions, and interceptions
week_data_plays = week_data_plays_temp.merge(playIds, on=["gameId", "playId"])

# Removing playIds not from this week
playIds = playIds[playIds["gameId"].isin(set(week_data_plays["gameId"]))]

graph_data = []

for i,row in tqdm(enumerate(playIds.itertuples(index=True)), total=len(playIds), desc="Processing Plays", unit="play"):

    if (row.gameId, row.playId) not in arrived_play_keys:
        continue

    # Initializing play_data
    play_data = {}

    # Getting list of eligible receivers and defenders in coverage
    Recs = player_play_data[(player_play_data["gameId"] == row.gameId) & (player_play_data["playId"] == row.playId) & (player_play_data["wasRunningRoute"] == True)]
    Defs = player_play_data[(player_play_data["gameId"] == row.gameId) & (player_play_data["playId"] == row.playId) & ~pd.isna(player_play_data["pff_defensiveCoverageAssignment"])]

    # Creating edges array:  size = [2, len(Receivers) * len(Defenders)]
    R_nodes = np.arange(len(Recs))
    D_nodes = np.arange(len(Recs), len(Defs) + len(Recs))
    play_data["edges"] = np.vstack([np.repeat(R_nodes, len(Defs)), np.tile(D_nodes, len(Recs))])

    # Creating edges array with nflIds
    R_nodes2 = Recs["nflId"].values
    D_nodes2 = Defs["nflId"].values
    nflIDs = np.concatenate([Recs["nflId"].values, Defs["nflId"].values])       # List of nflIds
    edgeIDs = np.vstack([np.repeat(R_nodes2, len(Defs)), np.tile(D_nodes2, len(Recs))])     # Structured edgeIds

    # Getting distance from each offensive player to each defensive player
    play_data["edge_weights"] = get_edge_weights(edgeIDs, row, week_data_plays, 1)

    # Getting current game and play data
    curr_game = game_data[game_data["gameId"] == row.gameId]
    curr_play = plays_data[(plays_data["gameId"] == row.gameId) & (plays_data["playId"] == row.playId)]

    # Creating vectors of node data for each player
    # Current Vector = [ x, y, speed, acceleration, orientation, direction of motion ]
    Receivers = []
    for WR in Recs.itertuples(index=True):
        WR_row = week_data_plays[(week_data_plays["gameId"] == row.gameId) & (week_data_plays["playId"] == row.playId) & (week_data_plays["nflId"] == WR.nflId)].iloc[0]

        # Adjusting position data
        WR_row = adjust_position(WR_row, curr_game, curr_play)

        Receivers.append([1, WR_row["x"], WR_row["y"], WR_row["s"], WR_row["a"], WR_row["o"], WR_row["dir"]])   # 1 = Offensive Player

    Defenders = []
    for Def in Defs.itertuples(index=True):
        Def_row = week_data_plays[(week_data_plays["gameId"] == row.gameId) & (week_data_plays["playId"] == row.playId) & (week_data_plays["nflId"] == Def.nflId)].iloc[0]
        
        # Adjusting position data
        Def_row = adjust_position(Def_row, curr_game, curr_play)
        
        Defenders.append([0, Def_row["x"], Def_row["y"], Def_row["s"], Def_row["a"], Def_row["o"], Def_row["dir"]])     # 0 = Defensive Player

    # Adding Receiver and Defender arrays to play_data
    play_data["receivers"] = Receivers
    play_data["defenders"] = Defenders

    play_data["y"] = get_closest_defender(edgeIDs, row, week_data_plays, len(Recs), len(Defs))

    graph_data.append(play_data)


  P1 = play_tracking_indexed.loc[(p1, event_type[0])]
  P2 = play_tracking_indexed.loc[(p2, event_type[0])]
  P1 = play_tracking_indexed.loc[(p1, event_type[0])]
  P2 = play_tracking_indexed.loc[(p2, event_type[0])]
Processing Plays: 100%|██████████| 1085/1085 [00:29<00:00, 36.82play/s]
