In [140]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data,DataLoader
from torch_geometric.nn import GCNConv
from tqdm import tqdm
import numpy as np

In [None]:
## Reading in Data

# Raw CSV tables, loaded once here and reused by the cells below
plays_data = pd.read_csv("data/plays.csv")
player_data = pd.read_csv("data/players.csv")
player_play_data = pd.read_csv("data/player_play.csv")
game_data = pd.read_csv("data/games.csv")

# Restrict to plays whose passResult code is C, I or IN
# (presumably complete / incomplete / intercepted -- confirm against the data dictionary)
is_pass_attempt = plays_data["passResult"].isin(["C", "I", "IN"])
passing_plays = plays_data.loc[is_pass_attempt]

In [134]:
## Helper Functions

# Calculate distance between nodes
def get_edge_weights(edges, play, tracking_data):
    """Return the Euclidean distance between the two players of every edge.

    Parameters
    ----------
    edges : array-like with two rows
        ``edges[0][i]`` and ``edges[1][i]`` are the ``nflId`` values of the two
        endpoints of edge ``i``.
    play : object exposing ``gameId`` and ``playId`` attributes
        Selects which rows of ``tracking_data`` are used.
    tracking_data : pandas.DataFrame
        Must contain the columns ``gameId``, ``playId``, ``nflId``, ``x`` and
        ``y``.  When a player has several rows for the play, the first row is
        used.

    Returns
    -------
    list of float
        One distance per edge, in edge order (empty list for no edges).

    Raises
    ------
    ValueError
        If an endpoint has no row in ``tracking_data`` for this play.
    """
    sources, targets = edges[0], edges[1]
    if len(sources) == 0:
        return []

    # Filter down to this play once instead of re-scanning the full frame twice per edge
    in_play = (tracking_data["gameId"] == play.gameId) & (tracking_data["playId"] == play.playId)
    play_rows = tracking_data.loc[in_play, ["nflId", "x", "y"]].dropna(subset=["nflId"])

    # nflId -> (x, y); setdefault keeps the first row when a player appears more than once
    coords = {}
    for player_id, px, py in zip(play_rows["nflId"], play_rows["x"], play_rows["y"]):
        coords.setdefault(player_id, (px, py))

    try:
        start_xy = np.array([coords[player_id] for player_id in sources], dtype=float)
        end_xy = np.array([coords[player_id] for player_id in targets], dtype=float)
    except KeyError as exc:
        # Fail loudly: a silent 0.0 would place the two players on top of each other
        raise ValueError(
            f"No tracking row for nflId {exc.args[0]} in game {play.gameId}, play {play.playId}"
        ) from exc

    return np.linalg.norm(end_xy - start_xy, axis=1).tolist()

# Finding targeted node
def get_targeted_receiver(Recs, nflIDs):
    """Return the position in ``nflIDs`` of the first targeted receiver.

    ``Recs`` is scanned in row order.  For the first row whose
    ``wasTargettedReceiver`` value is truthy, the index of the first entry of
    ``nflIDs`` equal to that row's ``nflId`` is returned.  When no row is
    flagged, the result is ``None``.
    """
    for receiver_id, was_targeted in zip(Recs["nflId"], Recs["wasTargettedReceiver"]):
        if not was_targeted:
            continue
        matching_positions = np.nonzero(nflIDs == receiver_id)[0]
        return matching_positions[0]

    # No receiver was flagged as the target
    return None
        
def _absolute_yardline(game, play):
    """Return the reference yard value that adjust_position shifts ``x`` by.

    ``yardlineNumber + 10`` when ``yardlineSide`` is the home team,
    ``110 - yardlineNumber`` when it is the visiting team, and 60 otherwise
    (presumably midfield, where no side is recorded -- confirm against the data).
    """
    side = play["yardlineSide"].iloc[0]
    if side == game["homeTeamAbbr"].iloc[0]:
        return play["yardlineNumber"].iloc[0] + 10
    if side == game["visitorTeamAbbr"].iloc[0]:
        return 110 - play["yardlineNumber"].iloc[0]
    return 60


def adjust_position(row, game, play):
    """Return a copy of a tracking row re-expressed relative to the play's yard line.

    Parameters
    ----------
    row : pandas.Series
        One tracking row with at least the entries ``x``, ``y``, ``o``, ``dir``.
        It is not modified; the adjusted values are written to a copy.
    game : pandas.DataFrame
        Game row(s); only the first row's ``homeTeamAbbr`` and
        ``visitorTeamAbbr`` are read.
    play : pandas.DataFrame
        Play row(s); only the first row's ``possessionTeam``, ``yardlineSide``
        and ``yardlineNumber`` are read.

    Returns
    -------
    pandas.Series
        * Home team in possession: ``x`` has the reference yard value
          subtracted, and ``o`` / ``dir`` are increased by 90.
        * Visiting team in possession: ``x`` becomes the reference yard value
          minus ``x``, ``y`` becomes ``53.3 - y``, and ``o`` / ``dir`` are
          increased by 270.
        * In both cases an angle above 360 has 360 subtracted once.
        * If the possession team matches neither team, a message is printed
          and an unmodified copy is returned.
    """
    adjusted = row.copy()  # never write through to the caller's row

    offense = play["possessionTeam"].iloc[0]
    if offense == game["homeTeamAbbr"].iloc[0]:
        adjusted["x"] = adjusted["x"] - _absolute_yardline(game, play)
        rotation = 90
    elif offense == game["visitorTeamAbbr"].iloc[0]:
        # Mirror both axes (53.3 is presumably the field width in yards -- confirm)
        adjusted["x"] = _absolute_yardline(game, play) - adjusted["x"]
        adjusted["y"] = 53.3 - adjusted["y"]
        rotation = 270
    else:
        print("SOMETHING WENT WRONG")
        return adjusted

    # Rotate both angles and wrap values that went past a full turn
    for angle in ("o", "dir"):
        rotated = adjusted[angle] + rotation
        if rotated > 360:
            rotated -= 360
        adjusted[angle] = rotated

    return adjusted

In [64]:
def prepare_data(graph):
    """Convert one play dictionary into a PyTorch Geometric ``Data`` object.

    The dictionary keys read are 'edges', 'edge_weights', 'receivers',
    'defenders' and 'y'.  Node features are the receiver rows followed by the
    defender rows, so node indices in 'edges' are expected to follow that
    ordering (assumption -- confirm against the code that builds the graphs).
    """
    # Node feature matrix: receiver rows first, defender rows appended below them
    node_features = torch.cat(
        (
            torch.tensor(graph['receivers'], dtype=torch.float),
            torch.tensor(graph['defenders'], dtype=torch.float),
        ),
        dim=0,
    )

    # Target value wrapped in a list and flattened to a 1-D float tensor
    target = torch.tensor([graph['y']], dtype=torch.float).view(-1)

    return Data(
        x=node_features,
        edge_index=torch.tensor(graph['edges'], dtype=torch.long),
        edge_attr=torch.tensor(graph['edge_weights'], dtype=torch.float),
        y=target,
    )

In [65]:
class GNNModel(nn.Module):
    """Two stacked ``GCNConv`` layers followed by a linear layer with one output.

    ``input_dim`` is the number of features per node, ``hidden_dim`` the width
    after the first convolution and ``output_dim`` the width after the second.
    The final linear layer maps ``output_dim`` to a single value (the original
    author assumed a regression task for openness prediction).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)
        self.fc = nn.Linear(output_dim, 1)

    def forward(self, x, edge_index, edge_attr):
        """Run both convolutions (each followed by ReLU), then the linear layer.

        ``edge_attr`` is handed to each ``GCNConv`` as its third positional
        argument (presumably the per-edge weight -- confirm against the
        installed torch_geometric version).
        """
        hidden = self.conv1(x, edge_index, edge_attr).relu()
        embedded = self.conv2(hidden, edge_index, edge_attr).relu()
        return self.fc(embedded)

In [138]:

# Getting tracking data at moment of pass
week_data = pd.read_csv("data/tracking_week_1.csv")
week_data = week_data[week_data["event"].isin(["pass_forward", "pass_shovel"])]

# Getting list of completions by gameId and playId
completions = passing_plays[passing_plays["passResult"] == "C"]
playIds = completions[["gameId", "playId"]].drop_duplicates()

# Filtering tracking data to only completion plays
week_data_completions = week_data.merge(playIds, on=["gameId", "playId"])

# Removing playIds that have no pass frame in this week's tracking data.
# Matching on the (gameId, playId) pair rather than on gameId alone also drops
# plays from this week's games that lack a pass_forward / pass_shovel frame;
# those would otherwise fail at `.iloc[0]` below.  The inner merge keeps the
# row order of playIds.
tracked_plays = week_data_completions[["gameId", "playId"]].drop_duplicates()
playIds = playIds.merge(tracked_plays, on=["gameId", "playId"])


def _node_features(players, play_tracking, game, play):
    """Build one [x, y, s, a, o, dir] vector per player, in the row order of ``players``.

    ``play_tracking`` must already be restricted to a single play.  Each
    player's first tracking row is passed through ``adjust_position`` before
    its values are read.
    """
    features = []
    for player in players.itertuples(index=False):
        frame = play_tracking[play_tracking["nflId"] == player.nflId].iloc[0]
        frame = adjust_position(frame, game, play)
        features.append([frame["x"], frame["y"], frame["s"], frame["a"], frame["o"], frame["dir"]])
    return features


graph_data = []
skipped_plays = []  # (gameId, playId) of plays without a targeted receiver node

for row in tqdm(playIds.itertuples(index=True), total=len(playIds), desc="Processing Plays", unit="play"):

    # Restricting both tables to the current play once, instead of once per player
    play_tracking = week_data_completions[(week_data_completions["gameId"] == row.gameId) & (week_data_completions["playId"] == row.playId)]
    play_players = player_play_data[(player_play_data["gameId"] == row.gameId) & (player_play_data["playId"] == row.playId)]

    # Getting list of eligible receivers and defenders in coverage
    Recs = play_players[play_players["wasRunningRoute"] == True]
    Defs = play_players[~pd.isna(play_players["pff_defensiveCoverageAssignment"])]

    # Node order is receivers first, then defenders
    R_ids = Recs["nflId"].values
    D_ids = Defs["nflId"].values
    nflIDs = np.concatenate([R_ids, D_ids])

    # A play without a targeted receiver has no label; prepare_data cannot turn None into a tensor
    target = get_targeted_receiver(Recs, nflIDs)
    if target is None:
        skipped_plays.append((row.gameId, row.playId))
        continue

    # Initializing play_data
    play_data = {}

    # Creating edges array:  size = [2, len(Receivers) * len(Defenders)]
    R_nodes = np.arange(len(Recs))
    D_nodes = np.arange(len(Recs), len(Defs) + len(Recs))
    play_data["edges"] = np.vstack([np.repeat(R_nodes, len(Defs)), np.tile(D_nodes, len(Recs))])

    # Creating the same edges array with nflIds instead of node indices
    edgeIDs = np.vstack([np.repeat(R_ids, len(Defs)), np.tile(D_ids, len(Recs))])

    # Getting distance from each offensive player to each defensive player
    play_data["edge_weights"] = get_edge_weights(edgeIDs, row, play_tracking)

    # Getting current game and play data
    curr_game = game_data[game_data["gameId"] == row.gameId]
    curr_play = plays_data[(plays_data["gameId"] == row.gameId) & (plays_data["playId"] == row.playId)]

    # Creating vectors of node data for each player
    # Current Vector = [ x, y, speed, acceleration, orientation, direction of motion ]
    play_data["receivers"] = _node_features(Recs, play_tracking, curr_game, curr_play)
    play_data["defenders"] = _node_features(Defs, play_tracking, curr_game, curr_play)

    play_data["y"] = target

    graph_data.append(play_data)

if skipped_plays:
    print(f"Skipped {len(skipped_plays)} play(s) with no targeted receiver among the route runners")


Processing Plays: 100%|██████████| 696/696 [00:33<00:00, 21.03play/s]


In [141]:
# Convert every play dictionary into a PyTorch Geometric Data object
data_list = [prepare_data(graph) for graph in graph_data]
if not data_list:
    # Without this guard the lines below would read an undefined (or stale) `data`
    raise ValueError("graph_data is empty; build the play graphs before creating the model")

# Last converted graph, bound explicitly under the name the original loop left behind
data = data_list[-1]

input_dim = data.x.size(1)  # Features per node (6 for both receivers and defenders)
hidden_dim = 64
output_dim = 32  # You can adjust this based on your task
model = GNNModel(input_dim, hidden_dim, output_dim)

# Print the model
print(model)

GNNModel(
  (conv1): GCNConv(6, 64)
  (conv2): GCNConv(64, 32)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)
