In [37]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.utils import to_dense_batch
from tqdm import tqdm
import numpy as np
import json
import os

In [38]:

plays_data = pd.read_csv("data/plays.csv")
player_data = pd.read_csv("data/players.csv")
player_play_data = pd.read_csv("data/player_play.csv")
game_data = pd.read_csv("data/games.csv")

passing_plays = plays_data[plays_data["passResult"].isin(["C", "I", "IN"])]

weight_params = {
    'dis': 1.0,
    's': 0.3,
    'a': 0.3,
    'dir': 0.12,
    'o': 0.12
}

def compute_edge_weight(player1, player2, weights):
    # print(player1, player2)
    """
    Calculate a combined edge weight between two players.
    
    Parameters:
    - player1, player2: Pandas Series or dict-like objects with keys 
      'x', 'y', 'speed', 'acceleration', 'dir', 'o'.
    - weights: Dictionary with keys 'distance', 'speed', 'acc', 'dir', 'ori'
      representing the coefficient for each term.
      
    Returns:
    - A scalar weight for the edge.
    """
    # Calculate Euclidean distance between positions.
    pos_diff = np.sqrt((player1['x'] - player2['x'])**2 + (player1['y'] - player2['y'])**2)
    
    # Compute absolute differences for speed and acceleration.
    speed_diff = abs(player1['s'] - player2['s'])
    acc_diff = abs(player1['a'] - player2['a'])
    
    # Compute the angle differences (ensure angles are treated correctly).
    dir_diff = angle_difference(player1['dir'], player2['dir'])
    ori_diff = angle_difference(player1['o'], player2['o'])
    
    # Combine using the provided weights.
    edge_weight = (weights['dis'] * pos_diff +
                   weights['s'] * speed_diff +
                   weights['a'] * acc_diff +
                   weights['dir'] * dir_diff +
                   weights['o'] * ori_diff)
    return edge_weight


def angle_difference(angle1, angle2):
    """
    Compute the minimal absolute difference between two angles (in degrees)
    taking into account wrap-around at 360 degrees.
    """
    diff = abs(angle1 - angle2) % 360
    if diff > 180:
        diff = 360 - diff
    return diff

def adjust_position(row, game, play): # game and play are rows of the game and play dataframes

    row = row.copy()

    if play["possessionTeam"].iloc[0] == game["homeTeamAbbr"].iloc[0]:
        if play["yardlineSide"].iloc[0] == game["homeTeamAbbr"].iloc[0]:
            absYard = play["yardlineNumber"].iloc[0] + 10
        elif play["yardlineSide"].iloc[0] == game["visitorTeamAbbr"].iloc[0]:
            absYard = 110 - play["yardlineNumber"].iloc[0]
        else:
            absYard = 60

        row.loc["x"] -= absYard
        row.loc["o"] += 90
        row.loc["dir"] += 90
        if row["o"] > 360: row["o"] -= 360
        if row["dir"] > 360: row["dir"] -= 360

    elif play["possessionTeam"].iloc[0] == game["visitorTeamAbbr"].iloc[0]:
        if play["yardlineSide"].iloc[0] == game["visitorTeamAbbr"].iloc[0]:
            absYard = 110 - play["yardlineNumber"].iloc[0]
        elif play["yardlineSide"].iloc[0] == game["homeTeamAbbr"].iloc[0]:
            absYard = play["yardlineNumber"].iloc[0] + 10
        else:
            absYard = 60

        row.loc["x"] = absYard - row["x"]
        row.loc["y"] = 53.3 - row["y"]
        row.loc["o"] += 270
        row.loc["dir"] += 270
        if row["o"] > 360: row["o"] -= 360
        if row["dir"] > 360: row["dir"] -= 360

    else:
        print("SOMETHING WENT WRONG")

    return row


def get_targeted_receiver(Recs, nflIDs):
    
    for WR in Recs.itertuples(index=True):
        if (WR.wasTargettedReceiver):
            return np.where(nflIDs == WR.nflId)[0][0]

def process_play(row):
    # print(row)
    currPlayId = row["playId"]
    currGameId = row["gameId"]

    curr_game = game_data[game_data["gameId"] == row["gameId"]]
    curr_play = plays_data[(plays_data["gameId"] == row["gameId"]) & (plays_data["playId"] == row["playId"])]

    row = adjust_position(row, curr_game, curr_play)
    # print("-----------------")

    Recs = player_play_data[(player_play_data["gameId"] == row["gameId"]) & (player_play_data["playId"] == row["playId"]) & (player_play_data["wasRunningRoute"] == True)]
    Defs = player_play_data[(player_play_data["gameId"] == row["gameId"]) & (player_play_data["playId"] == row["playId"]) & ~pd.isna(player_play_data["pff_defensiveCoverageAssignment"])]

    Recs = week_data_passed[(week_data_passed["gameId"] == row["gameId"]) & (week_data_passed["playId"] == row["playId"]) & (week_data_passed["nflId"].isin(Recs["nflId"]))].copy()
    Defs = week_data_passed[(week_data_passed["gameId"] == row["gameId"]) & (week_data_passed["playId"] == row["playId"]) & (week_data_passed["nflId"].isin(Defs["nflId"]))].copy()

    Recs = Recs.reset_index(drop=True)
    Defs = Defs.reset_index(drop=True)

    R_nodes = np.arange(len(Recs))
    D_nodes = np.arange(len(Recs), len(Defs) + len(Recs))

    play_data = {} # for analyzing all recs/defenders
    play_data["edges"] = np.vstack([np.repeat(R_nodes, len(Defs)), np.tile(D_nodes, len(Recs))])

    edge_index_list = []
    edge_weight_list = []

    for rec_idx, rec in Recs.iterrows():
        for def_idx, defe in Defs.iterrows():
            # Append the edge: receiver -> defender.
            edge_index_list.append([rec_idx, def_idx + len(Recs)])
            
            # Compute the edge weight using the custom function.
            weight_value = compute_edge_weight(rec, defe, weight_params)
            edge_weight_list.append([weight_value])  # Wrapped in a list for proper shape



    
    currPlay = plays_data[(plays_data["playId"] == currPlayId) & (plays_data["gameId"] == currGameId)]
    # print(currPlay)

    return edge_index_list, edge_weight_list, Recs, Defs, currPlay



week_data = pd.read_csv("data/tracking_week_1.csv")
week_data_passed = week_data[week_data["event"].isin(["pass_forward", "pass_shovel"])]
week_data_arrived = week_data[week_data["event"].isin(["pass_arrived"])]
# print(len(week_data_arrived))
# print(week_data_arrived.iloc[0])

# arrived_play_keys = set(zip(week_data_arrived["gameId"], week_data_arrived["playId"]))
# print(len(arrived_play_keys))

a=0
data_list = []

if os.path.exists("processed_plays.pt"):
    print("Loading cached processed plays...")
    data_list = torch.load("processed_plays.pt")
else:
    for i, row in tqdm(week_data_passed.iterrows(), total=len(week_data_passed), desc="Processing Plays", unit="play"):
        # print(i, row)
        edge_index_list, edge_weight_list, Recs, Defs, currPlay = process_play(row)
        if Recs.empty or Defs.empty:
                continue
        rec_features = Recs[['x', 'y', 's', 'a', 'dir', 'o']].to_numpy()
        def_features = Defs[['x', 'y', 's', 'a', 'dir', 'o']].to_numpy()
        node_features = np.vstack([rec_features, def_features])
        x = torch.tensor(node_features, dtype=torch.float)

        if len(edge_index_list) == 0:
            continue

        edge_index = torch.tensor(edge_index_list, dtype=torch.long).t().contiguous() 
        edge_attr = torch.tensor(edge_weight_list, dtype=torch.float) 

        pass_result = currPlay["passResult"].iloc[0]
        if pass_result == "S":
            continue
        # print("Pass Result:", pass_result)
        label_mapping = {"C": 0, "I": 1, "IN": 2}
        y = torch.tensor(label_mapping[pass_result], dtype=torch.long)

        data_obj = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
        data_obj.batch = torch.zeros(x.size(0), dtype=torch.long)
        data_list.append(data_obj)

torch.save(data_list, "processed_plays.pt")



class NFLGraphClassifier(nn.Module):
    def __init__(self, in_channels, hidden_channels, num_classes):
        super(NFLGraphClassifier, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        # A fully connected layer to map the global pooled vector to class logits.
        self.fc = nn.Linear(hidden_channels, num_classes)
    
    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch if hasattr(data, 'batch') else None
        
        # First GCN layer with ReLU activation.
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Second GCN layer.
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        
        # Global pooling: aggregate node embeddings to form a graph representation.
        # If working with a single graph, you can supply a batch tensor of zeros.
        if batch is None:  
            batch = torch.zeros(x.size(0), dtype=torch.long, device=x.device)
        x = global_mean_pool(x, batch)
        
        # Final classification layer.
        x = self.fc(x)
        return x
    
model = NFLGraphClassifier(in_channels=6, hidden_channels=32, num_classes=3)
train_loader = DataLoader(data_list, batch_size=32, shuffle=True)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 100

def train(model, data):
    model.train()
    optimizer.zero_grad()
    # Forward pass: obtain logits
    out = model(data)
    # data.y contains the ground truth label for the graph.
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()
    return loss.item()


for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    num_batches = 0

    for batch in train_loader:
        loss = train(model, batch)
        total_loss += loss  
        num_batches += 1

    avg_loss = total_loss / num_batches if num_batches > 0 else 0
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

print("This shit done")





# -----------------------------------------
# STEP 1: Constructing the Graph Data
# -----------------------------------------




####### NOT WORKING BELOW HERE YET ######### RETURN IS INTENTIONAL TO STOP EXECUTION



# Example: Suppose each play involves 11 players.
# For simplicity, here we'll generate random (x, y) positions for each player.


# Create graph edges:
# For the NFL simulation, you might either:
#   - Connect all players (fully connected), or
#   - Connect only nearby players using k-Nearest Neighbors (kNN)
# For demonstration, we’ll construct a fully connected graph (excluding self-loops).

# edge_index_list = []
# edge_attr_list = []

# for i in range(num_players):
#     for j in range(num_players):
#         if i != j:
#             edge_index_list.append([i, j])
#             # Compute Euclidean distance as separation between players i and j.
#             distance = torch.norm(player_positions[i] - player_positions[j], p=2)
#             edge_attr_list.append([distance])  # wrapped in a list to create a 2D tensor later

# # Convert lists to tensors.
# edge_index = torch.tensor(edge_index_list, dtype=torch.long).t().contiguous()  # shape [2, num_edges]
# edge_attr = torch.tensor(edge_attr_list, dtype=torch.float)  # shape [num_edges, 1]

# # -----------------------------------------
# # STEP 2: Define the Ground Truth Label
# # -----------------------------------------
# # Map the pass outcomes to integer labels.
# # For example:
# #    0 => complete, 1 => incomplete, 2 => interception
# pass_outcome = "complete"  # This would come from your dataset in practice.
# label_mapping = {'complete': 0, 'incomplete': 1, 'interception': 2}
# # Graph-level label as a tensor
# graph_label = torch.tensor(label_mapping[pass_outcome], dtype=torch.long)

# # -----------------------------------------
# # STEP 3: Create the PyTorch Geometric Data Object
# # -----------------------------------------
# # Note: In a realistic scenario, you might also have additional node features.
# data = Data(
#     x=player_positions,     # Node features (e.g., player positions)
#     edge_index=edge_index,  # Graph connectivity
#     edge_attr=edge_attr,    # Edge weights (separation between players)
#     y=graph_label         # Graph-level label (pass outcome)
# )

# print("Constructed Graph Data:\n", data)

# # -----------------------------------------
# # STEP 4: Define a GNN Model for Graph-Level Classification
# # -----------------------------------------

# class NFLGraphClassifier(nn.Module):
#     def __init__(self, in_channels, hidden_channels, num_classes):
#         super(NFLGraphClassifier, self).__init__()
#         self.conv1 = GCNConv(in_channels, hidden_channels)
#         self.conv2 = GCNConv(hidden_channels, hidden_channels)
#         # A fully connected layer to map the global pooled vector to class logits.
#         self.fc = nn.Linear(hidden_channels, num_classes)
    
#     def forward(self, data):
#         x, edge_index, batch = data.x, data.edge_index, data.batch if hasattr(data, 'batch') else None
        
#         # First GCN layer with ReLU activation.
#         x = self.conv1(x, edge_index)
#         x = F.relu(x)
#         # Second GCN layer.
#         x = self.conv2(x, edge_index)
#         x = F.relu(x)
        
#         # Global pooling: aggregate node embeddings to form a graph representation.
#         # If working with a single graph, you can supply a batch tensor of zeros.
#         if batch is None:  
#             batch = torch.zeros(x.size(0), dtype=torch.long, device=x.device)
#         x = global_mean_pool(x, batch)
        
#         # Final classification layer.
#         x = self.fc(x)
#         return x

# # -----------------------------------------
# # STEP 5: Example Training Setup
# # -----------------------------------------

# # Hyperparameters for our model:
# in_channels = player_positions.size(1)  # Number of node features (here 2: [x, y])
# hidden_channels = 32
# num_classes = 3  # complete, incomplete, interception

# # Initialize our GNN model.
# model = NFLGraphClassifier(in_channels, hidden_channels, num_classes)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# criterion = nn.CrossEntropyLoss()  # Expects raw logits and ground truth labels.

# def train(model, data):
#     model.train()
#     optimizer.zero_grad()
#     # Forward pass: obtain logits
#     out = model(data)
#     # data.y contains the ground truth label for the graph.
#     loss = criterion(out, data.y)
#     loss.backward()
#     optimizer.step()
#     return loss.item()

# # To enable global pooling for a single graph, we assign a dummy batch index to all nodes.
# data.batch = torch.zeros(data.num_nodes, dtype=torch.long)

# # Training loop (one iteration for demo purposes).
# loss_val = train(model, data)
# print("Training loss:", loss_val)


Processing Plays: 100%|██████████| 24863/24863 [08:30<00:00, 48.73play/s]


Epoch 1/100, Loss: 1.4656
Epoch 2/100, Loss: 0.7786
Epoch 3/100, Loss: 0.7574
Epoch 4/100, Loss: 0.7443
Epoch 5/100, Loss: 0.7306
Epoch 6/100, Loss: 0.7076
Epoch 7/100, Loss: 0.6946
Epoch 8/100, Loss: 0.6969
Epoch 9/100, Loss: 0.6796
Epoch 10/100, Loss: 0.6625
Epoch 11/100, Loss: 0.6574
Epoch 12/100, Loss: 0.6465
Epoch 13/100, Loss: 0.6421
Epoch 14/100, Loss: 0.6308
Epoch 15/100, Loss: 0.6228
Epoch 16/100, Loss: 0.6173
Epoch 17/100, Loss: 0.6074
Epoch 18/100, Loss: 0.6040
Epoch 19/100, Loss: 0.5978
Epoch 20/100, Loss: 0.5924
Epoch 21/100, Loss: 0.5859
Epoch 22/100, Loss: 0.5789
Epoch 23/100, Loss: 0.5762
Epoch 24/100, Loss: 0.5700
Epoch 25/100, Loss: 0.5641
Epoch 26/100, Loss: 0.5606
Epoch 27/100, Loss: 0.5588
Epoch 28/100, Loss: 0.5528
Epoch 29/100, Loss: 0.5433
Epoch 30/100, Loss: 0.5402
Epoch 31/100, Loss: 0.5322
Epoch 32/100, Loss: 0.5263
Epoch 33/100, Loss: 0.5275
Epoch 34/100, Loss: 0.5168
Epoch 35/100, Loss: 0.5144
Epoch 36/100, Loss: 0.5035
Epoch 37/100, Loss: 0.4956
Epoch 38/1

In [39]:
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
import numpy as np
import pandas as pd
from tqdm import tqdm

# Assuming all your previous functions (process_play, adjust_position, compute_edge_weight, etc.)
# are defined and available in the current namespace.

# For example, here is a stub of your trained model (make sure to load your trained state_dict if saved):
class NFLGraphClassifier(nn.Module):
    def __init__(self, in_channels, hidden_channels, num_classes):
        super(NFLGraphClassifier, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.fc = nn.Linear(hidden_channels, num_classes)
    
    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch if hasattr(data, 'batch') else None
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        if batch is None:
            batch = torch.zeros(x.size(0), dtype=torch.long, device=x.device)
        x = global_mean_pool(x, batch)
        x = self.fc(x)
        return x

# Function to process a single unseen play and return a PyTorch Geometric Data object.
def process_unseen_play(play_row):
    """
    Given a row for an unseen play, processes the play (adjust positions, create edge indices,
    node features, and edge attributes) and returns a PyTorch Geometric Data object.
    """
    currPlayId = play_row["playId"]
    currGameId = play_row["gameId"]

    # Select game and play information from your dataset.
    curr_game = game_data[game_data["gameId"] == play_row["gameId"]]
    curr_play = plays_data[(plays_data["gameId"] == play_row["gameId"]) & (plays_data["playId"] == play_row["playId"])]

    play_row = adjust_position(play_row, curr_game, curr_play)

    # Extract receivers and defenders based on your logic.
    Recs = player_play_data[
        (player_play_data["gameId"] == play_row["gameId"]) &
        (player_play_data["playId"] == play_row["playId"]) &
        (player_play_data["wasRunningRoute"] == True)
    ]
    Defs = player_play_data[
        (player_play_data["gameId"] == play_row["gameId"]) &
        (player_play_data["playId"] == play_row["playId"]) &
        ~pd.isna(player_play_data["pff_defensiveCoverageAssignment"])
    ]

    # Use tracking data to refine positions for the play.
    Recs = week_data_passed[
        (week_data_passed["gameId"] == play_row["gameId"]) &
        (week_data_passed["playId"] == play_row["playId"]) &
        (week_data_passed["nflId"].isin(Recs["nflId"]))
    ].copy()
    Defs = week_data_passed[
        (week_data_passed["gameId"] == play_row["gameId"]) &
        (week_data_passed["playId"] == play_row["playId"]) &
        (week_data_passed["nflId"].isin(Defs["nflId"]))
    ].copy()

    # Reset indices to ensure proper node indexing.
    Recs = Recs.reset_index(drop=True)
    Defs = Defs.reset_index(drop=True)

    # Create edge indices and compute edge attributes.
    edge_index_list = []
    edge_weight_list = []
    for rec_idx, rec in Recs.iterrows():
        for def_idx, defe in Defs.iterrows():
            edge_index_list.append([rec_idx, def_idx + len(Recs)])
            weight_value = compute_edge_weight(rec, defe, weight_params)
            edge_weight_list.append([weight_value])
    
    # Build node features by stacking receiver and defender features.
    rec_features = Recs[['x', 'y', 's', 'a', 'dir', 'o']].to_numpy()
    def_features = Defs[['x', 'y', 's', 'a', 'dir', 'o']].to_numpy()
    node_features = np.vstack([rec_features, def_features])
    x = torch.tensor(node_features, dtype=torch.float)

    # Build edge tensors.
    if len(edge_index_list) == 0:
        return None  # or handle cases with no valid edge
    edge_index = torch.tensor(edge_index_list, dtype=torch.long).t().contiguous()
    edge_attr = torch.tensor(edge_weight_list, dtype=torch.float)

    # Create the graph-level Data object.
    # Here we assume that the pass outcome is unknown, so set a dummy label; prediction will replace it.
    y = torch.tensor([-1], dtype=torch.long)  # dummy label
    data_obj = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
    data_obj.batch = torch.zeros(x.size(0), dtype=torch.long)
    return data_obj

# Function to run prediction on one play.
def predict_play(play_row, model):
    data_obj = process_unseen_play(play_row)
    if data_obj is None:
        print("No valid graph for this play.")
        return None
    model.eval()  # Set model to evaluation mode.
    with torch.no_grad():
        logits = model(data_obj)  # Forward pass.
        # Apply softmax to obtain class probabilities.
        probs = F.softmax(logits, dim=1)
        predicted_class = torch.argmax(probs, dim=1).item()
    # Map the predicted class to the pass outcome label.
    label_mapping = {0: "Complete", 1: "Incomplete", 2: "Interception"}
    prediction = label_mapping.get(predicted_class, "Unknown")
    return prediction, probs.squeeze().tolist()

# -------------------------
# Example usage:
# Assume you have an unseen play (a row from your tracking/play dataset).
# You could extract it from a CSV or use one from week_data_arrived.
# For demonstration, we'll take the first row of week_data_arrived:
randN = np.random.randint(1, 10000)
unseen_play = week_data_arrived.iloc[randN]
print("Processing unseen play...")
prediction, prob_scores = predict_play(unseen_play, model)
print("Predicted outcome:", prediction)
print("Class probabilities:", prob_scores)


Processing unseen play...
Predicted outcome: Complete
Class probabilities: [0.8776790499687195, 0.1223168894648552, 4.081518000020878e-06]
