# Extract Defensive Tracking Features
This notebook extracts the locations of defensive players from the tracking data and derives defensive formation features from those locations.

---

In [267]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

# Process Defense Tracking and Ball Locations 

In [268]:
# Names of tracking columns (play/player/frame identifiers plus x/y position).
# NOTE(review): not referenced by any cell visible in this notebook -- confirm before removing.
COLUMNS = ["gameId", "playId", "nflId", "frameId", "x", "y"]

In [269]:
# Shell command: delete everything under /kaggle/working (destructive, no confirmation).
!rm -rf /kaggle/working/*

In [270]:
# Add data for defense locations
tracking_files = [
    '/kaggle/input/nfl-big-data-bowl-2025/tracking_week_9.csv',
]
playerdata_file = '/kaggle/input/nfl-big-data-bowl-2025/players.csv'
players = pd.read_csv(playerdata_file)
tracking = pd.concat(
    (pd.read_csv(file, low_memory=False) for file in tracking_files),
    ignore_index=True,
)

# Keep only the before-snap and snap frames. The filtered frame must be assigned
# back to `tracking`: evaluating the expression without assignment only displays
# it and leaves after-snap frames in the data used by every later cell.
tracking = tracking[tracking["frameType"].isin(["BEFORE_SNAP", "SNAP"])]
tracking

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022110700,56,33131.0,Calais Campbell,1,BEFORE_SNAP,2022-11-08 01:16:09.2,93.0,BAL,left,83.000000,28.440000,0.0,0.0,0.01,197.06,23.13,huddle_break_offense
1,2022110700,56,33131.0,Calais Campbell,2,BEFORE_SNAP,2022-11-08 01:16:09.3,93.0,BAL,left,83.000000,28.450000,0.0,0.0,0.01,196.46,38.70,
2,2022110700,56,33131.0,Calais Campbell,3,BEFORE_SNAP,2022-11-08 01:16:09.4,93.0,BAL,left,83.010000,28.450000,0.0,0.0,0.01,196.46,47.59,
3,2022110700,56,33131.0,Calais Campbell,4,BEFORE_SNAP,2022-11-08 01:16:09.5,93.0,BAL,left,83.020000,28.450000,0.0,0.0,0.01,195.50,52.81,
4,2022110700,56,33131.0,Calais Campbell,5,BEFORE_SNAP,2022-11-08 01:16:09.6,93.0,BAL,left,83.020000,28.460000,0.0,0.0,0.00,194.24,51.82,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5671665,2022110300,3579,,football,22,BEFORE_SNAP,2022-11-04 03:04:34,,football,left,74.800003,29.850000,0.0,0.0,0.00,,,
5671666,2022110300,3579,,football,23,BEFORE_SNAP,2022-11-04 03:04:34.1,,football,left,74.800003,29.850000,0.0,0.0,0.00,,,
5671667,2022110300,3579,,football,24,BEFORE_SNAP,2022-11-04 03:04:34.2,,football,left,74.800003,29.850000,0.0,0.0,0.00,,,
5671668,2022110300,3579,,football,25,BEFORE_SNAP,2022-11-04 03:04:34.3,,football,left,74.800003,29.860001,0.0,0.0,0.00,,,


In [271]:
# Rows with a missing nflId are selected as the ball rows (the "football" rows in
# the tracking preview have no nflId); take the position at frameId == 1 per play.
is_first_frame = tracking["frameId"] == 1
has_no_player_id = tracking["nflId"].isna()

ball_coordinates = (
    tracking.loc[is_first_frame & has_no_player_id, ["gameId", "playId", "x", "y"]]
    .reset_index(drop=True)
    .rename(columns={"x": "x_ball", "y": "y_ball"})
)

# Preview the result
print(ball_coordinates.head())

       gameId  playId     x_ball     y_ball
0  2022110700      56  85.050003  29.910000
1  2022110700      80  70.150002  29.920000
2  2022110700     105  70.400002  29.840000
3  2022110700     126  66.519997  30.090000
4  2022110700     167  20.549999  23.799999


In [272]:
# Filter the tracking data down to defensive players only.

# Only the id and position are needed from the players table.
players = players[["nflId", "position"]]

# Disabled: restrict to audible plays only.
# tracking = tracking.merge(
#     audible_tracking_data,
#     on=["gameId", "playId"],
#     how="inner"
# )

# Attach a position to every tracking row; a left join keeps rows with no match.
positions_tracking = tracking.merge(players, how="left", on="nflId")

# Position codes treated as defensive.
defensive_positions = ["DE","NT","SS","FS","OLB","DT","CB","ILB","MLB","DB","LB"]
is_defender = positions_tracking["position"].isin(defensive_positions)
defensive_tracking = positions_tracking[is_defender]

# Defender rows grouped per play.
def_players_plays = defensive_tracking.groupby(["gameId", "playId"])

# Extract Defenders Locations

In [273]:
# Disable pandas' chained-assignment (SettingWithCopy) warning for the rest of the session.
pd.options.mode.chained_assignment = None
def find_stop_frame(frames, STOP_THRESHOLD=0.20, min_frame_id=40):
    """
    Find, for each player on each play, the frame at which the player is slowest.

    Only frames with frameId > min_frame_id are considered. Within each
    (gameId, playId, nflId) group the frame with the smallest speed 's' is
    selected; ties resolve to the first such row.

    Parameters:
    frames (DataFrame): Tracking rows with 'gameId', 'playId', 'nflId', 'frameId' and 's'.
    STOP_THRESHOLD (float): Currently unused; kept so existing callers keep working.
    min_frame_id (int): Frames with frameId <= this value are ignored (default 40).

    Returns:
    DataFrame: One row per (gameId, playId, nflId) with columns
    'gameId', 'playId', 'nflId', 'stopId', where 'stopId' is the selected frameId.
    Players with no eligible frame are absent from the result.
    """
    key_cols = ["gameId", "playId", "nflId"]

    # Rows with a missing speed can never be the minimum. Dropping them up front
    # also avoids a group whose speeds are all NaN, for which idxmin has no valid
    # label to return and the .loc lookup below would fail.
    candidates = frames[(frames["frameId"] > min_frame_id) & frames["s"].notna()]

    if candidates.empty:
        # Nothing eligible: return an empty frame with the expected columns.
        return (
            candidates[key_cols + ["frameId"]]
            .rename(columns={"frameId": "stopId"})
            .reset_index(drop=True)
        )

    # Index label of the slowest row in each player-play group
    slowest_idx = candidates.groupby(key_cols)["s"].idxmin()

    stop_frame = (
        candidates.loc[slowest_idx.to_numpy(), key_cols + ["frameId"]]
        .rename(columns={"frameId": "stopId"})
        .reset_index(drop=True)
    )

    return stop_frame

def get_locations(defensive_tracking, STOP_THRESHOLD=0.20):
    """
    Attach each player's stop-frame location to every one of their tracking rows.

    The stop frame per (gameId, playId, nflId) comes from find_stop_frame. The
    player's x/y at that frame is copied to all rows of the same player and play
    as 'x_set' / 'y_set'. Players without a stop frame get missing values.

    Parameters:
    defensive_tracking (DataFrame): Tracking rows with 'gameId', 'playId', 'nflId',
        'frameId', 'x', 'y' plus whatever find_stop_frame reads.
    STOP_THRESHOLD (float): Passed through to find_stop_frame.

    Returns:
    DataFrame: Columns 'gameId', 'playId', 'nflId', 'frameId', 'x_set', 'y_set',
    one row per input row.
    """
    key_cols = ["gameId", "playId", "nflId"]

    # Compute stop frame for each group
    stop_frame = find_stop_frame(defensive_tracking, STOP_THRESHOLD)

    # Merge back so every row knows its player's stop frame
    merged = defensive_tracking.merge(stop_frame, on=key_cols, how="left")

    # Keep x/y only on the stop-frame row and blank them elsewhere. This is a
    # vectorized mask instead of a Python-level row-by-row apply, which is very
    # slow on tracking-sized frames.
    at_stop = merged["frameId"] == merged["stopId"]
    merged["x_set"] = merged["x"].where(at_stop)
    merged["y_set"] = merged["y"].where(at_stop)

    # Broadcast the single non-missing value to every row of the same player and
    # play ("first" picks the first non-null value in the group).
    merged[["x_set", "y_set"]] = merged.groupby(key_cols)[["x_set", "y_set"]].transform("first")

    # Return relevant columns, including x_set and y_set
    return merged[["gameId", "playId", "nflId", "frameId", "x_set", "y_set"]]

# Apply the function
# NOTE: this rebinds defensive_tracking to the function's output, which only has
# gameId/playId/nflId/frameId/x_set/y_set. Re-running this cell on its own fails
# because the 's', 'x' and 'y' columns the functions read are no longer present.
defensive_tracking = get_locations(defensive_tracking)

In [274]:
# Keep only the frameId == 1 rows, with the identifier and set-location columns.
location_columns = ["gameId", "playId", "nflId", "x_set", "y_set", "frameId"]
defensive_tracking = defensive_tracking.loc[
    defensive_tracking["frameId"] == 1, location_columns
]

In [275]:
# Attach the ball position of each play to every defender row (left join on the play keys)
defensive_tracking = pd.merge(
    defensive_tracking,
    ball_coordinates,
    how="left",
    on=["gameId", "playId"],
)
display(defensive_tracking.head())

Unnamed: 0,gameId,playId,nflId,x_set,y_set,frameId,x_ball,y_ball
0,2022110700,56,33131.0,82.82,34.05,1,85.050003,29.91
1,2022110700,56,35454.0,83.99,24.64,1,85.050003,29.91
2,2022110700,56,42361.0,75.91,43.16,1,85.050003,29.91
3,2022110700,56,44828.0,77.41,9.43,1,85.050003,29.91
4,2022110700,56,44999.0,67.54,29.36,1,85.050003,29.91


In [276]:
# GRAPH FORMATION
import plotly.express as px

# Play to plot. It has to come from the tracking week loaded above: the previous
# gameId (2022090800) is not among the games in the week 9 data shown in the
# earlier outputs, so the selection was empty and the chart had no points.
PLOT_GAME_ID = 2022110700
PLOT_PLAY_ID = 56

specific_play = defensive_tracking[
    (defensive_tracking["gameId"] == PLOT_GAME_ID)
    & (defensive_tracking["playId"] == PLOT_PLAY_ID)
]

# Drop rows without x_set and y_set
specific_play = specific_play.dropna(subset=["x_set", "y_set"])

# Create the scatter plot of the defenders' set locations
fig = px.scatter(
    specific_play,
    x="x_set",
    y="y_set",
    title="Set Location Points for Play (x_set, y_set)",
    labels={"x_set": "X Coordinate", "y_set": "Y Coordinate"}
)

# Show the plot
fig.show()

# Extract Formation features from Locations 

In [277]:
def get_defensive_formation(tracking_data):
    """
    Count, per play, how many rows fall into each zone around the ball.

    Zones are defined on absolute distances from the ball, so they do not
    depend on which side of the ball a player is on:
      - on_los:        y distance <= 8 and x distance between 0 and 2
      - in_box:        y distance <= 8 and x distance between 2.5 and 6
      - deep_defender: x distance > 7.5

    Side effect: the columns 'x_adjusted', 'y_adjusted', 'in_box', 'on_los'
    and 'deep_defender' are written onto `tracking_data` itself.

    Parameters:
    tracking_data (DataFrame): Rows with 'gameId', 'playId', 'x_set', 'y_set',
        'x_ball' and 'y_ball'.

    Returns:
    DataFrame: Columns 'gameId', 'playId', 'N_LOS', 'N_BOX', 'N_DEEP', one row per play.
    """
    # Absolute distance from the ball along each axis
    tracking_data["x_adjusted"] = (tracking_data["x_ball"] - tracking_data["x_set"]).abs()
    tracking_data["y_adjusted"] = (tracking_data["y_set"] - tracking_data["y_ball"]).abs()

    dist_x = tracking_data["x_adjusted"]
    near_ball_y = tracking_data["y_adjusted"] <= 8

    # Zone flags; between() includes both endpoints
    tracking_data["in_box"] = near_ball_y & dist_x.between(2.5, 6)
    tracking_data["on_los"] = near_ball_y & dist_x.between(0, 2)
    tracking_data["deep_defender"] = dist_x > 7.5

    # Summing the boolean flags per play gives the head-count in each zone
    zone_flags = ["on_los", "in_box", "deep_defender"]
    formation_counts = (
        tracking_data.groupby(["gameId", "playId"], as_index=False)[zone_flags]
        .sum()
        .rename(columns={"on_los": "N_LOS", "in_box": "N_BOX", "deep_defender": "N_DEEP"})
    )

    return formation_counts

In [280]:
# Count defenders per zone for every play. get_defensive_formation also writes
# x_adjusted / y_adjusted and the zone flag columns onto defensive_tracking itself,
# which is why they appear in the preview below.
defensive_formation = get_defensive_formation(defensive_tracking)
display(defensive_tracking.head())

Unnamed: 0,gameId,playId,nflId,x_set,y_set,frameId,x_ball,y_ball,x_adjusted,y_adjusted,in_box,on_los,deep_defender
0,2022110700,56,33131.0,82.82,34.05,1,85.050003,29.91,2.230003,4.14,False,False,False
1,2022110700,56,35454.0,83.99,24.64,1,85.050003,29.91,1.060003,5.27,False,True,False
2,2022110700,56,42361.0,75.91,43.16,1,85.050003,29.91,9.140003,13.25,False,False,True
3,2022110700,56,44828.0,77.41,9.43,1,85.050003,29.91,7.640003,20.48,False,False,True
4,2022110700,56,44999.0,67.54,29.36,1,85.050003,29.91,17.510003,0.55,False,False,True


In [279]:
# Write the defender locations and the per-play formation counts to CSV
# (no index column, rows written in chunks of 100000).
csv_outputs = (
    (defensive_tracking, 'defensive_locations_9.csv'),
    (defensive_formation, 'defensive_formation_9.csv'),
)
for frame, csv_name in csv_outputs:
    frame.to_csv(csv_name, index=False, chunksize=100000)