In [1]:
import os, sys

# List the mounted inputs so the wheel filenames used below can be checked by eye.
!ls /kaggle/input
!ls /kaggle/input/keras-tcn-demo

# 1) Install protobuf from the wheel shipped in the attached dataset.
#    --no-deps: install only this wheel and do not resolve its dependencies.
#    The recorded output shows this replacing the preinstalled protobuf 6.33.0.
!pip install /kaggle/input/keras-tcn-demo/protobuf-5.29.5-*.whl --no-deps

# 2) Install keras-tcn from the wheel in the same dataset (again without dependencies).
!pip install /kaggle/input/keras-tcn-demo/keras_tcn-3.5.6-py3-none-any.whl --no-deps

keras-tcn-demo	nfl-big-data-bowl-2026-prediction
keras_tcn-3.5.6-py3-none-any.whl
protobuf-5.29.5-cp38-abi3-manylinux2014_x86_64.whl
Processing /kaggle/input/keras-tcn-demo/protobuf-5.29.5-cp38-abi3-manylinux2014_x86_64.whl
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 6.33.0
    Uninstalling protobuf-6.33.0:
      Successfully uninstalled protobuf-6.33.0
Successfully installed protobuf-5.29.5
Processing /kaggle/input/keras-tcn-demo/keras_tcn-3.5.6-py3-none-any.whl
Installing collected packages: keras-tcn
Successfully installed keras-tcn-3.5.6


In [2]:
# Smoke test for the wheels installed in the previous cell: this import fails if keras-tcn is missing.
from tcn import TCN
import tensorflow as tf
import warnings
print("TCN OK, TF:", tf.__version__)
# Registered after the imports above, so it only silences warnings raised from here on.
warnings.filterwarnings('ignore')

2025-12-03 06:25:38.474401: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764743138.655782      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764743138.708035      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


TCN OK, TF: 2.18.0


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
#from tcn import TCN

# =========================
# 1. PATHS (KAGGLE VERSION)
# =========================

# In Kaggle, the competition training data is mounted here.
# NOTE: the next cell defines DATA_TRAIN for the same folder and uses that name;
# DATA itself is not referenced again in the visible notebook.
DATA = Path("/kaggle/input/nfl-big-data-bowl-2026-prediction/train")
print("DATA exists:", DATA.exists())

DATA exists: True


In [4]:
# =========================
# FULL PIPELINE (train + test + submission)
# =========================

# ---- Competition files as mounted inside the Kaggle kernel ----
DATA_ROOT = Path("/kaggle/input/nfl-big-data-bowl-2026-prediction")
DATA_TRAIN = DATA_ROOT.joinpath("train")
TEST_INPUT = DATA_ROOT.joinpath("test_input.csv")
TEST_KEYS = DATA_ROOT.joinpath("test.csv")

# Report up front whether each expected input is actually present.
for _label, _location in (
    ("Train folder exists:", DATA_TRAIN),
    ("Test_input exists:", TEST_INPUT),
    ("Test.csv exists:", TEST_KEYS),
):
    print(_label, _location.exists())

# =========================
# 1. Load + standardize train
# =========================
def standardize_by_play_direction(df):
    """Mirror every left-moving play so that all plays share one direction.

    Rows whose ``play_direction`` equals ``"left"`` are rewritten in place:

    * ``x`` and ``ball_land_x`` become ``120 - value``
    * ``y`` and ``ball_land_y`` become ``53.3 - value``
    * the angles ``dir`` and ``o`` are shifted by 180 degrees modulo 360

    ``play_direction``, ``x`` and ``y`` must be present; the remaining columns
    are transformed only when the frame has them. The frame that was passed in
    is mutated and returned.
    """
    is_left = df["play_direction"] == "left"

    def _mirror(column, extent):
        # Reflect a coordinate about the midpoint of [0, extent].
        df.loc[is_left, column] = extent - df.loc[is_left, column]

    def _turn_around(column):
        # Flip a heading by half a turn, keeping it inside [0, 360).
        df.loc[is_left, column] = (df.loc[is_left, column] + 180) % 360

    # player positions (always required)
    _mirror("x", 120)
    _mirror("y", 53.3)

    # ball landing point (only present on some frames)
    for column, extent in (("ball_land_x", 120), ("ball_land_y", 53.3)):
        if column in df:
            _mirror(column, extent)

    # angles
    for column in ("dir", "o"):
        if column in df:
            _turn_around(column)

    return df

def load_inputs_outputs(data_dir: Path):
    """Read the 18 weekly input/output CSV pairs from ``data_dir``.

    For each week tag ``w01`` .. ``w18`` the files ``input_2023_<tag>.csv`` and
    ``output_2023_<tag>.csv`` are loaded. Input frames are passed through
    ``standardize_by_play_direction``; output frames are left as read. Both get
    a ``week`` column holding the tag.

    Returns:
        ``(inputs, outputs)``: the weekly frames concatenated with a fresh
        0..n-1 index.
    """
    week_tags = [f"w{number:02d}" for number in range(1, 19)]

    input_frames, output_frames = [], []
    for tag in week_tags:
        tracking = standardize_by_play_direction(
            pd.read_csv(data_dir / f"input_2023_{tag}.csv")
        )
        tracking["week"] = tag
        input_frames.append(tracking)

        targets = pd.read_csv(data_dir / f"output_2023_{tag}.csv")
        targets["week"] = tag
        output_frames.append(targets)

    return (
        pd.concat(input_frames, ignore_index=True),
        pd.concat(output_frames, ignore_index=True),
    )

# Load every week; the input frames come back already direction-standardized.
inp_all, df_output = load_inputs_outputs(DATA_TRAIN)
print("inp_all:", inp_all.shape, "df_output:", df_output.shape)

# The output frames are standardized here rather than in the loader: play_direction is
# taken from the inputs (one row per game/play after drop_duplicates) and attached first.
# how="left" keeps every output row; a play missing from the inputs gets NaN
# play_direction and is therefore left unflipped.
dir_map = inp_all[["game_id", "play_id", "play_direction"]].drop_duplicates()
df_output = df_output.merge(dir_map, on=["game_id", "play_id"], how="left")
df_output = standardize_by_play_direction(df_output)

# =========================
# 2. Feature engineering (TRAIN)
# =========================
# Join keys: one frame of one play, and one player within that frame.
# Both lists are also read as globals by the helper functions defined further down.
group_cols_full  = ["game_id", "play_id", "frame_id"]
player_key       = ["game_id", "play_id", "frame_id", "nfl_id"]

# Work on a sorted copy so the per-player diffs below run in frame order.
base = inp_all.copy().sort_values(player_key)

# 2.1 self-based
# Frame-to-frame displacement per player within a play; the first frame has no
# predecessor, so its NaN diff is replaced by 0.
base[["dx", "dy"]] = (
    base.groupby(["game_id", "play_id", "nfl_id"])[["x", "y"]]
        .diff()
        .fillna(0.0)
)
# Frame-to-frame change of the orientation angle. This is a raw difference with no
# wrap-around handling, and "do" is not part of frame_features below.
base["do"] = (
    base.groupby(["game_id", "play_id", "nfl_id"])["o"]
        .diff()
        .fillna(0.0)
)
# Euclidean distance from the player to the ball landing point.
base["dist_to_ball"] = np.hypot(
    base["x"] - base["ball_land_x"],
    base["y"] - base["ball_land_y"]
)
# Unit vector pointing from the player towards the ball landing point.
angle_to_ball = np.arctan2(
    base["ball_land_y"] - base["y"],
    base["ball_land_x"] - base["x"]
)
base["ball_dirx"] = np.cos(angle_to_ball)
base["ball_diry"] = np.sin(angle_to_ball)
# Speed decomposed with cos -> vx and sin -> vy.
# NOTE(review): this treats `dir` as an angle measured from the +x axis; confirm against
# the dataset's angle convention. The test-time builder uses the same formula.
rad_dir = np.deg2rad(base["dir"])
base["vx"] = base["s"] * np.cos(rad_dir)
base["vy"] = base["s"] * np.sin(rad_dir)
# Second difference of position (change in displacement between consecutive frames).
base[["ddx", "ddy"]] = (
    base.groupby(["game_id", "play_id", "nfl_id"])[["dx", "dy"]]
        .diff()
        .fillna(0.0)
)

# 2.2 QB features
# Position of the row(s) with role "Passer" in each frame, joined onto every player in
# that frame. The left join leaves x_qb / y_qb (and dist_qb) as NaN for frames without a
# Passer row; more than one Passer row in a frame would duplicate that frame's rows.
qb = (
    base[base["player_role"] == "Passer"]
    [group_cols_full + ["x", "y"]]
    .rename(columns={"x": "x_qb", "y": "y_qb"})
)
base = base.merge(qb, on=group_cols_full, how="left")
base["dist_qb"] = np.hypot(
    base["x"] - base["x_qb"],
    base["y"] - base["y_qb"]
)

# 2.3 Targeted receiver distance
def add_dist_to_targeted_receiver(df: pd.DataFrame) -> pd.DataFrame:
    """Attach the targeted receiver's position and each player's distance to it.

    For every frame (``game_id``, ``play_id``, ``frame_id``) the position of the
    row whose ``player_role`` is ``"Targeted Receiver"`` is joined onto all rows
    of that frame as ``x_tr`` / ``y_tr``, and ``dist_to_tr`` is the Euclidean
    distance between the row's own ``x``/``y`` and that position.

    The frame keys are defined locally so the function no longer depends on a
    notebook-level ``group_cols_full`` variable being defined beforehand.

    Args:
        df: tracking rows with ``game_id``, ``play_id``, ``frame_id``,
            ``player_role``, ``x`` and ``y`` columns.

    Returns:
        A new frame (the argument is not modified) with ``x_tr``, ``y_tr`` and
        ``dist_to_tr`` added. Frames without a targeted-receiver row get NaN in
        all three columns because the join is a left join.
    """
    frame_keys = ["game_id", "play_id", "frame_id"]

    receiver_xy = (
        df.loc[df["player_role"] == "Targeted Receiver", frame_keys + ["x", "y"]]
          .rename(columns={"x": "x_tr", "y": "y_tr"})
    )
    df = df.merge(receiver_xy, on=frame_keys, how="left")
    df["dist_to_tr"] = np.hypot(
        df["x"] - df["x_tr"],
        df["y"] - df["y_tr"]
    )
    return df

# Adds x_tr / y_tr / dist_to_tr to every tracking row (NaN where the frame has no
# "Targeted Receiver" row, since the helper uses a left join).
base = add_dist_to_targeted_receiver(base)

# 2.4 nearest opponent
def _closest_on_other_side(own, other, frame_keys, player_keys):
    """Per row of `own`, the distance to the nearest `other` player in the same frame."""
    paired = own.merge(
        other[player_keys + ["x", "y"]],
        on=frame_keys,
        suffixes=("", "_opp"),
    )
    paired["dist_opp"] = np.hypot(
        paired["x"] - paired["x_opp"],
        paired["y"] - paired["y_opp"]
    )
    # Group on the player's OWN id (the opponent's id carries the "_opp" suffix).
    return paired.groupby(player_keys)["dist_opp"].min().reset_index()


def nearest_opponent_distance(df: pd.DataFrame, side_col: str = "player_side") -> pd.DataFrame:
    """Distance from every player to the closest opposing player in the same frame.

    Rows are split by ``df[side_col]`` into ``"Offense"`` and ``"Defense"``.
    Each offensive row is paired with every defensive row of the same
    (``game_id``, ``play_id``, ``frame_id``) and vice versa, and the minimum
    Euclidean distance is kept.

    The join keys are defined locally so the function no longer depends on a
    notebook-level ``player_key`` variable, and the two mirrored halves share
    one helper.

    Args:
        df: tracking rows with ``game_id``, ``play_id``, ``frame_id``,
            ``nfl_id``, ``x``, ``y`` and the side column.
        side_col: name of the column holding ``"Offense"`` / ``"Defense"``.

    Returns:
        Frame with columns ``game_id``, ``play_id``, ``frame_id``, ``nfl_id``
        and ``dist_opp``: offensive players first, then defensive players.
        Players in frames with no opponent, and rows whose side is neither
        value, do not appear.
    """
    frame_keys = ["game_id", "play_id", "frame_id"]
    player_keys = frame_keys + ["nfl_id"]

    off = df[df[side_col] == "Offense"]
    deff = df[df[side_col] == "Defense"]

    off_min = _closest_on_other_side(off, deff, frame_keys, player_keys)
    deff_min = _closest_on_other_side(deff, off, frame_keys, player_keys)

    return pd.concat([off_min, deff_min], ignore_index=True)

# Attach each player's distance to the closest opponent in the same frame.
# Left join: players absent from `nearest` keep NaN in this column.
nearest = nearest_opponent_distance(base)
base = base.merge(nearest, on=player_key, how="left")
base.rename(columns={"dist_opp": "dist_nearest_opp"}, inplace=True)

# one-hot side/role
# drop_first=True removes the first category of each column, so the surviving dummy
# columns depend on which categories occur in this data.
base = pd.get_dummies(
    base,
    columns=["player_side", "player_role"],
    drop_first=True
)

# keep only players to predict
# (all players were needed above for the QB / receiver / opponent features)
seq_df = base[base["player_to_predict"]].copy()
seq_df = seq_df.sort_values(player_key)

# restrict outputs to those players
keys_pp = seq_df[["game_id", "play_id", "nfl_id"]].drop_duplicates()
df_out_pp = df_output.merge(
    keys_pp,
    on=["game_id", "play_id", "nfl_id"],
    how="inner"
).sort_values(player_key)

# =========================
# 3. Build TRAIN sequences
# =========================
# Per player-play frame counts, computed for inspection; neither Series is referenced
# again in the visible notebook.
in_lengths = seq_df.groupby(["game_id", "play_id", "nfl_id"])["frame_id"].nunique()
out_lengths = df_out_pp.groupby(["game_id", "play_id", "nfl_id"])["frame_id"].max()

# Number of trailing input frames fed to the model, and number of future frames it emits.
T_IN   = 10
MAX_OUT = 20

# Per-frame model inputs. The sequence builders append "x_rel" / "y_rel" to this list
# when they assemble each sample.
frame_features = [
    "s", "a", "o", "dir",
    "vx", "vy",
    "dx", "dy", "ddx", "ddy",
    "dist_to_ball",
    "ball_dirx", "ball_diry",
    "x_qb", "y_qb", "dist_qb",
    "dist_nearest_opp",
    "dist_to_tr",
    "num_frames_output",
]

# One-hot columns produced by get_dummies above; the test-time builder reuses this list
# to line its columns up with training.
dummy_cols = [
    c for c in seq_df.columns
    if c.startswith("player_side_") or c.startswith("player_role_")
]
frame_features = frame_features + dummy_cols

# Per-sample accumulators; an entry is appended to ALL of them only once a sample has
# passed every check, so the five lists always stay aligned.
X_list   = []
Y_list   = []
start_xy = []
lengths  = []
meta     = []

seq_keys = ["game_id", "play_id", "nfl_id"]

# Index the target frames by player-play ONCE. Filtering df_out_pp with a boolean mask
# inside the loop rescans the whole frame for every sequence (quadratic overall).
out_by_key = {key: grp for key, grp in df_out_pp.groupby(seq_keys, sort=False)}

for (g, p, n), g_in in seq_df.groupby(seq_keys):
    g_out = out_by_key.get((g, p, n))

    # Need a full input window and at least one target frame.
    if g_out is None or len(g_in) < T_IN:
        continue
    g_out = g_out.sort_values("frame_id")

    g_in_last = g_in.tail(T_IN).copy()
    actual_len = min(len(g_out), MAX_OUT)
    g_out_first = g_out.head(actual_len)

    # Origin of the relative coordinate system: the player's last observed position.
    x0, y0 = g_in_last.iloc[-1][["x", "y"]]

    g_in_last["x_rel"] = g_in_last["x"] - x0
    g_in_last["y_rel"] = g_in_last["y"] - y0

    feat_df = g_in_last[frame_features + ["x_rel", "y_rel"]]
    if feat_df.isna().any().any():
        # e.g. no Passer / Targeted Receiver row in one of the frames
        continue

    Xin = feat_df.to_numpy(dtype=np.float32)

    Y_abs = g_out_first[["x", "y"]].to_numpy(dtype=np.float32)
    Y_rel = Y_abs - np.array([[x0, y0]], dtype=np.float32)

    # Zero-pad short trajectories up to MAX_OUT; `lengths` records the true length.
    if actual_len < MAX_OUT:
        padding = np.zeros((MAX_OUT - actual_len, 2), dtype=np.float32)
        Y_rel = np.vstack([Y_rel, padding])

    X_list.append(Xin)
    Y_list.append(Y_rel)
    start_xy.append([x0, y0])
    lengths.append(actual_len)
    side = "Offense" if g_in["player_side_Offense"].iloc[0] == 1 else "Defense"
    meta.append((g, p, n, side))

X_seq    = np.stack(X_list)
Y_seq    = np.stack(Y_list)
start_xy = np.array(start_xy, np.float32)
lengths  = np.array(lengths, np.int32)

print("X_seq:", X_seq.shape, "Y_seq:", Y_seq.shape)

# =========================
# 4. Train/val split + scaling
# =========================
N, T_IN_, F = X_seq.shape
_, T_OUT_, _ = Y_seq.shape
print("T_IN, F =", T_IN_, F, "| T_OUT =", T_OUT_)

# One row per sample: (game_id, play_id, nfl_id, side).
meta_arr = np.array(meta, dtype=object)

# Split every per-sample array in ONE call so all of them are guaranteed to use the same
# train/validation indices, instead of relying on a second call with an identical seed.
(
    X_train, X_val,
    Y_train, Y_val,
    start_train, start_val,
    len_train, len_val,
    meta_train, meta_val,
) = train_test_split(
    X_seq, Y_seq, start_xy, lengths, meta_arr, test_size=0.2, random_state=42
)
player_side_val = np.array([m[3] for m in meta_val])

# Per-feature statistics are estimated on the training split only and reused everywhere.
X_train_flat = X_train.reshape(-1, F)
feat_mean = X_train_flat.mean(axis=0)
feat_std  = X_train_flat.std(axis=0) + 1e-6   # epsilon guards constant features

def scale_X(X):
    """Standardize features with the training-split mean and std (broadcast over the last axis)."""
    return (X - feat_mean) / feat_std

X_train_sc = scale_X(X_train)
X_val_sc   = scale_X(X_val)
# Targets are left unscaled (relative displacements).
Y_train_sc = Y_train
Y_val_sc   = Y_val

# =========================
# 5. Define & train TCN
# =========================
class MaskedMSE(keras.losses.Loss):
    """Squared error averaged over non-padded target frames only.

    A frame counts as valid when either of its two target coordinates has an
    absolute value above 1e-6. Frames whose target is exactly (0, 0), which is
    how short trajectories are padded, contribute nothing; a genuinely observed
    frame with zero displacement is excluded the same way.

    The result is the summed squared x and y error divided by the number of
    valid frames (plus 1e-8 to avoid dividing by zero).
    """

    def __init__(self, name="masked_mse"):
        super().__init__(name=name)

    def call(self, y_true, y_pred):
        # Validity is decided from the targets alone, one flag per (sample, frame).
        x_present = tf.abs(y_true[:, :, 0]) > 1e-6
        y_present = tf.abs(y_true[:, :, 1]) > 1e-6
        frame_is_valid = tf.cast(tf.logical_or(x_present, y_present), tf.float32)

        # Add a trailing axis so the flag applies to both coordinates.
        weights = tf.expand_dims(frame_is_valid, -1)

        weighted_sq_err = tf.square(y_true - y_pred) * weights
        valid_frames = tf.reduce_sum(weights) + 1e-8
        return tf.reduce_sum(weighted_sq_err) / valid_frames

# Seed Python, NumPy and the Keras backend before any weights are created so that
# initialization, dropout and batch shuffling are repeatable across kernel restarts.
keras.utils.set_random_seed(42)

# Encoder: a TCN over the T_IN input frames that returns only its final state
# (return_sequences=False), followed by a dense head that emits all MAX_OUT future
# (x, y) displacements at once.
inputs = keras.Input(shape=(T_IN, F))
x = TCN(
    nb_filters=64,
    kernel_size=3,
    dilations=[1, 2, 4, 8],
    dropout_rate=0.1,
    return_sequences=False,
)(inputs)
x = keras.layers.Dense(128, activation="relu")(x)
x = keras.layers.Dense(MAX_OUT * 2)(x)
outputs = keras.layers.Reshape((MAX_OUT, 2))(x)

tcn_model = keras.Model(inputs, outputs)
tcn_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=MaskedMSE(),   # ignores the zero-padded target frames
)

history = tcn_model.fit(
    X_train_sc, Y_train_sc,
    validation_data=(X_val_sc, Y_val_sc),
    epochs=12,        # fewer epochs on Kaggle for speed
    batch_size=128,
    verbose=1,
)

# =========================
# 5b. VALIDATION RMSE (same as in VS Code)
# =========================

# Predictions on the validation set. Both arrays hold displacements relative to each
# player's last input position; the targets were left unscaled, so Y_val_sc is Y_val.
Y_val_pred = tcn_model.predict(X_val_sc, verbose=0)  # (N_val, MAX_OUT, 2), relative coords
Y_val_true = Y_val_sc                                # (N_val, MAX_OUT, 2)

def rmse_masked(y_true, y_pred, lengths):
    """RMSE of the per-frame Euclidean error over valid frames only.

    For sequence ``i`` only the first ``lengths[i]`` frames are scored. The
    squared x and y errors of a frame are added together, and the mean is taken
    over frames.

    Args:
        y_true: array of shape (N, T, 2) with the reference trajectories.
        y_pred: array of shape (N, T, 2) with the predicted trajectories.
        lengths: length-N integer sequence giving the valid frames per sequence.

    Returns:
        The RMSE as a float, or ``nan`` when there is no valid frame at all
        (e.g. an empty subset), instead of raising ``ZeroDivisionError``.
    """
    lengths = np.asarray(lengths, dtype=np.int64)
    n_seq = len(lengths)
    if n_seq == 0:
        return float("nan")

    y_true = np.asarray(y_true)[:n_seq]
    y_pred = np.asarray(y_pred)[:n_seq]

    # valid[i, t] is True for the first lengths[i] frames of sequence i.
    frame_pos = np.arange(y_true.shape[1])
    valid = frame_pos[None, :] < lengths[:, None]
    n_valid = int(valid.sum())
    if n_valid == 0:
        return float("nan")

    # Accumulate in float64 so large validation sets do not lose precision.
    sq_dist = np.sum((y_true - y_pred).astype(np.float64) ** 2, axis=-1)  # dx^2 + dy^2
    return np.sqrt(sq_dist[valid].sum() / n_valid)

# Overall validation error, measured on displacements from the last input frame
rmse_rel = rmse_masked(Y_val_true, Y_val_pred, len_val)
print(f"TCN RMSE (masked, relative) on val: {rmse_rel:.3f}")

# Same error after shifting truth and predictions back to field coordinates
val_origin = np.expand_dims(start_val, axis=1)   # (N_val, 1, 2), broadcasts over frames
Y_val_true_abs = val_origin + Y_val_true
Y_val_pred_abs = val_origin + Y_val_pred
rmse_abs = rmse_masked(Y_val_true_abs, Y_val_pred_abs, len_val)
print(f"TCN RMSE (masked, absolute) on val: {rmse_abs:.3f}")

# Per-side breakdown: row positions of offensive / defensive validation samples
off_idx = np.flatnonzero(player_side_val == "Offense")
def_idx = np.flatnonzero(player_side_val == "Defense")

# Relative-coordinate scores per side (kept as variables; only the absolute ones are printed)
rmse_offense = rmse_masked(Y_val_true[off_idx], Y_val_pred[off_idx], len_val[off_idx])
rmse_defense = rmse_masked(Y_val_true[def_idx], Y_val_pred[def_idx], len_val[def_idx])

rmse_offense_abs = rmse_masked(
    Y_val_true_abs[off_idx], Y_val_pred_abs[off_idx], len_val[off_idx]
)
print(f"RMSE (Offense, abs): {rmse_offense_abs:.3f}")

rmse_defense_abs = rmse_masked(
    Y_val_true_abs[def_idx], Y_val_pred_abs[def_idx], len_val[def_idx]
)
print(f"RMSE (Defense, abs): {rmse_defense_abs:.3f}")


Train folder exists: True
Test_input exists: True
Test.csv exists: True
inp_all: (4880579, 24) df_output: (562936, 7)
X_seq: (46012, 10, 25) Y_seq: (46012, 20, 2)
T_IN, F = 10 25 | T_OUT = 20


I0000 00:00:1764743395.448903      20 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1764743395.449517      20 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Epoch 1/12


I0000 00:00:1764743400.475175      76 service.cc:148] XLA service 0x7e172c045c60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1764743400.475820      76 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1764743400.475841      76 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1764743401.140175      76 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 27/288[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 6ms/step - loss: 26.7551

I0000 00:00:1764743406.676939      76 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 29ms/step - loss: 10.0450 - val_loss: 1.1210
Epoch 2/12
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.3741 - val_loss: 0.8665
Epoch 3/12
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.0630 - val_loss: 0.8023
Epoch 4/12
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.8974 - val_loss: 0.6992
Epoch 5/12
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.8289 - val_loss: 0.6688
Epoch 6/12
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.7599 - val_loss: 0.6470
Epoch 7/12
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.7364 - val_loss: 0.6139
Epoch 8/12
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.6999 - val_loss: 0.6492
Epoch 9/12
[1m288/288[0m [32m━━━━━━━━━━━━━━━━

In [5]:
# ---------- helper: build test sequences from test_input ----------
def build_test_sequences_from_input(test_input_pd: pd.DataFrame):
    """Build model-ready input sequences from a batch of test tracking rows.

    Applies the same feature engineering as the training section: direction
    standardization, per-player diffs, ball / QB / targeted-receiver /
    nearest-opponent features and one-hot encoding. It then keeps the last
    ``T_IN`` frames of every player flagged ``player_to_predict``.

    The one-hot encoding is done WITHOUT ``drop_first`` and the training dummy
    columns (``dummy_cols``) are then selected by name. With ``drop_first=True``
    the dropped category is whichever sorts first among the categories present
    in this batch, which need not be the one dropped during training.

    Args:
        test_input_pd: raw test input rows (pandas); not modified.

    Returns:
        ``(X_test_seq, start_xy_test, keys_to_row)`` where

        * ``X_test_seq`` has shape (N_test, T_IN, F), unscaled float32,
        * ``start_xy_test`` has shape (N_test, 2): the last observed position of
          each sequence in standardized coordinates,
        * ``keys_to_row`` maps ``(game_id, play_id, nfl_id)`` to the row index.

        ``(None, None, {})`` is returned when no valid sequence could be built.
        Players with fewer than ``T_IN`` frames or with NaN features are
        skipped and do not appear in ``keys_to_row``.
    """
    test_inp = standardize_by_play_direction(test_input_pd.copy())

    group_cols_full = ["game_id", "play_id", "frame_id"]
    player_key      = ["game_id", "play_id", "frame_id", "nfl_id"]
    seq_key         = ["game_id", "play_id", "nfl_id"]

    base_test = test_inp.sort_values(player_key)

    # self-based: per-player frame-to-frame differences (first frame -> 0)
    base_test[["dx", "dy"]] = (
        base_test.groupby(seq_key)[["x", "y"]]
                 .diff()
                 .fillna(0.0)
    )
    base_test["do"] = (
        base_test.groupby(seq_key)["o"]
                 .diff()
                 .fillna(0.0)
    )
    base_test["dist_to_ball"] = np.hypot(
        base_test["x"] - base_test["ball_land_x"],
        base_test["y"] - base_test["ball_land_y"]
    )
    angle_to_ball_test = np.arctan2(
        base_test["ball_land_y"] - base_test["y"],
        base_test["ball_land_x"] - base_test["x"]
    )
    base_test["ball_dirx"] = np.cos(angle_to_ball_test)
    base_test["ball_diry"] = np.sin(angle_to_ball_test)
    rad_dir_test = np.deg2rad(base_test["dir"])
    base_test["vx"] = base_test["s"] * np.cos(rad_dir_test)
    base_test["vy"] = base_test["s"] * np.sin(rad_dir_test)
    base_test[["ddx", "ddy"]] = (
        base_test.groupby(seq_key)[["dx", "dy"]]
                 .diff()
                 .fillna(0.0)
    )

    # QB position per frame, joined onto every player of that frame
    qb_test = (
        base_test[base_test["player_role"] == "Passer"]
        [group_cols_full + ["x", "y"]]
        .rename(columns={"x": "x_qb", "y": "y_qb"})
    )
    base_test = base_test.merge(qb_test, on=group_cols_full, how="left")
    base_test["dist_qb"] = np.hypot(
        base_test["x"] - base_test["x_qb"],
        base_test["y"] - base_test["y_qb"]
    )

    # targeted receiver
    base_test = add_dist_to_targeted_receiver(base_test)

    # nearest opponent
    nearest_test = nearest_opponent_distance(base_test)
    base_test = base_test.merge(nearest_test, on=player_key, how="left")
    base_test = base_test.rename(columns={"dist_opp": "dist_nearest_opp"})

    # one-hot: keep EVERY category present in this batch; the columns the model was
    # trained on are picked by name below, the rest are simply never selected.
    base_test = pd.get_dummies(
        base_test,
        columns=["player_side", "player_role"],
        drop_first=False
    )

    # training dummy columns whose category does not occur in this batch are all-zero
    for col in dummy_cols:
        if col not in base_test.columns:
            base_test[col] = 0.0

    seq_df_test = base_test[base_test["player_to_predict"]].copy()
    seq_df_test = seq_df_test.sort_values(player_key)

    # guarantee all frame_features exist (checked once, not once per player)
    for col in frame_features:
        if col not in seq_df_test.columns:
            seq_df_test[col] = 0.0

    feature_cols = frame_features + ["x_rel", "y_rel"]

    X_list_test   = []
    start_xy_test = []
    meta_test     = []

    for (g, p, n), g_in in seq_df_test.groupby(seq_key):
        if len(g_in) < T_IN:
            continue

        g_in_last = g_in.tail(T_IN).copy()
        x0, y0 = g_in_last.iloc[-1][["x", "y"]]

        g_in_last["x_rel"] = g_in_last["x"] - x0
        g_in_last["y_rel"] = g_in_last["y"] - y0

        feat_df_test = g_in_last[feature_cols]

        # skip sequences with missing features (e.g. no Passer row in a frame)
        if feat_df_test.isna().any().any():
            continue

        # append to all three lists together so they stay aligned
        X_list_test.append(feat_df_test.to_numpy(dtype=np.float32))
        start_xy_test.append([x0, y0])
        meta_test.append((g, p, n))

    # SAFE GUARD: no valid sequences at all
    if len(X_list_test) == 0:
        return None, None, {}

    X_test_seq    = np.stack(X_list_test)
    start_xy_test = np.array(start_xy_test, np.float32)

    keys_to_row = {key: i for i, key in enumerate(meta_test)}

    return X_test_seq, start_xy_test, keys_to_row


def predict(test, test_input):
    """Inference entry point called by the Kaggle gateway for each test batch.

    Args:
        test: rows to predict, with ``game_id``, ``play_id``, ``nfl_id`` and a
            1-based ``frame_id``. Anything exposing ``to_pandas()`` is converted.
        test_input: tracking rows observed before the pass for the same plays.

    Returns:
        DataFrame with columns ``x`` and ``y`` in the SAME ROW ORDER as ``test``,
        expressed in the original (non-standardized) field coordinates.

    Fallbacks:
        * A player for whom no input sequence could be built (too few frames or
          missing features) is predicted at the last position observed in
          ``test_input`` rather than at (0, 0), which lies far from any player.
          (0, 0) is used only if the player is absent from ``test_input`` or
          has no finite position there.
        * Frames beyond ``MAX_OUT`` repeat the last predicted position.
    """
    # Convert to pandas if needed (polars, etc.)
    if hasattr(test, "to_pandas"):
        test = test.to_pandas()
    if hasattr(test_input, "to_pandas"):
        test_input = test_input.to_pandas()

    # (game_id, play_id) -> original play_direction, needed to undo the standardization
    dir_map = (
        test_input[["game_id", "play_id", "play_direction"]]
        .drop_duplicates()
        .set_index(["game_id", "play_id"])["play_direction"]
        .to_dict()
    )

    # Last observed position per player-play, already in ORIGINAL coordinates.
    last_seen = (
        test_input.sort_values("frame_id")
        .groupby(["game_id", "play_id", "nfl_id"])[["x", "y"]]
        .last()
    )
    fallback_xy = dict(zip(last_seen.index, zip(last_seen["x"], last_seen["y"])))

    # Build sequences and run the trained model (skipped when nothing could be built)
    X_test_seq, start_xy_test, keys_to_row = build_test_sequences_from_input(test_input)

    Y_test_abs = None
    if X_test_seq is not None and X_test_seq.shape[0] > 0:
        X_test_sc = np.nan_to_num(scale_X(X_test_seq))
        Y_test_rel = tcn_model.predict(X_test_sc, batch_size=1000, verbose=0)
        # relative displacements -> standardized field coordinates, (N_test, MAX_OUT, 2)
        Y_test_abs = start_xy_test[:, None, :] + Y_test_rel
    else:
        keys_to_row = {}

    # Map back row-by-row, preserving the order of `test`
    xs = []
    ys = []
    for g, p, n, frame in zip(
        test["game_id"], test["play_id"], test["nfl_id"], test["frame_id"]
    ):
        key = (g, p, n)
        idx = keys_to_row.get(key)

        if idx is None:
            x_fb, y_fb = fallback_xy.get(key, (0.0, 0.0))
            if not (np.isfinite(x_fb) and np.isfinite(y_fb)):
                x_fb, y_fb = 0.0, 0.0
            xs.append(float(x_fb))
            ys.append(float(y_fb))
            continue

        traj = Y_test_abs[idx]   # (MAX_OUT, 2) in STANDARDIZED coords

        # frame_id is 1-based; clamp so frames past MAX_OUT repeat the last position
        f = min(max(int(frame) - 1, 0), traj.shape[0] - 1)
        x_pred = float(traj[f, 0])
        y_pred = float(traj[f, 1])

        # de-standardize plays that were originally moving left
        if dir_map.get((g, p), "right") == "left":
            x_pred = 120.0 - x_pred
            y_pred = 53.3 - y_pred

        xs.append(x_pred)
        ys.append(y_pred)

    preds = pd.DataFrame({"x": xs, "y": ys})
    assert len(preds) == len(test)
    return preds




# --------- START THE EVALUATION SERVER (MUST BE AT GLOBAL LEVEL) ---------
import kaggle_evaluation.nfl_inference_server

# Wrap the `predict` function defined above as the server's prediction endpoint.
inference_server = kaggle_evaluation.nfl_inference_server.NFLInferenceServer(predict)

# `os` comes from the import in the first notebook cell.
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # This is what Kaggle uses on the hidden test set
    inference_server.serve()
else:
    # This lets you test locally inside the notebook, using the public train/test files
    inference_server.run_local_gateway(('/kaggle/input/nfl-big-data-bowl-2026-prediction/',))