# In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# In [None]:

# Dependencies: torch, pandas, numpy, sklearn

import os, glob, math, random, time
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader


# Config / Hyperparams

# Root directory of the competition dataset; every path below is derived from it.
DATA_DIR = "/kaggle/input/nfl-big-data-bowl-2026-prediction"
# Glob patterns for the training input / output CSV files.
TRAIN_INPUT_GLOB = os.path.join(DATA_DIR, "train", "input_2023_w*.csv")
TRAIN_OUTPUT_GLOB = os.path.join(DATA_DIR, "train", "output_2023_w*.csv")
# Test-time files: tracking rows, the list of ids to predict, and the submission template.
TEST_INPUT_PATH = os.path.join(DATA_DIR, "test_input.csv")
TEST_CSV_PATH = os.path.join(DATA_DIR, "test.csv")
SAMPLE_SUB_PATH = os.path.join(DATA_DIR, "sample_submission.csv")

# Use the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed all three random number generators used by this script (torch, numpy, stdlib random).
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

K_IN = 10            # number of input frames fed to the model (the last K_IN rows per player)
HIDDEN = 128         # LSTM hidden size
NLAYERS = 2          # number of stacked LSTM layers
BIDIR = True         # run the LSTM in both directions over the input window
BATCH_SIZE = 512     # training / validation batch size
LR = 1e-3            # initial Adam learning rate
EPOCHS = 10          # upper bound on training epochs; increase for better results
PATIENCE = 3         # epochs without validation improvement before early stopping
MAX_OUT_FRAMES = 60  # minimum output horizon; raised after loading if the data has a larger num_frames_output
DROPOUT = 0.1        # dropout passed to nn.LSTM


# Helpers: velocity & parsing

def add_velocity(df):
    """Add Cartesian velocity columns ``vx`` and ``vy`` to *df* in place.

    The speed column ``s`` is split into components using the heading column
    ``dir`` (degrees): ``vx = s * cos(dir)`` and ``vy = s * sin(dir)``.

    NOTE(review): this treats ``dir`` as measured from the +x axis; confirm
    that this matches the dataset's definition of the angle.

    Args:
        df: DataFrame with numeric-convertible ``s`` and ``dir`` columns.

    Returns:
        The same DataFrame object, now carrying ``vx`` and ``vy``.
    """
    heading_rad = np.deg2rad(df["dir"].values.astype(float))
    speed = df["s"].values.astype(float)
    df["vx"] = speed * np.cos(heading_rad)
    df["vy"] = speed * np.sin(heading_rad)
    return df

# -----------------------
# Load and aggregate train files
# -----------------------
print("Loading training files...")
input_paths = sorted(glob.glob(TRAIN_INPUT_GLOB))
output_paths = sorted(glob.glob(TRAIN_OUTPUT_GLOB))
# Fail early with a readable message: pd.concat on an empty list only reports
# "No objects to concatenate", which hides the real cause (a wrong data path).
if not input_paths or not output_paths:
    raise FileNotFoundError(
        f"No training files matched {TRAIN_INPUT_GLOB!r} / {TRAIN_OUTPUT_GLOB!r}"
    )

train_inputs = [pd.read_csv(p) for p in input_paths]
train_outputs = [pd.read_csv(p) for p in output_paths]
df_in = pd.concat(train_inputs, ignore_index=True)
df_out = pd.concat(train_outputs, ignore_index=True)
print("Total input rows:", len(df_in), "Total output rows:", len(df_out))

# Precompute max output length: the model head is sized for the longest
# horizon seen in the data, but never for fewer frames than the configured floor.
if "num_frames_output" in df_in.columns:
    max_out = df_in["num_frames_output"].max()
    if pd.isna(max_out):
        # Column exists but holds no usable value; int(nan) would raise below.
        max_out = MAX_OUT_FRAMES
else:
    max_out = MAX_OUT_FRAMES
MAX_OUT_FRAMES = int(max(max_out, MAX_OUT_FRAMES))
print("MAX_OUT_FRAMES set to", MAX_OUT_FRAMES)

# Add velocities
df_in = add_velocity(df_in)
# The output frames are only used for their x,y columns, so no velocity is derived for them.
# output file doesn't contain s/dir typically; we only need x,y for output.


# Build label encoders and scalers

# player_role, player_position, player_side encode
# Integer-encode the two string categoricals; each encoder is kept so the
# same mapping can be applied to the test rows later.
le_role, le_pos = LabelEncoder(), LabelEncoder()
for encoder, raw_col, code_col in (
    (le_role, "player_role", "player_role_f"),
    (le_pos, "player_position", "player_pos_f"),
):
    df_in[code_col] = encoder.fit_transform(df_in[raw_col].astype(str))

# Binary side flag; any value outside the map is encoded as 0.
side_map = dict(Offense=1, Defense=0)
df_in["player_side_f"] = df_in["player_side"].map(side_map).fillna(0).astype(int)

# Standardise the continuous features using statistics of all training input rows.
numeric_cols = ["x", "y", "s", "a", "vx", "vy"]
scaler = StandardScaler().fit(df_in[numeric_cols].values)

# -----------------------
# Build list of training examples
# For each (game_id, play_id, nfl_id) we gather the last K_IN pre-pass frames (pad if needed) and the full post-pass outputs.
# -----------------------
print("Creating training examples...")

# Group both tables by player-within-play so matching pairs can be looked up by key.
out_group = df_out.groupby(["game_id","play_id","nfl_id"])
in_group = df_in.groupby(["game_id","play_id","nfl_id"])

# One entry per player-play that has both input and output frames.
examples = []
for (gid, pid, nid), player_in in in_group:
    try:
        player_out = out_group.get_group((gid, pid, nid))
    except KeyError:
        # No output frames recorded for this player on this play: nothing to learn from.
        continue
    player_out = player_out.sort_values("frame_id")
    # Keep only the most recent K_IN input frames (fewer if the player has fewer rows).
    recent_in = player_in.sort_values("frame_id").tail(K_IN)
    examples.append(dict(
        game_id=gid,
        play_id=pid,
        nfl_id=nid,
        input=recent_in,
        output=player_out,
        num_out=len(player_out),
    ))

print("Total examples:", len(examples))

# -----------------------
# Dataset + collate
# -----------------------
class TrajectoryDataset(Dataset):
    """Turns per-player example dicts into fixed-size model inputs and targets.

    Each example is a dict with keys ``game_id``, ``play_id``, ``nfl_id``,
    ``input`` (DataFrame of input frames) and ``output`` (DataFrame of output
    frames).

    Args:
        examples: list of example dicts as described above.
        scaler: fitted object exposing ``transform`` for the numeric columns.
        K_in: number of input frames per sample; shorter inputs are pre-padded.
        max_out: number of output frames per sample; targets are zero-padded
            or truncated to this length.
        numeric_cols: names of the columns passed through ``scaler``.
    """

    def __init__(self, examples, scaler, K_in=K_IN, max_out=MAX_OUT_FRAMES, numeric_cols=numeric_cols):
        self.examples = examples
        self.scaler = scaler
        self.K_in = K_in
        self.max_out = max_out
        self.numeric_cols = numeric_cols

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        """Return one sample as a dict.

        Keys:
            feats: float32 array ``(K_in, feat_dim)`` — scaled numeric columns,
                offset of the player from the ball landing point, then the
                role / position / side codes.
            target_disp: float32 array ``(max_out, 2)`` — output x,y minus the
                last input x,y, zero-padded past the real length.
            mask: float32 array ``(max_out,)`` — 1.0 for real frames, 0.0 for padding.
            meta: ``(game_id, play_id, nfl_id, T)`` with ``T`` the untruncated
                number of output frames.
        """
        ex = self.examples[idx]
        # Nothing below mutates the frames, so they are read directly instead
        # of being copied on every access.
        inp = ex["input"]
        out = ex["output"]

        # Ball landing point, taken from the last input row. When the column
        # is absent fall back to 0.0, exactly as the inference-time feature
        # builder does; NaN here would propagate into every feature row.
        ball_x = inp["ball_land_x"].iloc[-1] if "ball_land_x" in inp.columns else 0.0
        ball_y = inp["ball_land_y"].iloc[-1] if "ball_land_y" in inp.columns else 0.0

        # Scaled continuous features.
        num = inp[self.numeric_cols].values.astype(float)
        num = self.scaler.transform(num)
        # Player position relative to the ball landing point (unscaled).
        rel = np.column_stack([(inp["x"].values - ball_x), (inp["y"].values - ball_y)])
        # Integer category codes, one column each.
        role = inp["player_role_f"].values.reshape(-1, 1)
        pos = inp["player_pos_f"].values.reshape(-1, 1)
        side = inp["player_side_f"].values.reshape(-1, 1)
        feats = np.concatenate([num, rel, role, pos, side], axis=1)  # (n_frames, feat_dim)

        # Bring the window to exactly K_in rows: repeat the earliest row in
        # front when short, keep the most recent rows when long.
        if feats.shape[0] < self.K_in:
            pad_n = self.K_in - feats.shape[0]
            pad_rows = np.repeat(feats[[0], :], pad_n, axis=0)
            feats = np.vstack([pad_rows, feats])
        else:
            feats = feats[-self.K_in:, :]

        # Targets are displacements from the last observed input position.
        last_x = inp["x"].values[-1]
        last_y = inp["y"].values[-1]
        out_xy = out[["x", "y"]].values.astype(float)
        disp = out_xy - np.array([[last_x, last_y]])

        # Pad / truncate to max_out frames and record which frames are real.
        T = disp.shape[0]
        disp_padded = np.zeros((self.max_out, 2), dtype=float)
        mask = np.zeros((self.max_out,), dtype=float)
        take = min(T, self.max_out)
        disp_padded[:take, :] = disp[:take, :]
        mask[:take] = 1.0

        return {
            "feats": feats.astype(np.float32),
            "target_disp": disp_padded.astype(np.float32),
            "mask": mask.astype(np.float32),
            "meta": (ex["game_id"], ex["play_id"], ex["nfl_id"], T),
        }

def collate_fn(batch):
    """Merge a list of dataset samples into one batch of tensors.

    Args:
        batch: list of sample dicts with keys ``feats``, ``target_disp``,
            ``mask`` (numpy arrays of matching shapes across samples) and
            ``meta``.

    Returns:
        Dict with ``feats``, ``targets`` and ``masks`` as tensors stacked
        along a new leading batch axis, and ``metas`` as the list of
        per-sample ``meta`` values in batch order.
    """
    def _stacked(field):
        # Stack the per-sample arrays along axis 0 and wrap them as a tensor.
        return torch.from_numpy(np.stack([item[field] for item in batch], axis=0))

    return {
        "feats": _stacked("feats"),
        "targets": _stacked("target_disp"),
        "masks": _stacked("mask"),
        "metas": [item["meta"] for item in batch],
    }

# -----------------------
# Split examples into train/val (by game_id to avoid leakage)
# -----------------------
print("Splitting train/val by game...")
# Hold out whole games so no game contributes to both splits.
game_ids = list(set(example["game_id"] for example in examples))
random.shuffle(game_ids)
val_frac = 0.12
n_val_games = max(1, int(len(game_ids) * val_frac))  # always hold out at least one game
val_games = set(game_ids[:n_val_games])

train_examples, val_examples = [], []
for example in examples:
    bucket = val_examples if example["game_id"] in val_games else train_examples
    bucket.append(example)
print("Train examples:", len(train_examples), "Val examples:", len(val_examples))

train_ds = TrajectoryDataset(train_examples, scaler)
val_ds = TrajectoryDataset(val_examples, scaler)
# Both loaders share everything except shuffling.
loader_kwargs = dict(batch_size=BATCH_SIZE, collate_fn=collate_fn, num_workers=2, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)

# -----------------------
# Model: encoder LSTM -> linear outputs for entire output sequence
# -----------------------
class TrajModel(nn.Module):
    """LSTM encoder whose final hidden state is decoded into all output frames at once.

    The last LSTM layer's final hidden state (both directions concatenated
    when bidirectional) goes through a hidden linear layer with ReLU and then
    a linear head producing ``max_out * 2`` values, reshaped to
    ``(batch, max_out, 2)``.

    Args:
        feat_dim: number of features per input frame.
        hidden: LSTM hidden size, also the width of the hidden linear layer.
        n_layers: number of stacked LSTM layers.
        bidir: whether the LSTM is bidirectional.
        dropout: dropout value handed to ``nn.LSTM``.
        max_out: number of output frames emitted per sample.
    """

    def __init__(self, feat_dim, hidden=HIDDEN, n_layers=NLAYERS, bidir=BIDIR, dropout=DROPOUT, max_out=MAX_OUT_FRAMES):
        super().__init__()
        self.hidden = hidden
        self.n_layers = n_layers
        self.bidir = bidir
        self.max_out = max_out
        self.lstm = nn.LSTM(
            input_size=feat_dim,
            hidden_size=hidden,
            num_layers=n_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=bidir,
        )
        # Width of the encoder summary: one hidden vector per direction.
        encoder_width = hidden * (2 if bidir else 1)
        self.fc1 = nn.Linear(encoder_width, hidden)
        self.act = nn.ReLU()
        # Two values (x and y displacement) for each of the max_out frames.
        self.fc_out = nn.Linear(hidden, max_out * 2)

    def forward(self, x):
        """Map inputs ``(B, K_in, feat_dim)`` to displacements ``(B, max_out, 2)``."""
        _, (h_n, _) = self.lstm(x)  # h_n: (num_layers * num_directions, B, hidden)
        if self.bidir:
            # The final two entries of h_n are the last layer's forward and
            # backward states; join them feature-wise -> (B, hidden * 2).
            summary = torch.cat((h_n[-2], h_n[-1]), dim=1)
        else:
            summary = h_n[-1]  # (B, hidden)
        hidden_act = self.act(self.fc1(summary))
        flat = self.fc_out(hidden_act)  # (B, max_out * 2)
        return flat.view(x.size(0), self.max_out, 2)

# -----------------------
# Training utilities
# -----------------------
# Feature width is read off a real sample so it always matches the dataset.
feat_dim = train_ds[0]["feats"].shape[1]
model = TrajModel(feat_dim, hidden=HIDDEN, n_layers=NLAYERS, bidir=BIDIR, max_out=MAX_OUT_FRAMES).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=1e-6)
# Halve the learning rate after 2 epochs without validation improvement.
# The `verbose` argument is intentionally not passed: it is deprecated in
# PyTorch's LR schedulers, and releases that dropped it reject the keyword.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)
# Element-wise loss; padding frames are masked out by the caller before reduction.
criterion = nn.MSELoss(reduction='none')

def masked_rmse(pred, target, mask):
    """Root-mean-square error over the frames selected by *mask*.

    Args:
        pred: tensor ``(B, T, 2)`` of predictions.
        target: tensor ``(B, T, 2)`` of ground truth.
        mask: tensor ``(B, T)``; frames weighted 0 do not contribute.

    Returns:
        Scalar tensor: square root of the mask-weighted sum of per-frame mean
        squared errors divided by the mask total (the divisor is floored at
        1 so an all-zero mask yields 0 instead of a division by zero).
    """
    per_frame_mse = torch.mean((pred - target) ** 2, dim=2)  # (B, T)
    valid_total = torch.clamp(mask.sum(), min=1.0)
    return ((per_frame_mse * mask).sum() / valid_total).sqrt()

# -----------------------
# Train loop
# -----------------------
def _masked_mse(preds, targets, masks):
    """Mean squared error averaged over real (mask == 1) frames only."""
    per_frame = criterion(preds, targets).mean(dim=2)  # (B, T)
    return (per_frame * masks).sum() / masks.sum().clamp_min(1.0)


best_val = 1e9
patience = PATIENCE
best_epoch = -1
# In-memory copy of the best weights, restored after training so that
# inference does not run with the (worse) weights of the final epochs.
best_state = None
for epoch in range(1, EPOCHS+1):
    model.train()
    t0 = time.time()
    train_loss = 0.0
    n_batches = 0
    for batch in train_loader:
        feats = batch["feats"].to(DEVICE)   # (B, K_in, feat_dim)
        targets = batch["targets"].to(DEVICE)  # (B, max_out, 2)
        masks = batch["masks"].to(DEVICE)    # (B, max_out)
        preds = model(feats)                 # (B, max_out, 2)
        loss_masked = _masked_mse(preds, targets, masks)
        optimizer.zero_grad()
        loss_masked.backward()
        # Cap the gradient norm to keep LSTM updates stable.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()
        train_loss += loss_masked.item()
        n_batches += 1
    train_loss /= max(1, n_batches)

    # validation
    model.eval()
    val_loss = 0.0
    n_batches = 0
    with torch.no_grad():
        for batch in val_loader:
            feats = batch["feats"].to(DEVICE)
            targets = batch["targets"].to(DEVICE)
            masks = batch["masks"].to(DEVICE)
            preds = model(feats)
            val_loss += _masked_mse(preds, targets, masks).item()
            n_batches += 1
    val_loss /= max(1, n_batches)
    scheduler.step(val_loss)
    t1 = time.time()
    print(f"Epoch {epoch} train_loss={train_loss:.5f} val_loss={val_loss:.5f} time={(t1-t0):.1f}s")
    # track best
    if val_loss < best_val:
        best_val = val_loss
        best_epoch = epoch
        # Clone to CPU so later optimizer steps cannot modify the snapshot.
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
        torch.save({"model_state": model.state_dict(), "scaler": scaler, "le_role": le_role, "le_pos": le_pos},
                   "best_model.pth")
        print("Saved best_model.pth")
        patience = PATIENCE  # reset
    else:
        patience -= 1
        if patience <= 0:
            print("Early stopping.")
            break

# Roll the model back to the best validation epoch before it is used for inference.
if best_state is not None:
    model.load_state_dict(best_state)
    print(f"Restored best weights from epoch {best_epoch} (val_loss={best_val:.5f})")

# -----------------------
# Inference on test set
# -----------------------
print("Running inference on test set...")
# Load the test tracking rows, the list of ids to predict, and the submission template.
test_input = pd.read_csv(TEST_INPUT_PATH)
test_df = pd.read_csv(TEST_CSV_PATH)
sample_sub = pd.read_csv(SAMPLE_SUB_PATH)

# Derive the same velocity columns that the training inputs carry.
test_input = add_velocity(test_input)


def _encode_with_fallback(encoder, labels, column):
    """Apply a fitted LabelEncoder's mapping, encoding unseen labels as 0.

    ``LabelEncoder.transform`` raises on labels absent from training, which
    would abort the whole run; unseen labels are instead given code 0 (the
    same default the side flag uses) and reported.
    """
    lookup = {label: code for code, label in enumerate(encoder.classes_)}
    codes = labels.astype(str).map(lookup)
    n_unseen = int(codes.isna().sum())
    if n_unseen:
        print(f"Warning: {n_unseen} rows in '{column}' have labels unseen in training; encoding them as 0.")
    return codes.fillna(0).astype(int)


test_input["player_role_f"] = _encode_with_fallback(le_role, test_input["player_role"], "player_role")
test_input["player_pos_f"] = _encode_with_fallback(le_pos, test_input["player_position"], "player_position")
test_input["player_side_f"] = test_input["player_side"].map(side_map).fillna(0).astype(int)

# Last K_IN frames per (game, play, player), ordered by key and then frame.
# Sorting once and using GroupBy.tail keeps the grouping columns in the
# result and avoids running a Python lambda per group via groupby.apply.
_player_cols = ["game_id", "play_id", "nfl_id"]
last_state = (
    test_input.sort_values(_player_cols + ["frame_id"], kind="stable")
    .groupby(_player_cols, sort=False)
    .tail(K_IN)
    .reset_index(drop=True)
)

# Predictions are produced in batches below and then mapped onto the requested ids.
model = model.to(DEVICE)
model.eval()

# Create features same as dataset
def build_feats_from_rows(df_rows):
    """Build the ``(K_IN, feat_dim)`` float32 feature matrix for one player.

    Args:
        df_rows: that player's rows in ascending frame order, already
            carrying the velocity and category-code columns.

    Returns:
        Array whose columns are the scaled numeric columns, the x/y offset
        from the ball landing point (0.0 is used when the landing columns are
        absent), then the role, position and side codes. Inputs shorter than
        ``K_IN`` rows are padded in front with copies of the first row; longer
        inputs keep only the last ``K_IN`` rows.
    """
    columns = df_rows.columns
    target_x = df_rows["ball_land_x"].iloc[-1] if "ball_land_x" in columns else 0.0
    target_y = df_rows["ball_land_y"].iloc[-1] if "ball_land_y" in columns else 0.0

    scaled = scaler.transform(df_rows[numeric_cols].values.astype(float))
    offsets = np.column_stack([
        df_rows["x"].values - target_x,
        df_rows["y"].values - target_y,
    ])
    codes = [
        df_rows[code_col].values.reshape(-1, 1)
        for code_col in ("player_role_f", "player_pos_f", "player_side_f")
    ]
    feats = np.concatenate([scaled, offsets, *codes], axis=1)

    n_missing = K_IN - feats.shape[0]
    if n_missing > 0:
        # Too few frames: repeat the earliest row in front.
        filler = np.repeat(feats[[0], :], n_missing, axis=0)
        feats = np.vstack([filler, feats])
    else:
        feats = feats[-K_IN:, :]
    return feats.astype(np.float32)

# build list of group keys and their feature arrays
grouped = last_state.groupby(["game_id","play_id","nfl_id"])
keys = []        # (game_id, play_id, nfl_id) per player, in group order
feat_list = []   # matching (K_IN, feat_dim) feature arrays
meta_info = []   # matching (last_x, last_y, last_frame, num_out) tuples
for player_key, player_rows in grouped:
    ordered = player_rows.sort_values("frame_id")
    final_row = ordered.iloc[-1]
    keys.append(player_key)
    feat_list.append(build_feats_from_rows(ordered))
    meta_info.append((
        final_row["x"],
        final_row["y"],
        int(final_row["frame_id"]),
        # Number of frames to keep; the full model horizon when the column is absent.
        int(final_row.get("num_frames_output", MAX_OUT_FRAMES)),
    ))

# Run the model over the players in chunks of B.
B = 2048
pred_dict = {}  # (game,play,nfl) -> (last input frame id, absolute positions array (T_out, 2))
with torch.no_grad():
    for start in range(0, len(feat_list), B):
        stop = start + B
        chunk = torch.from_numpy(np.stack(feat_list[start:stop], axis=0)).to(DEVICE)
        chunk_disp = model(chunk).cpu().numpy()  # (b, max_out, 2) predicted displacements
        for key, (last_x, last_y, last_frame, num_out), disp in zip(
            keys[start:stop], meta_info[start:stop], chunk_disp
        ):
            take = min(int(num_out), disp.shape[0])
            # Displacements are relative to the last observed position; shift back to field coordinates.
            abs_pos = disp[:take, :] + np.array([[last_x, last_y]])
            pred_dict[key] = (last_frame, abs_pos)

# Build submission rows by reading test.csv rows and mapping
# Position used when no prediction exists for a requested id.
FALLBACK_X, FALLBACK_Y = 60.0, 26.65

# Training targets are indexed positionally: the k-th output frame of a player
# (sorted by frame_id) is the k-th row of the model output. Map requested
# frames the same way, by their rank among the frames requested for that
# player, instead of assuming output frame ids continue after the last input
# frame id. If output ids restart from 1, that assumption sends every lookup
# to a negative index and the model output is never used.
_player_cols = ["game_id", "play_id", "nfl_id"]
_frame_rank = test_df.groupby(_player_cols)["frame_id"].rank(method="dense")
# Rows whose key contains NaN get no rank; they cannot match a prediction
# anyway, so give them index 0 to keep the integer cast valid.
_out_index = (_frame_rank.fillna(1) - 1).astype(int).to_numpy()

rows = []
for row, idx in zip(test_df.itertuples(index=False), _out_index):
    gid, pid, nid, fid = row.game_id, row.play_id, row.nfl_id, row.frame_id
    entry = pred_dict.get((gid, pid, nid))
    if entry is None or len(entry[1]) == 0:
        # pred_dict holds every player present in test_input, so a miss means
        # there are no tracking rows to extrapolate from either.
        x_pred, y_pred = FALLBACK_X, FALLBACK_Y
    else:
        _, abs_pos = entry
        # Frames past the predicted horizon reuse the last predicted position.
        p = abs_pos[min(idx, abs_pos.shape[0] - 1)]
        x_pred, y_pred = float(p[0]), float(p[1])
    rows.append((f"{gid}_{pid}_{nid}_{fid}", x_pred, y_pred))

sub_df = pd.DataFrame(rows, columns=["id","x","y"])
# Left-merge onto the sample submission so the output keeps its row order and id set.
submission = sample_sub.drop(columns=["x","y"]).merge(sub_df, on="id", how="left")
# Ids in the template that received no prediction would otherwise be written as NaN.
n_unfilled = int(submission[["x", "y"]].isna().any(axis=1).sum())
if n_unfilled:
    print(f"Warning: {n_unfilled} submission ids had no prediction; using fallback position.")
# Final clipping to the coordinate bounds (presumably the field extent in yards).
submission["x"] = submission["x"].fillna(FALLBACK_X).clip(0,120)
submission["y"] = submission["y"].fillna(FALLBACK_Y).clip(0,53.3)
submission.to_csv("submission.csv", index=False)
print("Saved submission.csv with shape", submission.shape)
