In [None]:
# ============================================================
# Import packages
# ============================================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import LabelEncoder
import os
import glob
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm 



In [None]:
# Notebook-wide display preferences: no cap on printed columns, 120-character console
# width for pandas, and seaborn's "whitegrid" plot style.
for _opt_name, _opt_value in (('display.max_columns', None), ('display.width', 120)):
    pd.set_option(_opt_name, _opt_value)
sns.set_style("whitegrid")

print("‚úÖ Libraries installed and imported successfully!")

In [None]:
# ============================================================
#  Load and Explore Data
# ============================================================

# Locations of the two test CSVs inside the Kaggle competition input directory
test_input_path = "/kaggle/input/nfl-big-data-bowl-2026-prediction/test_input.csv"
test_path = "/kaggle/input/nfl-big-data-bowl-2026-prediction/test.csv"

# Read both files (test_input first, then test)
test_input, test = (pd.read_csv(_csv_path) for _csv_path in (test_input_path, test_path))

print("‚úÖ Files loaded successfully!\n")

# Shapes of both frames first ...
for _shape_label, _frame in (
    ("test_input.csv shape:", test_input),
    ("test.csv shape:", test),
):
    print(_shape_label, _frame.shape)

# ... then the column names of each frame under its own header
for _header, _frame in (
    ("\n--- test_input columns ---", test_input),
    ("\n--- test columns ---", test),
):
    print(_header)
    print(_frame.columns.tolist())

# Rich preview of the first three rows of each frame
for _frame in (test_input, test):
    display(_frame.head(3))

In [None]:
# ---------------------------
# 1) Encode categorical columns
# ---------------------------
# Rebind to a copy so the '<col>_enc' columns are added to a fresh frame
test_input = test_input.copy()

# Label-encode whichever of these columns are present; values are cast to str first,
# so missing values are encoded as their own string category.
_categorical_cols = ('player_side', 'player_role', 'player_position')
for col in [c for c in _categorical_cols if c in test_input.columns]:
    le = LabelEncoder()
    _as_text = test_input[col].astype(str)
    test_input[col + '_enc'] = le.fit_transform(_as_text)

# ---------------------------
# 2Ô∏è‚É£ Add geometric & distance-based features
# ---------------------------
def add_geometric_features(df):
    """Return a copy of ``df`` with ball-relative geometry and frame-progress columns.

    Reads the columns ``x``, ``y``, ``ball_land_x``, ``ball_land_y``, ``s``, ``a``,
    ``game_id``, ``play_id``, ``frame_id`` and ``player_role``; the input frame is
    not modified.

    Columns appended (in this order):
        distance_to_ball  Euclidean distance between (x, y) and the ball landing point.
        relative_x/_y     Player position minus ball landing position.
        angle_to_ball     atan2(relative_y, relative_x) in degrees.
        speed, accel      Copies of ``s`` and ``a``.
        frame_id_norm     (frame_id - min) / (max - min + 1e-6) within each (game_id, play_id).
        is_targeted       1 where ``player_role == 'Targeted Receiver'``, else 0.
    """
    out = df.copy()

    # Offset of the player from the ball landing spot (player minus ball)
    dx = out['x'] - out['ball_land_x']
    dy = out['y'] - out['ball_land_y']

    out['distance_to_ball'] = np.sqrt(dx**2 + dy**2)
    out['relative_x'] = dx
    out['relative_y'] = dy
    out['angle_to_ball'] = np.degrees(np.arctan2(dy, dx))

    # Speed and acceleration are provided; expose them under descriptive names
    out['speed'] = out['s']
    out['accel'] = out['a']

    # Frame progress within the play; the 1e-6 keeps single-frame plays from dividing by zero
    frames_by_play = out.groupby(['game_id', 'play_id'])['frame_id']
    first_frame = frames_by_play.transform('min')
    last_frame = frames_by_play.transform('max')
    out['frame_id_norm'] = (out['frame_id'] - first_frame) / (last_frame - first_frame + 1e-6)

    # Flag rows whose role is the targeted receiver
    targeted = out['player_role'] == 'Targeted Receiver'
    out['is_targeted'] = targeted.astype(int)

    return out

# Run the feature builder over the encoded test inputs
features_df = add_geometric_features(test_input)

print("‚úÖ Feature engineering complete")
print("Feature sample:")
display(features_df.head(5))

# ---------------------------
# 3) Select model input columns
# ---------------------------
# Grouped by origin: raw tracking values, ball-relative geometry, per-play flags, encodings
feature_cols = (
    ['x', 'y', 's', 'a']
    + ['distance_to_ball', 'angle_to_ball', 'relative_x', 'relative_y']
    + ['frame_id_norm', 'is_targeted']
    + ['player_side_enc', 'player_role_enc', 'player_position_enc']
)

print(f"Total engineered features: {len(feature_cols)}")
print(feature_cols)

In [None]:
# ============================================================
# Step 5: Two Model Versions
#  - Baseline: GradientBoostingRegressor (sklearn)
#  - Neural: PyTorch feedforward + time-conditioning
# ============================================================
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

# -----------------------------
# 1) Discover training files
# -----------------------------
# Example file names found under this directory:
#   /kaggle/input/nfl-big-data-bowl-2026-prediction/train/input_2023_w17.csv
#   /kaggle/input/nfl-big-data-bowl-2026-prediction/train/output_2023_w05.csv
TRAIN_DIR = '/kaggle/input/nfl-big-data-bowl-2026-prediction/train/'


def _find_csvs(root, prefix):
    """Return the sorted, de-duplicated CSV paths under ``root`` for one file family.

    Args:
        root: Directory that is walked recursively.
        prefix: Family name, e.g. ``"input"`` or ``"output"`` (matched case-insensitively).

    Returns:
        Sorted list of paths whose file name starts with ``"<prefix>_"`` and ends with
        ``.csv``. If no such file exists, the CSVs whose name merely contains ``prefix``
        are returned instead. The list is empty when nothing matches (or ``root`` does
        not exist, in which case ``os.walk`` yields nothing).
    """
    strict, loose = set(), set()
    for dirname, _, filenames in os.walk(root):
        for fn in filenames:
            name = fn.lower()
            if not name.endswith(".csv"):
                continue
            path = os.path.join(dirname, fn)
            if name.startswith(prefix + "_"):
                strict.add(path)
            elif prefix in name:
                loose.add(path)
    # os.walk lists entries in arbitrary order; sorting keeps the concatenated row
    # order (and therefore any seeded split downstream) reproducible. The loose
    # matches are only a fallback, applied per family, so files are never added twice.
    return sorted(strict or loose)


input_files = _find_csvs(TRAIN_DIR, "input")
output_files = _find_csvs(TRAIN_DIR, "output")

print("Found input files:", input_files[:5])
print("Found output files:", output_files[:5])

# Nothing to train on: explain and stop the run here
if not input_files or not output_files:
    print("\n‚ö†Ô∏è Could not find training input/output CSVs under /kaggle/input.")
    print("If you do have training files, place them in the Kaggle input directory or adapt paths.")
    raise SystemExit()

In [None]:
# -----------------------------
# 2) Load & concat training files
# -----------------------------
# Read every discovered CSV and stack each family into a single frame with a fresh index
train_input = pd.concat(map(pd.read_csv, input_files), ignore_index=True)
train_output = pd.concat(map(pd.read_csv, output_files), ignore_index=True)

for _shape_label, _frame in (
    ("train_input shape:", train_input),
    ("train_output shape:", train_output),
):
    print(_shape_label, _frame.shape)

In [None]:
# -----------------------------
# 3) Build training examples:
#    - For each (game, play, nfl_id) take last input row (pre-pass snapshot)
#    - Join to all output frames (these are the targets)
#    - Each output frame becomes one training row; features come from last input snapshot
# -----------------------------

# Last tracked input row per (game, play, player): the pre-pass snapshot.
# Note: GroupBy.last() returns the last non-null value of each column within the group.
last_input = (
    train_input
    .sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id'])
    .groupby(['game_id', 'play_id', 'nfl_id'], as_index=False)
    .last()
)

# Join every output frame (the targets) to its player's snapshot.
# The snapshot's x / y are kept on purpose: each non-key column present in both frames
# (frame_id, x, y) gets a suffix, so the targets become x_out / y_out and the snapshot
# position becomes x_in / y_in -- the names add_features() looks for. Dropping the
# snapshot x / y before the merge leaves add_features() without an input position,
# and it then substitutes zeros for every player.
merged = pd.merge(
    train_output,
    last_input,
    on=['game_id', 'play_id', 'nfl_id'],
    how='left',
    suffixes=('_out', '_in')
)

# Drop rows with missing critical values (output rows without a matching snapshot
# have no ball landing point after the left join and are removed here)
merged = merged.dropna(subset=['frame_id_out', 'ball_land_x', 'ball_land_y'])

print("Merged training rows:", merged.shape)
display(merged.head())

In [None]:
# -----------------------------
# 4) Feature engineering
# -----------------------------
def add_features(df):
    """Return a copy of ``df`` with the engineered model features added.

    Args:
        df: Frame with one row per (player, output frame). Expected columns:
            ``ball_land_x``, ``ball_land_y``, ``game_id``, ``play_id``; optionally
            the snapshot position as ``x_in``/``y_in`` or ``x_snap``/``y_snap``,
            a frame column (``frame_id_out`` or ``frame_id``), ``s``, ``a``,
            ``player_role`` and ``player_side``.

    Returns:
        A new DataFrame with these columns added or overwritten:
        ``x_in``, ``y_in``, ``distance_to_ball``, ``relative_x``, ``relative_y``,
        ``angle_to_ball`` (degrees), ``frame_id_norm`` (only if a frame column exists),
        ``speed`` / ``accel`` (only if ``s`` / ``a`` exist), ``is_targeted``,
        ``player_side_enc``, ``x_snap``, ``y_snap``.

    When no snapshot position is found, a warning is printed and zeros are used.
    Missing ``player_role`` / ``player_side`` columns yield all-zero flags.
    """
    df = df.copy()

    # Locate the snapshot position: merge-suffixed names first, then the *_snap names.
    # Both columns of a pair must be present for the pair to be used.
    for x_name, y_name in (('x_in', 'y_in'), ('x_snap', 'y_snap')):
        if x_name in df.columns and y_name in df.columns:
            snap_x, snap_y = df[x_name], df[y_name]
            break
    else:
        print("‚ö†Ô∏è Warning: No input x/y found; creating dummy 0 columns")
        snap_x = snap_y = 0.0
    df['x_in'] = snap_x
    df['y_in'] = snap_y

    # Basic geometry relative to the ball landing point (player minus ball)
    df['distance_to_ball'] = np.sqrt((df['x_in'] - df['ball_land_x'])**2 + (df['y_in'] - df['ball_land_y'])**2)
    df['relative_x'] = df['x_in'] - df['ball_land_x']
    df['relative_y'] = df['y_in'] - df['ball_land_y']
    df['angle_to_ball'] = np.degrees(np.arctan2(df['relative_y'], df['relative_x']))

    # Normalise the frame id within each play; 1e-6 guards single-frame plays
    fid = next((c for c in ('frame_id_out', 'frame_id') if c in df.columns), None)
    if fid:
        frames_by_play = df.groupby(['game_id', 'play_id'])[fid]
        first_frame = frames_by_play.transform('min')
        last_frame = frames_by_play.transform('max')
        df['frame_id_norm'] = (df[fid] - first_frame) / (last_frame - first_frame + 1e-6)

    # Speed & acceleration from the snapshot
    if 's' in df.columns:
        df['speed'] = df['s']
    if 'a' in df.columns:
        df['accel'] = df['a']

    def _flag(column, value):
        # DataFrame.get(column, '') returns a plain str when the column is absent, and
        # comparing that yields a bool without .astype -- so handle absence explicitly.
        if column not in df.columns:
            return 0
        return (df[column] == value).astype(int)

    df['is_targeted'] = _flag('player_role', 'Targeted Receiver')
    df['player_side_enc'] = _flag('player_side', 'Offense')

    # Keep snapshot positions under the names used in the model's feature list
    df['x_snap'] = df['x_in']
    df['y_snap'] = df['y_in']

    return df


# Run the feature builder on the merged training frame
merged = add_features(merged)

# Targets: the merge-suffixed output column when present, otherwise the plain column
for _target, _suffixed, _plain in (('target_x', 'x_out', 'x'), ('target_y', 'y_out', 'y')):
    merged[_target] = merged.get(_suffixed, merged.get(_plain))

# 1-based position of each output frame within its (game, play, player) sequence
if 'frame_id_out' in merged.columns:
    fid_col = 'frame_id_out'
else:
    fid_col = 'frame_id'
_frame_rank = merged.groupby(['game_id', 'play_id', 'nfl_id'])[fid_col].rank(method='first')
merged['out_frame_index'] = _frame_rank.astype(int)

# Quick visual check of keys, snapshot position, targets and a few features
print("‚úÖ Example training sample:")
_preview_cols = [
    'game_id', 'play_id', 'nfl_id', fid_col, 'out_frame_index',
    'x_snap', 'y_snap', 'target_x', 'target_y',
    'distance_to_ball', 'angle_to_ball', 'is_targeted',
]
display(merged[_preview_cols].head(5))


In [None]:
# -----------------------------
# 5) Prepare X,y
# Each training row corresponds to one output frame (frame_id in output)
# We'll use the snapshot features + the integer index of the output frame (normalized) as input.
# -----------------------------
feature_cols = [
    'x_snap','y_snap','speed','accel','distance_to_ball','angle_to_ball',
    'relative_x','relative_y','frame_id_norm','is_targeted','player_side_enc'
]
# Add the output-frame position scaled by the number of output frames for that player
_frames_per_player = merged.groupby(['game_id','play_id','nfl_id'])['out_frame_index'].transform('max')
merged['out_frame_norm'] = merged['out_frame_index'] / (_frames_per_player + 1e-6)
feature_cols.append('out_frame_norm')

# Missing feature values are replaced by 0 before conversion to a NumPy matrix
X = merged[feature_cols].fillna(0).values
y = merged[['target_x','target_y']].values

print("X shape:", X.shape, "y shape:", y.shape)
# -----------------------------
# 6) Train/val split
# -----------------------------
# Split by play rather than by row. Every output frame of a player shares the same
# snapshot features, so a row-level shuffle places near-identical rows of one play in
# both sets and makes the validation RMSE look better than it is.
from sklearn.model_selection import GroupShuffleSplit

_play_groups = merged.groupby(['game_id','play_id']).ngroup().values
_splitter = GroupShuffleSplit(n_splits=1, test_size=0.15, random_state=42)
_train_idx, _val_idx = next(_splitter.split(X, y, groups=_play_groups))
X_train, X_val = X[_train_idx], X[_val_idx]
y_train, y_val = y[_train_idx], y[_val_idx]
print("Train samples:", X_train.shape[0], "Val samples:", X_val.shape[0])

In [None]:
# -----------------------------
# 7) Baseline model: GradientBoosting with MultiOutputRegressor
# -----------------------------
# print("\n=== Baseline: GradientBoostingRegressor ===")
# base_est = GradientBoostingRegressor(n_estimators=200, learning_rate=0.05, max_depth=6, random_state=42)
# baseline = MultiOutputRegressor(base_est)
# baseline.fit(X_train, y_train)
# y_pred_val = baseline.predict(X_val)
# rmse_baseline = np.sqrt(mean_squared_error(y_val, y_pred_val))
# print(f"Baseline RMSE (x,y combined): {rmse_baseline:.4f}")

# # also compute RMSE per coordinate
# rmse_x = np.sqrt(mean_squared_error(y_val[:,0], y_pred_val[:,0]))
# rmse_y = np.sqrt(mean_squared_error(y_val[:,1], y_pred_val[:,1]))
# print(f"RMSE x: {rmse_x:.4f}, RMSE y: {rmse_y:.4f}")

In [None]:
# -----------------------------
# 8) Neural model: simple feedforward that conditions on out_frame_norm
# -----------------------------
print("\n=== Neural Model (PyTorch): Feedforward Conditioning on time index ===")

class FeedforwardTimeModel(nn.Module):
    """Fully connected network with two 128-unit ReLU hidden layers and a 2-unit output.

    Args:
        in_dim: Number of input features per row.

    ``forward`` maps a ``(..., in_dim)`` tensor to a ``(..., 2)`` tensor holding the
    predicted x and y.
    """

    def __init__(self, in_dim):
        super().__init__()
        hidden = 128
        # Layers are created in order input -> hidden -> hidden -> output
        layers = [
            nn.Linear(in_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 2),  # predict x,y
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        # Apply the stacked layers one after another
        for layer in self.net:
            x = layer(x)
        return x

# Move the train / validation arrays onto the selected device as float32 tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_t = torch.tensor(y_train, dtype=torch.float32).to(device)
X_val_t = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_t = torch.tensor(y_val, dtype=torch.float32).to(device)

model = FeedforwardTimeModel(X_train.shape[1]).to(device)
opt = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

# Training settings
n_epochs = 40
batch_size = 1024
best_val_rmse = 1e9
best_state = None

for epoch in range(n_epochs):
    model.train()
    # Fresh random mini-batch order every epoch
    perm = np.random.permutation(X_train_t.shape[0])
    losses = []
    for i in range(0, len(perm), batch_size):
        idx = perm[i:i+batch_size]
        xb = X_train_t[idx]
        yb = y_train_t[idx]
        pred = model(xb)
        loss = criterion(pred, yb)
        opt.zero_grad()
        loss.backward()
        opt.step()
        losses.append(loss.item())

    # Validation RMSE over both coordinates, computed on the whole validation set
    model.eval()
    with torch.no_grad():
        pred_val = model(X_val_t).cpu().numpy()
    val_rmse = np.sqrt(mean_squared_error(y_val, pred_val))
    if val_rmse < best_val_rmse:
        best_val_rmse = val_rmse
        # state_dict() returns references to the live parameter tensors, which the
        # optimizer keeps updating in place. Clone them so the snapshot really holds
        # this epoch's weights instead of silently tracking the final ones.
        best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
    if epoch % 5 == 0 or epoch == n_epochs-1:
        print(f"Epoch {epoch+1}/{n_epochs} | train_loss={np.mean(losses):.4f} | val_rmse={val_rmse:.4f}")

# Restore the weights of the best validation epoch
if best_state is not None:
    model.load_state_dict(best_state)
print(f"Best neural val RMSE: {best_val_rmse:.4f}")

In [None]:
# # ---------PLOT DISTRIBUTION------------------
# # 1Ô∏è‚É£ Basic summary statistics
# # ---------------------------
# print("=== Basic Data Info ===")
# print(test_input.info())
# print("\n=== Sample Rows ===")
# display(test_input.head(5))

# print("\n=== Unique Games & Plays ===")
# print("Games:", test_input['game_id'].nunique())
# print("Plays:", test_input['play_id'].nunique())
# print("Players:", test_input['nfl_id'].nunique())

# # ---------------------------
# # 2Ô∏è‚É£ Distribution plots
# # ---------------------------
# fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# sns.histplot(test_input['x'], bins=50, ax=axes[0], color='skyblue', kde=True)
# axes[0].set_title("Distribution of X Positions (Field Length)")
# axes[0].set_xlabel("X (yards)")

# sns.histplot(test_input['y'], bins=30, ax=axes[1], color='lightgreen', kde=True)
# axes[1].set_title("Distribution of Y Positions (Field Width)")
# axes[1].set_xlabel("Y (yards)")

# sns.countplot(x='player_side', data=test_input, ax=axes[2], palette='Set2')
# axes[2].set_title("Offense vs Defense Player Counts")

# plt.tight_layout()
# plt.show()

# # ---------------------------
# # 3Ô∏è‚É£ Function to visualize play on field
# # ---------------------------
# def plot_play(df, game_id, play_id):
#     play_df = df[(df['game_id'] == game_id) & (df['play_id'] == play_id)]
#     if play_df.empty:
#         print(f"No data found for game {game_id}, play {play_id}")
#         return

#     plt.figure(figsize=(10, 5))
#     plt.title(f"Player Positions - Game {game_id}, Play {play_id}", fontsize=14)

#     # Field boundaries
#     plt.xlim(0, 120)
#     plt.ylim(0, 53.3)
#     plt.gca().set_facecolor("mediumseagreen")

#     # Plot players
#     off = play_df[play_df['player_side'] == 'Offense']
#     defn = play_df[play_df['player_side'] == 'Defense']

#     plt.scatter(off['x'], off['y'], color='blue', label='Offense', s=50, alpha=0.7)
#     plt.scatter(defn['x'], defn['y'], color='red', label='Defense', s=50, alpha=0.7)

#     # Mark ball landing
#     if 'ball_land_x' in play_df.columns and 'ball_land_y' in play_df.columns:
#         plt.scatter(play_df['ball_land_x'].iloc[0], play_df['ball_land_y'].iloc[0], 
#                     color='gold', s=120, marker='*', edgecolor='black', label='Ball Landing')

#     plt.xlabel("X (yards along field length)")
#     plt.ylabel("Y (yards across field width)")
#     plt.legend()
#     plt.show()

# # ---------------------------
# # 4Ô∏è‚É£ Visualize one or two sample plays
# # ---------------------------
# sample_game = test_input['game_id'].iloc[0]
# sample_play = test_input['play_id'].iloc[0]
# plot_play(test_input, sample_game, sample_play)

In [None]:
from multiprocessing import Pool as MultiprocessingPool, cpu_count

In [None]:
# import os
# import pandas as pd
# import polars as pl
# import numpy as np
# import torch
# from sklearn.ensemble import GradientBoostingRegressor
# from sklearn.multioutput import MultiOutputRegressor

# import kaggle_evaluation.nfl_inference_server

# # -----------------------------------------------------
# # Load your pre-trained models here (if pre-saved)
# # Or reinitialize from current notebook context
# # -----------------------------------------------------
# device = "cuda" if torch.cuda.is_available() else "cpu"

# # Assume we have baseline and neural model objects defined in memory
# # If you saved them as pickle/pt, load them here
# # Example:
# # baseline = joblib.load("/kaggle/input/my-models/baseline.pkl")
# # model.load_state_dict(torch.load("/kaggle/input/my-models/neural.pt"))

# # -----------------------------------------------------
# # Define the feature function (reused from your notebook)
# # -----------------------------------------------------
# def add_features(df: pd.DataFrame) -> pd.DataFrame:
#     df = df.copy()
#     if 'x' in df.columns and 'ball_land_x' in df.columns:
#         df['distance_to_ball'] = np.sqrt((df['x'] - df['ball_land_x'])**2 + (df['y'] - df['ball_land_y'])**2)
#         df['relative_x'] = df['x'] - df['ball_land_x']
#         df['relative_y'] = df['y'] - df['ball_land_y']
#         df['angle_to_ball'] = np.degrees(np.arctan2(df['relative_y'], df['relative_x']))
#     else:
#         df['distance_to_ball'] = 0.0
#         df['relative_x'] = 0.0
#         df['relative_y'] = 0.0
#         df['angle_to_ball'] = 0.0

#     if 's' in df.columns: df['speed'] = df['s']
#     if 'a' in df.columns: df['accel'] = df['a']

#     df['is_targeted'] = (df.get('player_role', '') == 'Targeted Receiver').astype(int)
#     df['player_side_enc'] = (df.get('player_side', '') == 'Offense').astype(int)

#     df['x_snap'] = df['x']
#     df['y_snap'] = df['y']
#     return df


# # -----------------------------------------------------
# # The main predict() function required by Kaggle
# # -----------------------------------------------------
# def predict(test: pl.DataFrame, test_input: pl.DataFrame) -> pl.DataFrame | pd.DataFrame:
#     """Generate predictions for x, y positions for each player-frame in the test batch."""
#     # Convert Polars to Pandas for processing
#     test = test.to_pandas()
#     test_input = test_input.to_pandas()

#     # Merge with last known player state (test_input is pre-throw snapshot)
#     merged = pd.merge(
#         test,
#         test_input.groupby(["game_id", "play_id", "nfl_id"], as_index=False).last(),
#         on=["game_id", "play_id", "nfl_id"],
#         how="left",
#         suffixes=("", "_snap")
#     )

#      # Add features
#     merged = add_features(merged)

#     # Compute normalized frame id
#     merged["frame_id_norm"] = (
#         merged.groupby(["game_id", "play_id"])["frame_id"]
#         .transform(lambda x: (x - x.min()) / (x.max() - x.min() + 1e-6))
#     )

#     # Normalized output frame index per player
#     merged["out_frame_index"] = merged.groupby(["game_id", "play_id", "nfl_id"])["frame_id"].rank(method="first")
#     merged["out_frame_norm"] = merged["out_frame_index"] / (
#         merged.groupby(["game_id", "play_id", "nfl_id"])["out_frame_index"].transform("max") + 1e-6
#     )

#     # Features for inference (must match training exactly!)
#     feature_cols = [
#         "x_snap", "y_snap", "speed", "accel",
#         "distance_to_ball", "angle_to_ball",
#         "relative_x", "relative_y",
#         "frame_id_norm", "is_targeted",
#         "player_side_enc", "out_frame_norm"
#     ]

#     X = merged[feature_cols].fillna(0).values


#     # Predictions from baseline
#     baseline_preds = baseline.predict(X)

#     # Predictions from neural model
#     model.eval()
#     with torch.no_grad():
#         Xt = torch.tensor(X, dtype=torch.float32).to(device)
#         neural_preds = model(Xt).cpu().numpy()

#     # Blend
#     x_pred = 0.5 * baseline_preds[:, 0] + 0.5 * neural_preds[:, 0]
#     y_pred = 0.5 * baseline_preds[:, 1] + 0.5 * neural_preds[:, 1]

#     predictions = pl.DataFrame({
#         "x": x_pred.tolist(),
#         "y": y_pred.tolist()
#     })

#     assert len(predictions) == len(test)
#     return predictions


# # -----------------------------------------------------
# # Run server for evaluation or local gateway testing
# # -----------------------------------------------------
# inference_server = kaggle_evaluation.nfl_inference_server.NFLInferenceServer(predict)

# if os.getenv("KAGGLE_IS_COMPETITION_RERUN"):
#     inference_server.serve()
# else:
#     inference_server.run_local_gateway(("/kaggle/input/nfl-big-data-bowl-2026-prediction/",))


In [None]:
# ============================================================
# 🏈 NFL Big Data Bowl 2026 — Final Submission
# ============================================================

import os
import polars as pl
import pandas as pd
import numpy as np
import torch
import kaggle_evaluation.nfl_inference_server


# ------------------------------------------------------------
# CONFIG
# ------------------------------------------------------------
class Config:
    """Static settings for the inference cell."""

    # Directory printed in the start-up banner; adjust if you have saved models
    MODEL_DIR = "/kaggle/input/my-trained-models"

    # Torch device name: "cuda" when a GPU is visible to PyTorch, otherwise "cpu"
    DEVICE = "cpu"
    if torch.cuda.is_available():
        DEVICE = "cuda"

    # Weight of the baseline prediction in the blend; the neural prediction gets the rest
    BLEND_WEIGHT = 0.5


# ------------------------------------------------------------
# MAIN PREDICTOR CLASS
# ------------------------------------------------------------
class MainPredictor:
    """Inference wrapper that blends the baseline regressor with the PyTorch model.

    Both models are taken from the notebook's global namespace: ``model`` (required)
    and ``baseline`` (optional). When no ``baseline`` object exists, predictions come
    from the neural model alone instead of failing with a ``NameError``.
    """
    def __init__(self):
        print("üß† Initializing Main Predictor...")
        self.device = Config.DEVICE
        # The baseline may not have been trained in this session; None means "neural only"
        self.baseline = globals().get("baseline")
        self.model = model             # PyTorch MLP
        self.model.eval()
        if self.baseline is None:
            print("Baseline model not found; predictions will use the neural model only")
        print(f"‚úÖ Models loaded (device={self.device})")

    def _prepare_features(self, test: pd.DataFrame, test_input: pd.DataFrame) -> tuple:
        """Join each requested row to its player's last input frame and build features.

        Args:
            test: Rows to predict, keyed by ``game_id``, ``play_id``, ``nfl_id``, ``frame_id``.
            test_input: Input tracking frames for the same plays.

        Returns:
            ``(df, feature_cols)``: the feature frame, in the row order of ``test``,
            and the ordered list of model input column names.
        """
        # Last input frame per (game, play, player)
        last_snap = (
            test_input.sort_values(["game_id", "play_id", "nfl_id", "frame_id"])
            .groupby(["game_id", "play_id", "nfl_id"], as_index=False)
            .last()
        )
        # add_features() reads the snapshot position from x_in / y_in (or x_snap / y_snap).
        # Under the plain names x / y it would not find it and would substitute zeros.
        # NOTE(review): the training frame must expose the snapshot under the same
        # names so that training and inference features agree.
        last_snap = last_snap.rename(columns={"x": "x_in", "y": "y_in"})
        df = pd.merge(
            test,
            last_snap,
            on=["game_id", "play_id", "nfl_id"],
            how="left",
            suffixes=("", "_snap")
        )
        df = add_features(df)

        # Frame progress within the play; 1e-6 guards single-frame plays
        frames_by_play = df.groupby(["game_id", "play_id"])["frame_id"]
        first_frame = frames_by_play.transform("min")
        last_frame = frames_by_play.transform("max")
        df["frame_id_norm"] = (df["frame_id"] - first_frame) / (last_frame - first_frame + 1e-6)

        # Position of each frame within its player's output sequence, scaled by its length
        by_player = ["game_id", "play_id", "nfl_id"]
        df["out_frame_index"] = df.groupby(by_player)["frame_id"].rank(method="first")
        df["out_frame_norm"] = df["out_frame_index"] / (
            df.groupby(by_player)["out_frame_index"].transform("max") + 1e-6
        )

        # Same columns, in the same order, as the training feature matrix
        feature_cols = [
            "x_snap", "y_snap", "speed", "accel",
            "distance_to_ball", "angle_to_ball",
            "relative_x", "relative_y",
            "frame_id_norm", "is_targeted",
            "player_side_enc", "out_frame_norm"
        ]
        # Any feature that could not be built is filled with zeros
        for col in feature_cols:
            if col not in df.columns:
                df[col] = 0.0
        return df, feature_cols

    def predict(self, test: pl.DataFrame, test_input: pl.DataFrame) -> pl.DataFrame:
        """Predict x / y for every row of ``test``.

        Args:
            test: Polars frame of rows to predict.
            test_input: Polars frame of input tracking rows for the same plays.

        Returns:
            Polars DataFrame with float columns ``x`` and ``y``, one row per row of ``test``.
        """
        test = test.to_pandas()
        test_input = test_input.to_pandas()

        # Feature preparation
        merged, feature_cols = self._prepare_features(test, test_input)
        X = merged[feature_cols].fillna(0).astype(float).values

        # Neural predictions
        with torch.no_grad():
            Xt = torch.tensor(X, dtype=torch.float32).to(self.device)
            neural_preds = self.model(Xt).cpu().numpy()

        # Blend with the baseline when one is available
        if self.baseline is None:
            blended = neural_preds
        else:
            w = Config.BLEND_WEIGHT
            base_preds = np.asarray(self.baseline.predict(X))
            blended = w * base_preds + (1 - w) * neural_preds

        preds = pl.DataFrame({"x": blended[:, 0].tolist(), "y": blended[:, 1].tolist()})
        assert len(preds) == len(test)
        return preds


# ------------------------------------------------------------
# SERVER SETUP
# ------------------------------------------------------------
# Build the predictor once; every call coming from the gateway reuses it
lb_predictor = MainPredictor()

def predict(test: pl.DataFrame, test_input: pl.DataFrame) -> pl.DataFrame:
    """Entry point handed to the inference server; delegates to ``lb_predictor``."""
    result = lb_predictor.predict(test, test_input)
    return result


# Start-up banner describing the configuration in use
for _banner_line in (
    "üöÄ Setting up NFL Big Data Bowl 2026 Inference Server...",
    f"üìÅ Model directory: {Config.MODEL_DIR}",
    "üéØ Blended Predictor: Baseline + Neural",
    "üîß Features: 12 engineered + normalized time features",
    f"üèà Model blend weight: {Config.BLEND_WEIGHT:.2f}",
    f"üíª Device: {Config.DEVICE}",
):
    print(_banner_line)

inference_server = kaggle_evaluation.nfl_inference_server.NFLInferenceServer(predict)

# Serve when KAGGLE_IS_COMPETITION_RERUN is set; otherwise run the local gateway
# against the competition input directory
_is_competition_rerun = os.getenv("KAGGLE_IS_COMPETITION_RERUN")
if not _is_competition_rerun:
    print("üî¨ Running local gateway for testing...")
    inference_server.run_local_gateway(("/kaggle/input/nfl-big-data-bowl-2026-prediction/",))
else:
    print("üèà Starting competition inference server with blended pipeline...")
    inference_server.serve()


In [None]:
# lb_predictor = MainPredictor()

# def predict(test: pl.DataFrame, test_input: pl.DataFrame) -> pl.DataFrame:
#     """Competition prediction function using LB 0.604 pipeline"""
#     return lb_predictor.predict(test, test_input)

# # SERVER SETUP
# print("üöÄ Setting up NFL Big Data Bowl 2026 Inference Server...")
# print(f"üìÅ Model directory: {Config.MODEL_DIR}")
# print(f"üéØ Target: LB 0.604 Performance")
# print(f"üîß Features: 114 complete features with player interactions")
# print(f"üèà Model: 5-fold ensemble with full feature engineering")

# inference_server = kaggle_evaluation.nfl_inference_server.NFLInferenceServer(predict)

# if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
#     print("üèà Starting competition inference server with LB 0.604 pipeline...")
#     inference_server.serve()
# else:
#     print("üî¨ Running local test gateway with LB 0.604 pipeline...")
#     inference_server.run_local_gateway(('/kaggle/input/nfl-big-data-bowl-2026-prediction/',))

In [None]:
# # ============================================================
# # üèÅ STEP 11 ‚Äî Generate Final Submission File
# # ============================================================

# import pandas as pd
# import numpy as np
# import torch
# import os

# # ------------------------------------------------------------
# # 1Ô∏è‚É£ Locate and load test files
# # ------------------------------------------------------------
# test_candidates = [
#     "/kaggle/input/nfl-big-data-bowl-2026-prediction/test.csv"
#     # "/kaggle/input/nfl-big-data-bowl-2026-prediction/test.csv",
#     # "/mnt/data/test.csv"
# ]
# test_path = next((p for p in test_candidates if os.path.exists(p)), None)
# if test_path is None:
#     raise FileNotFoundError("‚ùå test.csv not found.")
# test_df = pd.read_csv(test_path)
# print(f"‚úÖ Loaded test.csv ‚Äî {len(test_df):,} rows")

# test_input_candidates = [
#     "/kaggle/input/nfl-big-data-bowl-2026-prediction/test_input.csv"
#     # "/kaggle/input/nfl-big-data-bowl-2026-prediction/test_input.csv",
#     # "/mnt/data/test_input.csv"
# ]
# test_input_path = next((p for p in test_input_candidates if os.path.exists(p)), None)
# if test_input_path is None:
#     raise FileNotFoundError("‚ùå test_input.csv not found.")
# test_input = pd.read_csv(test_input_path)
# print(f"‚úÖ Loaded test_input.csv ‚Äî {len(test_input):,} rows")

# # ------------------------------------------------------------
# # 2Ô∏è‚É£ Merge last snapshot with test frames
# # ------------------------------------------------------------
# snap_last = (
#     test_input
#     .sort_values(["game_id", "play_id", "nfl_id", "frame_id"])
#     .groupby(["game_id", "play_id", "nfl_id"], as_index=False)
#     .last()
# )
# merged_test = pd.merge(
#     test_df,
#     snap_last,
#     on=["game_id", "play_id", "nfl_id"],
#     how="left",
#     suffixes=("", "_snap")
# )

# # ------------------------------------------------------------
# # 3Ô∏è‚É£ Add features (reuse same function)
# # ------------------------------------------------------------
# merged_test = add_features(merged_test)

# merged_test["frame_id_norm"] = (
#     merged_test.groupby(["game_id", "play_id"])["frame_id"]
#     .transform(lambda x: (x - x.min()) / (x.max() - x.min() + 1e-6))
# )
# merged_test["out_frame_index"] = merged_test.groupby(
#     ["game_id", "play_id", "nfl_id"]
# )["frame_id"].rank(method="first").astype(int)
# merged_test["out_frame_norm"] = merged_test["out_frame_index"] / (
#     merged_test.groupby(["game_id", "play_id", "nfl_id"])["out_frame_index"].transform("max") + 1e-6
# )

# # ------------------------------------------------------------
# # 4Ô∏è‚É£ Prepare model input (must match training feature_cols)
# # ------------------------------------------------------------
# feature_cols = [
#     "x_snap", "y_snap", "speed", "accel",
#     "distance_to_ball", "angle_to_ball",
#     "relative_x", "relative_y",
#     "frame_id_norm", "is_targeted",
#     "player_side_enc", "out_frame_norm"
# ]
# X_test = merged_test[feature_cols].fillna(0).values

# # ------------------------------------------------------------
# # 5Ô∏è‚É£ Generate predictions from both models
# # ------------------------------------------------------------
# print("‚öôÔ∏è Generating predictions...")

# baseline_preds = baseline.predict(X_test)
# model.eval()
# with torch.no_grad():
#     Xt = torch.tensor(X_test, dtype=torch.float32).to(device)
#     neural_preds = model(Xt).cpu().numpy()

# # Blend (50/50)
# merged_test["x"] = 0.5 * baseline_preds[:, 0] + 0.5 * neural_preds[:, 0]
# merged_test["y"] = 0.5 * baseline_preds[:, 1] + 0.5 * neural_preds[:, 1]

# # ------------------------------------------------------------
# # 6Ô∏è‚É£ Format submission file
# # ------------------------------------------------------------
# submission = merged_test[["game_id", "play_id", "nfl_id", "frame_id", "x", "y"]].copy()
# submission["x"] = submission["x"].astype(float)
# submission["y"] = submission["y"].astype(float)
# submission = submission.sort_values(["game_id", "play_id", "nfl_id", "frame_id"]).reset_index(drop=True)

# submission_path = "submission.csv"
# submission.to_csv(submission_path, index=False)

# # ------------------------------------------------------------
# # 7Ô∏è‚É£ Final checks and preview
# # ------------------------------------------------------------
# print(f"‚úÖ Submission file created: {submission_path}")
# print(f"üì¶ Total predictions: {len(submission):,}")
# print("\n--- Preview ---")
# display(submission.head(10))

# # optional: verify column structure
# assert list(submission.columns) == ["game_id", "play_id", "nfl_id", "frame_id", "x", "y"], "‚ùå Column mismatch!"


In [None]:
# from sklearn.metrics import mean_squared_error
# import numpy as np
# import pandas as pd

# # ============================================================
# # üßÆ Evaluate Local Submission Score (RMSE)
# # ============================================================

# def compute_local_rmse(pred_df: pd.DataFrame, true_df: pd.DataFrame):
#     """
#     Compute RMSE between predicted and true (x, y).
#     """
#     # Merge on keys
#     merged = pd.merge(
#         pred_df,
#         true_df[['game_id', 'play_id', 'nfl_id', 'frame_id', 'x', 'y']],
#         on=['game_id', 'play_id', 'nfl_id', 'frame_id'],
#         how='inner',
#         suffixes=('_pred', '_true')
#     )
    
#     if len(merged) == 0:
#         print("‚ö†Ô∏è No overlapping rows between predictions and ground truth.")
#         return np.nan
    
#     rmse_x = mean_squared_error(merged['x_true'], merged['x_pred'], squared=False)
#     rmse_y = mean_squared_error(merged['y_true'], merged['y_pred'], squared=False)
    
#     rmse_total = np.sqrt((rmse_x**2 + rmse_y**2) / 2)
    
#     print(f"RMSE_x: {rmse_x:.4f}")
#     print(f"RMSE_y: {rmse_y:.4f}")
#     print(f"‚úÖ Combined RMSE: {rmse_total:.4f}")
    
#     return rmse_total


# # ============================================================
# # üß© Example: Evaluate Using Evaluation Dataset
# # ============================================================

# # Locate your local "evaluation_solutions" data (contains true outputs)
# eval_candidates = [
#     "/kaggle/input/nfl-big-data-bowl-2026-prediction/evaluation_output.csv",
#     "/kaggle/input/nfl-big-data-bowl-2026/evaluation_output.csv",
#     "/mnt/data/output_2023_w01.csv",  # if you've uploaded
# ]

# found_eval = None
# for p in eval_candidates:
#     if os.path.exists(p):
#         found_eval = p
#         break

# if found_eval:
#     print(f"‚úÖ Using ground truth from: {found_eval}")
#     eval_true = pd.read_csv(found_eval)
    
#     # Read your submission file
#     submission = pd.read_csv("submission.csv")
    
#     # Compute score
#     print("\nüî¢ Computing local submission RMSE...")
#     score = compute_local_rmse(submission, eval_true)
    
#     print(f"\nüèÅ Estimated Local RMSE Score: {score:.5f}")
# else:
#     print("‚ö†Ô∏è Could not find evaluation ground truth file to compute local RMSE.")
