split by StratifiedKFold

Per-fold RMSEs: [0.62349, 0.63324, 0.60064, 0.67419, 0.63202]

OOF CV RMSE: 0.63278

In [None]:
# ================================================================================
# NFL BIG DATA BOWL 2026 - COMPLETE WORKING SOLUTION
# Predicting player movement during pass plays with temporal features
# ================================================================================
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
import torch
import numpy as np
import pandas as pd
import warnings
import gc
from pathlib import Path
from tqdm.auto import tqdm
from scipy.ndimage import gaussian_filter1d
import joblib
from datetime import datetime
from itertools import combinations
# Machine Learning
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold, KFold
from tqdm import tqdm
from torch.optim.lr_scheduler import OneCycleLR
# Deep Learning
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import os
import math
from torch.optim.lr_scheduler import LinearLR, SequentialLR
warnings.filterwarnings('ignore')

# Config

In [None]:
class Config:
    """Notebook-wide settings: data locations, field bounds and run options."""

    # --- Locations ---------------------------------------------------------
    DATA_DIR = Path("/kaggle/input/nfl-big-data-bowl-2026-prediction/")
    NN_PRETRAIN_DIR = "/kaggle/input/nfl-big-data-bowl-2026-public/bigru-public"

    # --- Reproducibility / validation ---------------------------------------
    SEED = 42
    N_FOLDS = 5

    # --- Field bounds (same units as the x / y tracking columns) ------------
    FIELD_X_MIN = 0.0
    FIELD_X_MAX = 120.0
    FIELD_Y_MIN = 0.0
    FIELD_Y_MAX = 53.3

    # --- Feature / sequence options -----------------------------------------
    USE_PLAYERS_INTERACTIONS = True
    WINDOW_SIZE = 12

    # --- Hardware: use CUDA when torch reports it, otherwise the CPU --------
    DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
def set_global_seeds(seed: int = 42):
    """Seed Python, NumPy and torch RNGs and put cuDNN in deterministic mode.

    Also writes the seed to the ``PYTHONHASHSEED`` environment variable.
    """
    import os
    import random

    os.environ["PYTHONHASHSEED"] = str(seed)

    # Every generator the notebook draws from gets the same seed.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)

    # Deterministic kernels, no autotuning.
    cudnn = torch.backends.cudnn
    cudnn.deterministic, cudnn.benchmark = True, False
# Apply the configured seed (Config.SEED) to every RNG handled by set_global_seeds.
set_global_seeds(Config.SEED)

In [None]:
def load_data(debug_fraction=1.0):
    """Load the weekly training CSVs and the test files from ``Config.DATA_DIR``.

    Weekly files ``train/input_2023_wWW.csv`` and ``train/output_2023_wWW.csv``
    are looked up for weeks 1..18; files that do not exist are skipped. Each
    loaded frame gets a ``week`` column holding the week number taken from its
    own file name, so a missing week never shifts the labels of later weeks.

    Parameters
    ----------
    debug_fraction : float, default 1.0
        When below 1.0, only this fraction of the games found in the training
        input is kept (whole games are sampled, seeded with ``Config.SEED``),
        and the training output is restricted to the same games.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_input, train_output, test_input, test_template)``.
    """
    print("Loading data...")

    train_dir = Config.DATA_DIR / "train"

    def existing_weekly_files(kind):
        # Keep the real week number next to each path: enumerating the filtered
        # list instead would mislabel every week after a missing file.
        candidates = ((w, train_dir / f"{kind}_2023_w{w:02d}.csv") for w in range(1, 19))
        return [(w, path) for w, path in candidates if path.exists()]

    train_input_files = existing_weekly_files("input")
    train_output_files = existing_weekly_files("output")

    print(f"Found {len(train_input_files)} weeks of data")

    # Load and concatenate with week column
    train_input = pd.concat(
        [pd.read_csv(path).assign(week=w) for w, path in train_input_files],
        ignore_index=True
    )
    train_output = pd.concat(
        [pd.read_csv(path).assign(week=w) for w, path in train_output_files],
        ignore_index=True
    )

    # Test data
    test_input = pd.read_csv(Config.DATA_DIR / "test_input.csv")
    test_template = pd.read_csv(Config.DATA_DIR / "test.csv")

    print(f"Loaded {len(train_input):,} input records, {len(train_output):,} output records")

    # Use only a fraction of the games for debugging (select entire games)
    if debug_fraction < 1.0:
        unique_game_ids = train_input['game_id'].unique()
        sampled_game_ids = pd.Series(unique_game_ids).sample(
            frac=debug_fraction, random_state=Config.SEED
        ).values
        train_input = train_input[train_input['game_id'].isin(sampled_game_ids)].reset_index(drop=True)
        train_output = train_output[train_output['game_id'].isin(sampled_game_ids)].reset_index(drop=True)
        print(f"Using {len(train_input):,} input records from {len(sampled_game_ids)} games for debugging")

    return train_input, train_output, test_input, test_template

# Metric

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error


class ParticipantVisibleError(Exception):
    """Raised by the metric for input problems such as a missing required column."""




def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    """
    Compute RMSE for NFL competition.

    The two frames are inner-joined on ``row_id_column_name`` and the score is
    ``sqrt(0.5 * (MSE(x) + MSE(y)))`` over the joined rows.

    Expected input:
      - solution and submission as pandas.DataFrame
      - Column named by ``row_id_column_name``: unique identifier for each
        (game_id, play_id, nfl_id, frame_id)
      - Column 'x'
      - Column 'y'

    Raises
    ------
    ParticipantVisibleError
        If either frame lacks the id column or one of the target columns.

    Examples
    --------
    >>> import pandas as pd
    >>> row_id_column_name = 'id'
    >>> solution = pd.DataFrame({'id': ['21_12_2_1', '21_12_2_2', '21_12_2_3'], 'x': [1,2,3], 'y':[4,2,3]})
    >>> submission  = pd.DataFrame({'id': ['21_12_2_1', '21_12_2_2', '21_12_2_3'], 'x': [1.1,2,3], 'y':[4,2.2,3]})
    >>> round(score(solution, submission, row_id_column_name=row_id_column_name), 4)
    0.0913
    >>> submission  = pd.DataFrame({'id': ['21_12_2_1', '21_12_2_2', '21_12_2_3'], 'x': [0,2,3], 'y':[4,2.2,3]})
    >>> round(score(solution, submission, row_id_column_name=row_id_column_name), 4)
    0.4163
    >>> submission  = pd.DataFrame({'id': ['21_12_2_1', '21_12_2_2', '21_12_2_3'], 'x': [1,2,1], 'y':[4,0,3]})
    >>> round(score(solution, submission, row_id_column_name=row_id_column_name), 4)
    1.1547
    """

    TARGET = ['x', 'y']
    if row_id_column_name not in solution.columns:
        raise ParticipantVisibleError(f"Solution file missing required column: '{row_id_column_name}'")
    if row_id_column_name not in submission.columns:
        raise ParticipantVisibleError(f"Submission file missing required column: '{row_id_column_name}'")

    missing_in_solution = set(TARGET) - set(solution.columns)
    missing_in_submission = set(TARGET) - set(submission.columns)

    if missing_in_solution:
        raise ParticipantVisibleError(f'Solution file missing required columns: {missing_in_solution}')
    if missing_in_submission:
        raise ParticipantVisibleError(f'Submission file missing required columns: {missing_in_submission}')

    # Select by the caller-supplied id column (previously hard-coded to 'id',
    # which raised KeyError for any other id column name).
    submission = submission[[row_id_column_name] + TARGET]
    merged_df = pd.merge(solution, submission, on=row_id_column_name, suffixes=('_true', '_pred'))

    # Log NaNs: first in the predictions, then in the true values of the rows
    # whose prediction is NaN.
    nanx_in_pred = merged_df['x_pred'].isna().sum()
    nany_in_pred = merged_df['y_pred'].isna().sum()
    if nanx_in_pred > 0:
        print(f"WARNING: Found {nanx_in_pred} NaN predictions in merged results")
    if nany_in_pred > 0:
        print(f"WARNING: Found {nany_in_pred} NaN predictions in merged results")

    rows_with_nan_pred = merged_df[merged_df['x_pred'].isna() | merged_df['y_pred'].isna()]
    nanx_in_true = rows_with_nan_pred['x_true'].isna().sum()
    nany_in_true = rows_with_nan_pred['y_true'].isna().sum()
    if nanx_in_true > 0:
        print(f"WARNING: Found {nanx_in_true} NaN true values corresponding to NaN predictions")
    if nany_in_true > 0:
        print(f"WARNING: Found {nany_in_true} NaN true values corresponding to NaN predictions")

    mse_x = mean_squared_error(merged_df['x_true'], merged_df['x_pred'])
    mse_y = mean_squared_error(merged_df['y_true'], merged_df['y_pred'])
    return float(np.sqrt(0.5 * (mse_x + mse_y)))

# Prepare features 

In [None]:
# ...existing code...
from collections import defaultdict

def _masked_row_stats(values, mask, reducers=(np.nanmean, np.nanmin, np.nanmax)):
    """Apply each NaN-aware reducer row-wise to `values`, using only entries where `mask` is True.

    Rows with no selected entry come back as NaN. The RuntimeWarning NumPy
    emits for such empty rows is suppressed locally because it is expected.
    """
    selected = np.where(mask, values, np.nan)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=RuntimeWarning)
        return tuple(reduce(selected, axis=1) for reduce in reducers)


def _compute_interactions_for_play_frames(df_play_frames: pd.DataFrame) -> pd.DataFrame:
    """
    Compute interaction features per (game_id, play_id, frame_id, nfl_id) for selected players:
      - distance_to_player_mean/min/max_offense/defense
      - relative_velocity_magnitude_mean/min/max_offense/defense
      - angle_to_player_mean/min/max_offense/defense  (mean is circular)
      - nearest_opponent_dist/angle/rel_speed

    "offense"/"defense" refers to the side of the *other* players a statistic
    is taken over (self is always excluded); "opponent" means a player whose
    `is_offense` value differs from the row's own.

    Only emits rows where 'player_to_predict' is True if that column exists.
    Otherwise emits rows for all players.

    Statistics with no contributing player (e.g. no defender in the frame, or
    no opponent at all) are NaN rather than an error.

    Parameters
    ----------
    df_play_frames : pd.DataFrame
        Rows with columns game_id, play_id, frame_id, nfl_id, x, y,
        velocity_x, velocity_y, is_offense and optionally player_to_predict.

    Returns
    -------
    pd.DataFrame
        One row per emitted player and frame, with the four key columns
        followed by the 21 feature columns listed above.
    """
    key_cols = ['game_id', 'play_id', 'frame_id', 'nfl_id']
    # Same order as the arrays appended to `stat_arrays` below.
    stat_cols = [
        f'{feature}_{stat}_{side}'
        for side in ('offense', 'defense')
        for feature in ('distance_to_player', 'relative_velocity_magnitude', 'angle_to_player')
        for stat in ('mean', 'min', 'max')
    ]
    nearest_cols = ['nearest_opponent_dist', 'nearest_opponent_angle', 'nearest_opponent_rel_speed']
    has_predict_flag = 'player_to_predict' in df_play_frames.columns

    def finite_or_nan(value):
        return float(value) if np.isfinite(value) else np.nan

    out_rows = []
    # Per frame to keep matrices tiny
    for (g, p, f), grp in df_play_frames.groupby(['game_id', 'play_id', 'frame_id'], sort=False):
        n = len(grp)
        if has_predict_flag:
            compute_mask = grp['player_to_predict'].to_numpy().astype(bool)
        else:
            compute_mask = np.ones(n, dtype=bool)
        if not compute_mask.any():
            continue  # nothing would be emitted for this frame

        nfl_ids = grp['nfl_id'].to_numpy()
        x = grp['x'].to_numpy(dtype=np.float32)
        y = grp['y'].to_numpy(dtype=np.float32)
        vx = grp['velocity_x'].to_numpy(dtype=np.float32)
        vy = grp['velocity_y'].to_numpy(dtype=np.float32)
        is_off = grp['is_offense'].to_numpy().astype(bool)

        # Pairwise geometry, all (n, n); row i describes player i versus player j.
        dx = x[None, :] - x[:, None]
        dy = y[None, :] - y[:, None]
        dist = np.sqrt(dx * dx + dy * dy)
        # NOTE(review): dx[i, j] is already x[j] - x[i], so the negation makes this
        # the direction from j back to i, not i -> j. Left unchanged so feature
        # values stay the same - confirm the intended convention.
        angle_mat = np.arctan2(-dy, -dx)
        dvx = vx[:, None] - vx[None, :]
        dvy = vy[:, None] - vy[None, :]
        rel_speed = np.sqrt(dvx * dvx + dvy * dvy)

        # Masks: column j selected when player j is on the given side and j != i.
        not_self = ~np.eye(n, dtype=bool)
        mask_off = is_off[None, :] & not_self
        mask_def = ~is_off[None, :] & not_self
        opp_mask = is_off[:, None] != is_off[None, :]   # diagonal is False by construction

        # Nearest opponent. np.nanargmin raises on rows without any opponent,
        # so fill the excluded entries with +inf and use plain argmin/min.
        dist_opp = np.where(opp_mask & ~np.isnan(dist), dist, np.inf)
        nearest_idx = dist_opp.argmin(axis=1)
        nearest_dist = dist_opp.min(axis=1)
        no_opponent = ~np.isfinite(nearest_dist)
        row_idx = np.arange(n)
        nearest_angle = angle_mat[row_idx, nearest_idx]
        nearest_rel = rel_speed[row_idx, nearest_idx]
        nearest_dist[no_opponent] = np.nan
        nearest_angle[no_opponent] = np.nan
        nearest_rel[no_opponent] = np.nan

        # Group-wise aggregations, offense block first, then defense.
        sinA = np.sin(angle_mat)
        cosA = np.cos(angle_mat)
        stat_arrays = []
        for side_mask in (mask_off, mask_def):
            stat_arrays.extend(_masked_row_stats(dist, side_mask))
            stat_arrays.extend(_masked_row_stats(rel_speed, side_mask))

            # Circular mean of the angles; NaN denominator where the side is empty.
            cnt = side_mask.sum(axis=1).astype(np.float32)
            denom = np.where(cnt > 0, cnt, np.nan)
            sin_mean = (sinA * side_mask).sum(axis=1) / denom
            cos_mean = (cosA * side_mask).sum(axis=1) / denom
            stat_arrays.append(np.arctan2(sin_mean, cos_mean))
            # Raw min/max of the angles for spread reference.
            stat_arrays.extend(_masked_row_stats(angle_mat, side_mask, reducers=(np.nanmin, np.nanmax)))

        # Emit only for players to predict
        for idx in np.flatnonzero(compute_mask):
            row = {'game_id': g, 'play_id': p, 'frame_id': f, 'nfl_id': int(nfl_ids[idx])}
            for col, values in zip(stat_cols, stat_arrays):
                row[col] = values[idx]
            row['nearest_opponent_dist'] = finite_or_nan(nearest_dist[idx])
            row['nearest_opponent_angle'] = finite_or_nan(nearest_angle[idx])
            row['nearest_opponent_rel_speed'] = finite_or_nan(nearest_rel[idx])
            out_rows.append(row)

    return pd.DataFrame(out_rows, columns=key_cols + stat_cols + nearest_cols)
# ...existing code...

In [None]:
def height_to_feet(height_str):
    """Convert a height written as 'ft-in' (e.g. '6-2') to decimal feet.

    Returns None when the value cannot be parsed: not a string (for example a
    missing value), not exactly two '-'-separated parts, or parts that are not
    integers. Only these parse failures are swallowed; unrelated exceptions
    such as KeyboardInterrupt propagate.
    """
    try:
        feet, inches = (int(part) for part in height_str.split('-'))
    except (AttributeError, TypeError, ValueError):
        return None
    return feet + inches / 12


In [None]:
def add_advanced_features(df):
    """
    Add derived per-player temporal, alignment and field-position features.

    The input is copied and sorted by (game_id, play_id, nfl_id, frame_id);
    temporal features are computed within each (game_id, play_id, nfl_id)
    group. Every optional feature is only added when the columns it reads are
    present, so a missing source column skips that feature instead of raising.
    Values that have no history (first diff, short lags, first rolling std)
    are filled with 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Tracking rows. Requires game_id, play_id, nfl_id, frame_id, x and y;
        other columns (distance_to_ball, ball_direction_x/y, velocity_x/y,
        acceleration_x/y, s, a, dir, is_receiver, is_coverage, closing_speed)
        enable the corresponding feature groups.

    Returns
    -------
    pandas.DataFrame
        The sorted copy with the new feature columns appended.
    """
    print("Adding advanced features...")
    df = df.copy()
    df = df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id'])
    gcols = ['game_id', 'play_id', 'nfl_id']
    field_length, field_width = 120, 53.3

    # ==========================================
    # GROUP 1: Distance Rate Features (3)
    # ==========================================
    if 'distance_to_ball' in df.columns:
        df['distance_to_ball_change'] = df.groupby(gcols)['distance_to_ball'].diff().fillna(0)
        df['distance_to_ball_accel'] = df.groupby(gcols)['distance_to_ball_change'].diff().fillna(0)
        # +0.1 keeps the ratio finite when the distance is not changing.
        df['time_to_intercept'] = (df['distance_to_ball'] /
                                    (np.abs(df['distance_to_ball_change']) + 0.1)).clip(0, 10)

    # ==========================================
    # GROUP 2: Target Alignment Features (3)
    # ==========================================
    if 'ball_direction_x' in df.columns:
        # Components of velocity along / across the direction to the ball.
        df['velocity_alignment'] = (
            df['velocity_x'] * df['ball_direction_x'] +
            df['velocity_y'] * df['ball_direction_y']
        )
        df['velocity_perpendicular'] = (
            df['velocity_x'] * (-df['ball_direction_y']) +
            df['velocity_y'] * df['ball_direction_x']
        )
        if 'acceleration_x' in df.columns:
            df['accel_alignment'] = (
                df['acceleration_x'] * df['ball_direction_x'] +
                df['acceleration_y'] * df['ball_direction_y']
            )

    # ==========================================
    # GROUP 3: Multi-Window Rolling (24)
    # ==========================================
    for window in [3, 5, 10]:
        for col in ['velocity_x', 'velocity_y', 's', 'a']:
            if col in df.columns:
                df[f'{col}_roll{window}'] = df.groupby(gcols)[col].transform(
                    lambda x: x.rolling(window, min_periods=1).mean()
                )
                df[f'{col}_std{window}'] = df.groupby(gcols)[col].transform(
                    lambda x: x.rolling(window, min_periods=1).std()
                ).fillna(0)

    # ==========================================
    # GROUP 4: Extended Lag Features (8)
    # ==========================================
    for lag in [4, 5]:
        for col in ['x', 'y', 'velocity_x', 'velocity_y']:
            if col in df.columns:
                df[f'{col}_lag{lag}'] = df.groupby(gcols)[col].shift(lag).fillna(0)

    # ==========================================
    # GROUP 5: Velocity Change Features (4)
    # ==========================================
    if 'velocity_x' in df.columns:
        # Each feature is guarded on its own source column; previously a frame
        # with velocity_x but without s / dir / velocity_y raised KeyError here.
        for source, target in (('velocity_x', 'velocity_x_change'),
                               ('velocity_y', 'velocity_y_change'),
                               ('s', 'speed_change')):
            if source in df.columns:
                df[target] = df.groupby(gcols)[source].diff().fillna(0)
        if 'dir' in df.columns:
            turn = df.groupby(gcols)['dir'].diff().fillna(0)
            # Wrap jumps of 180 degrees or more back by one full turn.
            df['direction_change'] = turn.where(turn.abs() < 180, turn - 360 * np.sign(turn))

    # ==========================================
    # GROUP 6: Field Position Features (4)
    # ==========================================
    df['dist_from_left'] = df['y']
    df['dist_from_right'] = field_width - df['y']
    df['dist_from_sideline'] = np.minimum(df['dist_from_left'], df['dist_from_right'])
    df['dist_from_endzone'] = np.minimum(df['x'], field_length - df['x'])

    # ==========================================
    # GROUP 7: Role-Specific Features (3)
    # ==========================================
    if 'is_receiver' in df.columns and 'velocity_alignment' in df.columns:
        df['receiver_optimality'] = df['is_receiver'] * df['velocity_alignment']
        df['receiver_deviation'] = df['is_receiver'] * np.abs(df.get('velocity_perpendicular', 0))
    if 'is_coverage' in df.columns and 'closing_speed' in df.columns:
        df['defender_closing_speed'] = df['is_coverage'] * df['closing_speed']

    # ==========================================
    # GROUP 8: Time Features (2)
    # ==========================================
    df['frames_elapsed'] = df.groupby(gcols).cumcount()
    # Position within the player's sequence divided by the sequence length.
    frames_in_group = df.groupby(gcols)['frames_elapsed'].transform('max') + 1
    df['normalized_time'] = df['frames_elapsed'] / frames_in_group

    print(f"Total features after enhancement: {len(df.columns)}")

    return df

In [None]:
def prepare_sequences(input_df, output_df=None, test_template=None, 
                                            is_training=True, window_size=8,use_players_interactions=Config.USE_PLAYERS_INTERACTIONS):
    """
    Prepare sequences with ALL advanced features
    """
    print(f"\n{'='*80}")
    print(f"PREPARING SEQUENCES WITH ADVANCED FEATURES")
    print(f"{'='*80}")
    print(f"Window size: {window_size}")
    
    input_df = input_df.copy()
    
    # ==========================================
    # BASIC FEATURES
    # ==========================================
    print("Step 1/4: Adding basic features...")
    
    input_df['player_height_feet'] = input_df['player_height'].apply(height_to_feet)
    
    dir_rad = np.deg2rad(input_df['dir'].fillna(0))
    delta_t = 0.1
    input_df['velocity_x'] = (input_df['s'] + 0.5 * input_df['a'] * delta_t) * np.sin(dir_rad)
    input_df['velocity_y'] = (input_df['s'] + 0.5 * input_df['a'] * delta_t) * np.cos(dir_rad)
    input_df['acceleration_x'] = input_df['a'] * np.sin(dir_rad)
    input_df['acceleration_y'] = input_df['a'] * np.cos(dir_rad)
    input_df['o_sin'] = np.sin(np.deg2rad(input_df['o'].fillna(0)))
    input_df['o_cos'] = np.cos(np.deg2rad(input_df['o'].fillna(0)))
    input_df['dir_sin'] = np.sin(np.deg2rad(input_df['dir'].fillna(0)))
    input_df['dir_cos'] = np.cos(np.deg2rad(input_df['dir'].fillna(0)))
    # Roles
    input_df['is_offense'] = (input_df['player_side'] == 'Offense').astype(int)
    input_df['is_defense'] = (input_df['player_side'] == 'Defense').astype(int)
    input_df['is_receiver'] = (input_df['player_role'] == 'Targeted Receiver').astype(int)
    input_df['is_coverage'] = (input_df['player_role'] == 'Defensive Coverage').astype(int)
    input_df['is_passer'] = (input_df['player_role'] == 'Passer').astype(int)
    
    # Physics
    mass_kg = input_df['player_weight'].fillna(200.0) / 2.20462
    input_df['momentum_x'] = input_df['velocity_x'] * mass_kg
    input_df['momentum_y'] = input_df['velocity_y'] * mass_kg
    input_df['kinetic_energy'] = 0.5 * mass_kg * (input_df['s'] ** 2)
    
    # Ball features
    if 'ball_land_x' in input_df.columns:
        ball_dx = input_df['ball_land_x'] - input_df['x']
        ball_dy = input_df['ball_land_y'] - input_df['y']
        input_df['distance_to_ball'] = np.sqrt(ball_dx**2 + ball_dy**2)
        input_df['angle_to_ball'] = np.arctan2(ball_dy, ball_dx)
        input_df['ball_direction_x'] = ball_dx / (input_df['distance_to_ball'] + 1e-6)
        input_df['ball_direction_y'] = ball_dy / (input_df['distance_to_ball'] + 1e-6)
        input_df['closing_speed'] = (
            input_df['velocity_x'] * input_df['ball_direction_x'] +
            input_df['velocity_y'] * input_df['ball_direction_y']
        )
    
    # Sort for temporal
    input_df = input_df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id'])
    gcols = ['game_id', 'play_id', 'nfl_id']
    
    # Original lag features (1-3)
    for lag in [1, 2, 3]:
        input_df[f'x_lag{lag}'] = input_df.groupby(gcols)['x'].shift(lag)
        input_df[f'y_lag{lag}'] = input_df.groupby(gcols)['y'].shift(lag)
        input_df[f'velocity_x_lag{lag}'] = input_df.groupby(gcols)['velocity_x'].shift(lag)
        input_df[f'velocity_y_lag{lag}'] = input_df.groupby(gcols)['velocity_y'].shift(lag)
    
    # EMA features
    input_df['velocity_x_ema'] = input_df.groupby(gcols)['velocity_x'].transform(
        lambda x: x.ewm(alpha=0.3, adjust=False).mean()
    )
    input_df['velocity_y_ema'] = input_df.groupby(gcols)['velocity_y'].transform(
        lambda x: x.ewm(alpha=0.3, adjust=False).mean()
    )
    input_df['speed_ema'] = input_df.groupby(gcols)['s'].transform(
        lambda x: x.ewm(alpha=0.3, adjust=False).mean()
    )
    
    # ==========================================
    # ADVANCED FEATURES (NEW!)
    # ==========================================
    print("Step 2/4: Adding advanced features...")
    input_df = add_advanced_features(input_df)
    # ==========================================
    # PLAYER INTERACTION FEATURES (NEW!)
    # ==========================================
    print("Step 3/4: Adding player interaction features...")
    if use_players_interactions:
        agg_rows = []
        # Group once (avoid overhead of apply per small group)
        for (g, p, f), grp in input_df.groupby(['game_id', 'play_id', 'frame_id'], sort=False):
            n = len(grp)
            nfl_ids = grp['nfl_id'].to_numpy()
            # Only compute/emit for player_to_predict==True (if column exists)
            compute_mask = grp['player_to_predict'].to_numpy().astype(bool) if 'player_to_predict' in grp.columns else np.ones(n, dtype=bool)
            if n < 2:
                # Create empty stats rows (NaNs) only for players to predict
                for nid in nfl_ids[compute_mask]:
                    agg_rows.append({
                        'game_id': g, 'play_id': p, 'frame_id': f, 'nfl_id': nid,
                        'distance_to_player_mean_offense': np.nan,
                        'distance_to_player_min_offense': np.nan,
                        'distance_to_player_max_offense': np.nan,
                        'relative_velocity_magnitude_mean_offense': np.nan,
                        'relative_velocity_magnitude_min_offense': np.nan,
                        'relative_velocity_magnitude_max_offense': np.nan,
                        'angle_to_player_mean_offense': np.nan,
                        'angle_to_player_min_offense': np.nan,
                        'angle_to_player_max_offense': np.nan,
                        'distance_to_player_mean_defense': np.nan,
                        'distance_to_player_min_defense': np.nan,
                        'distance_to_player_max_defense': np.nan,
                        'relative_velocity_magnitude_mean_defense': np.nan,
                        'relative_velocity_magnitude_min_defense': np.nan,
                        'relative_velocity_magnitude_max_defense': np.nan,
                        'angle_to_player_mean_defense': np.nan,
                        'angle_to_player_min_defense': np.nan,
                        'angle_to_player_max_defense': np.nan,
                        'nearest_opponent_dist': np.nan,
                        'nearest_opponent_angle': np.nan,
                        'nearest_opponent_rel_speed': np.nan,
                    })
                continue

            x = grp['x'].to_numpy(dtype=np.float32)
            y = grp['y'].to_numpy(dtype=np.float32)
            vx = grp['velocity_x'].to_numpy(dtype=np.float32)
            vy = grp['velocity_y'].to_numpy(dtype=np.float32)
            is_offense = grp['is_offense'].to_numpy()
            is_defense = grp['is_defense'].to_numpy()

            # Pairwise deltas (broadcast)
            dx = x[None, :] - x[:, None]        # (n,n) x_j - x_i reversed later for angle
            dy = y[None, :] - y[:, None]
            # Angle from i -> j (want y_j - y_i, x_j - x_i)
            angle_mat = np.arctan2(-dy, -dx)    # because dx currently x[None]-x[:,None] => -(x_j - x_i)

            # Distances
            dist = np.sqrt(dx ** 2 + dy ** 2)
            # Relative velocity magnitudes
            dvx = vx[:, None] - vx[None, :]
            dvy = vy[:, None] - vy[None, :]
            rel_speed = np.sqrt(dvx ** 2 + dvy ** 2)

            # Offense mask (exclude self)
            offense_mask = (is_offense[:, None] == is_offense[None, :])
            offense_mask = (is_offense[:, None] == is_offense[None, :])
            np.fill_diagonal(offense_mask, False)

            # Defense mask (exclude self)
            defense_mask = (is_defense[:, None] == is_defense[None, :])
            np.fill_diagonal(defense_mask, False)

            # Opponent mask (exclude self)
            opp_mask = (is_offense[:, None] != is_offense[None, :])
            np.fill_diagonal(opp_mask, False)

            # Mask out self distances
            dist_diag_nan = dist.copy()
            np.fill_diagonal(dist_diag_nan, np.nan)
            rel_diag_nan = rel_speed.copy()
            np.fill_diagonal(rel_diag_nan, np.nan)
            angle_diag_nan = angle_mat.copy()
            np.fill_diagonal(angle_diag_nan, np.nan)

            def masked_stats(mat, mask):
                # mat, mask shape (n,n)
                masked = np.where(mask, mat, np.nan)
                cnt = mask.sum(axis=1)
                mean = np.nanmean(masked, axis=1)
                amin = np.nanmin(masked, axis=1)
                amax = np.nanmax(masked, axis=1)
                # Rows with zero valid -> set nan
                zero = cnt == 0
                mean[zero] = np.nan; amin[zero] = np.nan; amax[zero] = np.nan
                return mean, amin, amax

            d_mean_o, d_min_o, d_max_o = masked_stats(dist_diag_nan, offense_mask)
            v_mean_o, v_min_o, v_max_o = masked_stats(rel_diag_nan, offense_mask)
            a_mean_o, a_min_o, a_max_o = masked_stats(angle_diag_nan, offense_mask)

            d_mean_d, d_min_d, d_max_d = masked_stats(dist_diag_nan, defense_mask)
            v_mean_d, v_min_d, v_max_d = masked_stats(rel_diag_nan, defense_mask)
            a_mean_d, a_min_d, a_max_d = masked_stats(angle_diag_nan, defense_mask)

            # NEW: nearest opponent stats
            masked_dist_opp = np.where(opp_mask, dist_diag_nan, np.nan)         # (n,n)
            nearest_dist = np.nanmin(masked_dist_opp, axis=1)                   # (n,)
            nearest_idx = np.nanargmin(masked_dist_opp, axis=1)                 # (n,)
            # Guard where all-NaN rows (no opponents)
            all_nan = ~np.isfinite(nearest_dist)
            nearest_idx_safe = nearest_idx.copy()
            nearest_idx_safe[all_nan] = 0
            nearest_angle = np.take_along_axis(angle_diag_nan, nearest_idx_safe[:, None], axis=1).squeeze(1)
            nearest_rel = np.take_along_axis(rel_diag_nan, nearest_idx_safe[:, None], axis=1).squeeze(1)
            nearest_angle[all_nan] = np.nan
            nearest_rel[all_nan] = np.nan

            for idx, nid in enumerate(nfl_ids):
                if not compute_mask[idx]:
                    continue  # only for player_to_predict==True
                agg_rows.append({
                    'game_id': g, 'play_id': p, 'frame_id': f, 'nfl_id': nid,
                    'distance_to_player_mean_offense': d_mean_o[idx],
                    'distance_to_player_min_offense': d_min_o[idx],
                    'distance_to_player_max_offense': d_max_o[idx],
                    'relative_velocity_magnitude_mean_offense': v_mean_o[idx],
                    'relative_velocity_magnitude_min_offense': v_min_o[idx],
                    'relative_velocity_magnitude_max_offense': v_max_o[idx],
                    'angle_to_player_mean_offense': a_mean_o[idx],
                    'angle_to_player_min_offense': a_min_o[idx],
                    'angle_to_player_max_offense': a_max_o[idx],
                    'distance_to_player_mean_defense': d_mean_d[idx],
                    'distance_to_player_min_defense': d_min_d[idx],
                    'distance_to_player_max_defense': d_max_d[idx],
                    'relative_velocity_magnitude_mean_defense': v_mean_d[idx],
                    'relative_velocity_magnitude_min_defense': v_min_d[idx],
                    'relative_velocity_magnitude_max_defense': v_max_d[idx],
                    'angle_to_player_mean_defense': a_mean_d[idx],
                    'angle_to_player_min_defense': a_min_d[idx],
                    'angle_to_player_max_defense': a_max_d[idx],
                    'nearest_opponent_dist': nearest_dist[idx],
                    'nearest_opponent_angle': nearest_angle[idx],
                    'nearest_opponent_rel_speed': nearest_rel[idx],
                })

        interaction_agg = pd.DataFrame(agg_rows)
        input_df = input_df.merge(
            interaction_agg,
            on=['game_id', 'play_id', 'frame_id', 'nfl_id'],
            how='left'
        )
    else:
        print("Skipping fast interaction feature computation (use_fast_interactions=False).")
    # ==========================================
    # FEATURE LIST (ENHANCED)
    # ==========================================
    print("Step 4/4: Creating sequences...")
    
    feature_cols = [
        # Core (6)
        'x', 'y', 's', 'a', 'ball_land_x', 'ball_land_y',

        # Angles encoded (4)
        'o_sin', 'o_cos', 'dir_sin', 'dir_cos',

        # Player (2)
        'player_height_feet', 'player_weight',
        
        # Motion (6)
        'velocity_x', 'velocity_y', 'acceleration_x', 'acceleration_y',
        'momentum_x', 'momentum_y', 'kinetic_energy',
        
        # Roles (5)
        'is_offense', 'is_defense', 'is_receiver', 'is_coverage', 'is_passer',
        
        # Ball (5)
        'distance_to_ball', 'angle_to_ball', 'ball_direction_x', 'ball_direction_y', 'closing_speed',
        
        # Original temporal (15)
        'x_lag1', 'y_lag1', 'velocity_x_lag1', 'velocity_y_lag1',
        'x_lag2', 'y_lag2', 'velocity_x_lag2', 'velocity_y_lag2',
        'x_lag3', 'y_lag3', 'velocity_x_lag3', 'velocity_y_lag3',
        'velocity_x_ema', 'velocity_y_ema', 'speed_ema',
        
        # NEW: Distance rate (3)
        'distance_to_ball_change', 'distance_to_ball_accel', 'time_to_intercept',
        
        # NEW: Target alignment (3)
        'velocity_alignment', 'velocity_perpendicular', 'accel_alignment',
        
        # NEW: Multi-window rolling (24)
        'velocity_x_roll3', 'velocity_x_std3', 'velocity_y_roll3', 'velocity_y_std3',
        's_roll3', 's_std3', 'a_roll3', 'a_std3',
        'velocity_x_roll5', 'velocity_x_std5', 'velocity_y_roll5', 'velocity_y_std5',
        's_roll5', 's_std5', 'a_roll5', 'a_std5',
        'velocity_x_roll10', 'velocity_x_std10', 'velocity_y_roll10', 'velocity_y_std10',
        's_roll10', 's_std10', 'a_roll10', 'a_std10',
        
        # NEW: Extended lags (8)
        'x_lag4', 'y_lag4', 'velocity_x_lag4', 'velocity_y_lag4',
        'x_lag5', 'y_lag5', 'velocity_x_lag5', 'velocity_y_lag5',
        
        # NEW: Velocity changes (4)
        'velocity_x_change', 'velocity_y_change', 'speed_change', 'direction_change',
        
        # NEW: Field position (4)
        'dist_from_sideline', 'dist_from_endzone',
        
        # NEW: Role-specific (3)
        'receiver_optimality', 'receiver_deviation', 'defender_closing_speed',
        
        # NEW: Time (2)
        'frames_elapsed', 'normalized_time',
        # New: Player interaction (21)
        'distance_to_player_mean_offense', 'distance_to_player_min_offense', 'distance_to_player_max_offense',
        'relative_velocity_magnitude_mean_offense', 'relative_velocity_magnitude_min_offense', 'relative_velocity_magnitude_max_offense',
        'angle_to_player_mean_offense', 'angle_to_player_min_offense', 'angle_to_player_max_offense',
        'distance_to_player_mean_defense', 'distance_to_player_min_defense', 'distance_to_player_max_defense',
        'relative_velocity_magnitude_mean_defense', 'relative_velocity_magnitude_min_defense', 'relative_velocity_magnitude_max_defense',
        'angle_to_player_mean_defense', 'angle_to_player_min_defense', 'angle_to_player_max_defense',
        'nearest_opponent_dist', 'nearest_opponent_angle', 'nearest_opponent_rel_speed',
    ]
    
    # Filter to existing
    feature_cols = [c for c in feature_cols if c in input_df.columns]
    print(f"Using {len(feature_cols)}")
    
    # ==========================================
    # CREATE SEQUENCES
    # ==========================================
    input_df.set_index(['game_id', 'play_id', 'nfl_id'], inplace=True)
    grouped = input_df.groupby(level=['game_id', 'play_id', 'nfl_id'])
    
    target_rows = output_df if is_training else test_template
    target_groups = target_rows[['game_id', 'play_id', 'nfl_id']].drop_duplicates()
    
    sequences, targets_dx, targets_dy, targets_frame_ids, sequence_ids = [], [], [], [], []
    
    for _, row in tqdm(target_groups.iterrows(), total=len(target_groups), desc="Creating sequences"):
        key = (row['game_id'], row['play_id'], row['nfl_id'])
        
        try:
            group_df = grouped.get_group(key)
        except KeyError:
            continue
        
        input_window = group_df.tail(window_size)
        
        if len(input_window) < window_size:
            # if is_training:
            #     print(f"Skipping sequence with insufficient history for {key}")
            #     continue
            print(f"Padding sequence with insufficient history for {key}")
            pad_len = window_size - len(input_window)
            first = input_window.iloc[0:1].copy()
            pad_df = pd.concat([first] * pad_len, ignore_index=True)
            input_window = pd.concat([pad_df, input_window], ignore_index=True)
        
        # fill within the window only, no future mean usage
        input_window = input_window.ffill().bfill()
        input_window = input_window.fillna(0.0)

        seq = input_window[feature_cols].values
        if np.isnan(seq).any():
            # print which sequence has NaNs
            print(f"NaNs found in sequence for {key}, replacing with 0.0")
            # ensure no NaNs go into the model
            seq = np.nan_to_num(seq, nan=0.0)
        
        sequences.append(seq)
        
        if is_training:
            out_grp = output_df[
                (output_df['game_id']==row['game_id']) &
                (output_df['play_id']==row['play_id']) &
                (output_df['nfl_id']==row['nfl_id'])
            ].sort_values('frame_id')
            
            last_x = input_window.iloc[-1]['x']
            last_y = input_window.iloc[-1]['y']
            
            dx = out_grp['x'].values - last_x
            dy = out_grp['y'].values - last_y
            
            targets_dx.append(dx)
            targets_dy.append(dy)
            targets_frame_ids.append(out_grp['frame_id'].values)
        
        sequence_ids.append({
            'game_id': key[0],
            'play_id': key[1],
            'nfl_id': key[2],
            'frame_id': input_window.iloc[-1]['frame_id']
        })
    
    print(f"Created {len(sequences)} sequences with {len(feature_cols)} features each")
    
    if is_training:
        return sequences, targets_dx, targets_dy, targets_frame_ids, sequence_ids, feature_cols
    return sequences, sequence_ids, feature_cols

In [None]:
# Load the four tables used below via load_data (defined elsewhere in this notebook).
# debug_fraction=1.0 is forwarded as-is — presumably "use the full data set";
# TODO confirm against load_data.
print("Loading and preparing data...")
train_input, train_output, test_input, test_template = load_data(debug_fraction=1.0)

# Model class

In [None]:
class GaussianNoise(nn.Module):
    """Layer that perturbs its input with additive zero-mean Gaussian noise.

    The noise is injected only while the module is in training mode; in eval
    mode the input tensor is returned unchanged.
    """

    def __init__(self, stddev):
        super().__init__()
        # Scale factor applied to the standard-normal noise sample.
        self.stddev = stddev

    def forward(self, x):
        # Eval mode: act as the identity.
        if not self.training:
            return x
        # Training mode: draw noise matching x (shape/dtype/device) and add it.
        return x + torch.randn_like(x) * self.stddev

## Sequence

In [None]:
class SeqModel(nn.Module):
    """Bidirectional GRU encoder with attention pooling and a cumulative output head.

    Args:
        input_dim: number of features per time step.
        horizon: number of future steps to predict.

    forward() takes a batch-first sequence tensor and returns a (batch, horizon)
    tensor holding the running sum of the per-step head outputs.
    """

    def __init__(self, input_dim, horizon):
        super().__init__()
        # Attribute names and the nn.Sequential layout define the state-dict keys,
        # so they must stay as they are for saved checkpoints to load.
        self.gru = nn.GRU(
            input_dim,
            128,
            num_layers=2,
            batch_first=True,
            dropout=0.1,
            bidirectional=True,
        )
        # 256 = 2 directions x 128 hidden units.
        self.pool_ln = nn.LayerNorm(256)
        self.pool_attn = nn.MultiheadAttention(256, num_heads=4, batch_first=True)
        # Single learned query used to pool the whole sequence into one vector.
        self.pool_query = nn.Parameter(torch.randn(1, 1, 256))
        self.head = nn.Sequential(
            nn.Linear(256, 128),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(128, horizon),
        )

    def forward(self, x):
        encoded, _ = self.gru(x)
        # Broadcast the learned query over the batch dimension.
        query = self.pool_query.expand(encoded.size(0), -1, -1)
        # Keys and values are both the layer-normalised GRU outputs.
        keys = self.pool_ln(encoded)
        values = self.pool_ln(encoded)
        pooled, _ = self.pool_attn(query, keys, values)
        step_outputs = self.head(pooled.squeeze(1))
        # Accumulate per-step outputs along the horizon axis.
        return torch.cumsum(step_outputs, dim=1)

# Submission maker

In [None]:
# Checkpoint helpers: build, save and load the per-axis SeqModel weights.
def build_axis_model_from_config(cfg):
    """
    Re-create a SeqModel from a saved config.

    Args:
        cfg: mapping that provides the 'input_dim' and 'horizon' entries.

    Returns:
        A newly constructed SeqModel (no weights are loaded here).
    """
    return SeqModel(input_dim=cfg['input_dim'], horizon=cfg['horizon'])


def _model_tag_from_instance(model):
    """Return 'seq' for a SeqModel instance, otherwise the lower-cased class name."""
    return 'seq' if isinstance(model, SeqModel) else model.__class__.__name__.lower()


def create_model_save_config(model, input_dim, horizon):
    """
    Assemble the minimal config needed to re-instantiate a SeqModel.

    Args:
        model: accepted for interface compatibility; not inspected.
        input_dim: number of input features (coerced with int()).
        horizon: number of predicted steps (coerced with int()).

    Returns:
        dict with keys 'model' (always 'seq'), 'input_dim' and 'horizon'.
    """
    config = {'model': 'seq'}
    config['input_dim'] = int(input_dim)
    config['horizon'] = int(horizon)
    return config


def save_axis_checkpoint(model, cfg, fold_dir, axis_name='x'):
    """
    Save checkpoint for SeqModel.

    Writes '<fold_dir>/axis_<axis_name>.pt' containing a dict with the model's
    'state_dict' and a 'config' dict. The fold directory is created if it does
    not exist yet.

    Args:
        model: module whose state_dict() is saved.
        cfg: config mapping (or None); it is copied, so the caller's object is
            never modified. The copy always gets 'model' set to 'seq'.
        fold_dir: directory (str or Path) that receives the checkpoint file.
        axis_name: axis label used in the file name, e.g. 'x' or 'y'.
    """
    cfg = dict(cfg or {})
    cfg['model'] = 'seq'
    fold_dir = Path(fold_dir)
    # torch.save does not create parent directories; make sure the target exists.
    fold_dir.mkdir(parents=True, exist_ok=True)
    path = fold_dir / f'axis_{axis_name}.pt'
    torch.save({'state_dict': model.state_dict(), 'config': cfg}, str(path))


def load_axis_checkpoint(fold_dir, axis_name='x', device=None):
    """
    Load SeqModel checkpoint.

    Reads '<fold_dir>/axis_<axis_name>.pt' (the layout written by
    save_axis_checkpoint), rebuilds a SeqModel from the stored config and loads
    the weights. A strict load is tried first; if the state dict does not match
    the model, a fresh model is loaded non-strictly as a best-effort fallback and
    a warning with the number of missing/unexpected keys is printed.

    Args:
        fold_dir: directory (str or Path) containing the checkpoint file.
        axis_name: axis label used in the file name, e.g. 'x' or 'y'.
        device: target device; falls back to Config.DEVICE when falsy.

    Returns:
        (model, cfg): the model in eval mode on `device`, and the stored config.

    Raises:
        KeyError: if the checkpoint lacks 'config'/'state_dict' or the config
            lacks 'input_dim'/'horizon'.
        RuntimeError: if the non-strict fallback load fails as well.
    """
    device = device or Config.DEVICE
    ckpt_path = Path(fold_dir) / f'axis_{axis_name}.pt'
    # NOTE(review): torch.load unpickles the file, so only point this at trusted
    # checkpoints (consider weights_only=True where the installed torch supports it).
    ckpt = torch.load(str(ckpt_path), map_location=device)
    cfg = ckpt['config']
    state_dict = ckpt['state_dict']

    # Built outside the try block so config/device problems surface directly
    # instead of being reported as a state-dict load failure.
    model = SeqModel(input_dim=cfg['input_dim'], horizon=cfg['horizon']).to(device)
    try:
        model.load_state_dict(state_dict, strict=True)
    except RuntimeError as e:
        # load_state_dict reports key/shape mismatches as RuntimeError.
        print(f"[{axis_name}] SeqModel load failed ({e}); retrying with strict=False.")

        # Last resort: best-effort non-strict load into a fresh model
        model = SeqModel(input_dim=cfg['input_dim'], horizon=cfg['horizon']).to(device)
        missing, unexpected = model.load_state_dict(state_dict, strict=False)
        if missing or unexpected:
            print(f"[{axis_name}] Warning: missing={len(missing)}, unexpected={len(unexpected)} when loading non-strict.")
    model.eval()
    return model, cfg


def load_folds_xy(num_folds, models_dir=None, device=None):
    """
    Load SeqModel checkpoints for all folds.

    For each fold k in 1..num_folds this reads '<models_dir>/fold_<k>/' and loads
    the x-axis model, the y-axis model and the feature scaler. A fold whose
    loading raises is reported on stdout and skipped, so the returned lists can be
    shorter than num_folds (they always stay aligned with each other).

    Args:
        num_folds: number of fold directories to try.
        models_dir: base directory; the current directory is used when falsy.
        device: target device; falls back to Config.DEVICE when falsy.

    Returns:
        (models_x, models_y, scalers, cfgs) — parallel lists; cfgs holds the
        config stored with each x-axis checkpoint.
    """
    device = device or Config.DEVICE
    root = Path('.') if not models_dir else Path(models_dir)
    models_x, models_y, scalers, cfgs = [], [], [], []
    for fold in range(1, num_folds + 1):
        fold_dir = root / f'fold_{fold}'
        try:
            model_x, cfg_x = load_axis_checkpoint(fold_dir, 'x', device=device)
            model_y, _cfg_y = load_axis_checkpoint(fold_dir, 'y', device=device)
            # NOTE(review): joblib.load unpickles the file; use trusted artifacts only.
            scaler = joblib.load(str(fold_dir / 'lstm_feature_scaler_fold.joblib'))
        except Exception as e:
            # Best effort: a broken fold is dropped rather than aborting the run.
            print(f'Fold {fold} load failed: {e}')
        else:
            # Append only once everything for this fold loaded, keeping lists aligned.
            models_x.append(model_x)
            models_y.append(model_y)
            scalers.append(scaler)
            cfgs.append(cfg_x)
            print(f'Loaded fold {fold} OK')
    return models_x, models_y, scalers, cfgs
# ...existing code...

In [None]:
def create_ensemble_predictions_xy(
    models_x, models_y, scalers, X_test_unscaled, test_seq_ids, test_template, batch_size=1024
):
    """
    Ensemble test-time predictions using separate axis models (dx and dy) across folds.

    Every fold's models predict displacements relative to the last observed
    position of each sequence. The fold predictions are averaged, added to that
    position, clipped to the field bounds from Config and emitted for every frame
    that test_template lists for the sequence's (game_id, play_id, nfl_id). If the
    template lists more frames than the model horizon, the last predicted step is
    reused for the extra frames.

    Args:
        models_x, models_y: lists of per-fold axis models (e.g. SeqModel), same
            length. Each is called on a (B,T,F) float32 batch and must return (B,H).
        scalers: list of fitted scalers (objects with .transform) aligned with the
            models, or None to feed the sequences unscaled. Single entries may be None.
        X_test_unscaled: list/array of equally shaped (T,F) sequences (unscaled);
            feature 0 must be x and feature 1 must be y.
        test_seq_ids: list of dicts with keys [game_id, play_id, nfl_id, ...], in
            the same order as X_test_unscaled.
        test_template: DataFrame with columns [game_id, play_id, nfl_id, frame_id]
            listing the required submission rows.
        batch_size: inference batch size.

    Returns:
        DataFrame with columns [id, x, y], or None when there are no models or the
        x/y model counts differ (a message is printed in that case).

    Raises:
        ValueError: if scalers is given but its length differs from the model count.
    """
    if len(models_x) == 0 or len(models_x) != len(models_y):
        print("No axis models or mismatched model counts.")
        return None
    if scalers is not None and len(scalers) != len(models_x):
        raise ValueError("Length of scalers must match number of folds (or be None).")

    # Convert sequences to array of objects for robust handling
    X_test_unscaled = np.array(X_test_unscaled, dtype=object)

    # Last observed absolute positions from the sequences (assumes feat[0]=x, feat[1]=y)
    x_last = np.array([seq[-1, 0] for seq in X_test_unscaled], dtype=np.float32)
    y_last = np.array([seq[-1, 1] for seq in X_test_unscaled], dtype=np.float32)

    # Per-fold displacement predictions, each of shape (N,H)
    per_fold_dx = []
    per_fold_dy = []

    for fold_idx, (model_x, model_y) in enumerate(zip(models_x, models_y)):
        scaler = scalers[fold_idx] if scalers is not None else None

        # Scale per sequence with this fold's scaler (if any)
        if scaler is not None:
            fold_seqs = [scaler.transform(s) for s in X_test_unscaled]
        else:
            fold_seqs = list(X_test_unscaled)

        # Stack to a dense float32 (N,T,F) array
        X = np.stack([np.asarray(s, dtype=np.float32) for s in fold_seqs])
        device = next(model_x.parameters()).device
        loader = DataLoader(TensorDataset(torch.from_numpy(X)), batch_size=batch_size, shuffle=False)

        dx_list, dy_list = [], []
        model_x.eval()
        model_y.eval()
        with torch.no_grad():
            for (batch,) in loader:
                batch = batch.to(device)                     # (B,T,F)
                dx_list.append(model_x(batch).cpu().numpy())  # (B,H)
                dy_list.append(model_y(batch).cpu().numpy())  # (B,H)

        per_fold_dx.append(np.vstack(dx_list))  # (N,H)
        per_fold_dy.append(np.vstack(dy_list))  # (N,H)

    # Ensemble by mean across folds
    ens_dx = np.mean(np.stack(per_fold_dx, axis=0), axis=0)  # (N,H)
    ens_dy = np.mean(np.stack(per_fold_dy, axis=0), axis=0)  # (N,H)

    # Group the template once: (game, play, nfl) -> frame ids in ascending order.
    # This replaces a full-template boolean filter per sequence.
    key_cols = ['game_id', 'play_id', 'nfl_id']
    ordered_template = test_template.sort_values('frame_id')
    frames_by_key = {
        key: frames.tolist()
        for key, frames in ordered_template.groupby(key_cols, sort=False)['frame_id']
    }

    # Create submission rows in sequence order, frames ascending within a sequence
    test_meta = pd.DataFrame(test_seq_ids)
    out_rows = []
    H = ens_dx.shape[1]
    seq_keys = zip(test_meta['game_id'], test_meta['play_id'], test_meta['nfl_id'])
    for i, (game_id, play_id, nfl_id) in enumerate(seq_keys):
        game_id, play_id, nfl_id = int(game_id), int(play_id), int(nfl_id)

        frame_ids = frames_by_key.get((game_id, play_id, nfl_id), [])
        for t, frame_id in enumerate(frame_ids):
            # Frames beyond the model horizon reuse the last predicted step.
            tt = min(t, H - 1)
            px = np.clip(x_last[i] + ens_dx[i, tt], Config.FIELD_X_MIN, Config.FIELD_X_MAX)
            py = np.clip(y_last[i] + ens_dy[i, tt], Config.FIELD_Y_MIN, Config.FIELD_Y_MAX)
            out_rows.append({
                'id': f"{game_id}_{play_id}_{nfl_id}_{frame_id}",
                'x': px,
                'y': py
            })
    submission = pd.DataFrame(out_rows)
    return submission
# ...existing code...

In [None]:
def create_ensemble_val_predictions(models, scalers, X_val_unscaled, val_ids, y_val_dx_fold, y_val_dy_fold, val_data, exclude_fold=None):
    """
    Generate ensemble predictions for validation data and prepare for scoring.

    For every validation sequence the ground-truth and predicted absolute
    positions are rebuilt from the last observed position plus the (true or
    ensemble-averaged) displacements. Row ids use the 1-based step number as the
    frame component. Predictions are clipped to the field bounds from Config;
    ground truth is not. If a sequence has more ground-truth steps than the models
    predict, the last predicted step is reused for the remaining steps (same rule
    as at test time).

    NOTE(review): the model at index `exclude_fold` is skipped. In a standard
    K-fold setup the fold-j model is the only one that was NOT trained on fold j's
    validation data — confirm that excluding it is really the intended behaviour.

    Args:
        models: List of trained models; each must map a (1,T,F) tensor to an
            output whose first batch element has shape (H, 2) = (step, [dx, dy]).
        scalers: List of scalers (one per model), each providing .transform
        X_val_unscaled: Validation sequences (unscaled)
        val_ids: List of dicts with sequence metadata (game_id, play_id, nfl_id)
        y_val_dx_fold, y_val_dy_fold: Ground truth displacements per sequence
        val_data: DataFrame with x_last, y_last, row-aligned with val_ids
        exclude_fold: Index of the model to exclude (0-based), or None

    Returns:
        ensemble_pred_df, ensemble_true_df: DataFrames [id, x, y] for scoring
    """
    # Resolve the participating models once and put them in eval mode up front.
    active = [
        (j, model) for j, model in enumerate(models)
        if exclude_fold is None or j != exclude_fold
    ]
    for _, model in active:
        model.eval()

    pred_rows = []
    true_rows = []

    for i, seq_info in enumerate(val_ids):
        game_id = seq_info['game_id']
        play_id = seq_info['play_id']
        nfl_id = seq_info['nfl_id']
        x_last = val_data.iloc[i]['x_last']
        y_last = val_data.iloc[i]['y_last']

        # Ground truth
        dx_true = y_val_dx_fold[i]
        dy_true = y_val_dy_fold[i]
        n_frames = len(dx_true)
        for t in range(n_frames):
            true_rows.append({
                'id': f"{game_id}_{play_id}_{nfl_id}_{t + 1}",
                'x': x_last + dx_true[t],
                'y': y_last + dy_true[t]
            })

        # Ensemble predictions from the participating models
        per_model_dx = []
        per_model_dy = []
        for j, model in active:
            scaled_seq = scalers[j].transform(X_val_unscaled[i]).astype(np.float32)
            scaled_seq = torch.tensor(scaled_seq).unsqueeze(0).to(next(model.parameters()).device)
            with torch.no_grad():
                output = model(scaled_seq).cpu().numpy()[0]  # (max_frames_output, 2)
            per_model_dx.append(output[:, 0])
            per_model_dy.append(output[:, 1])

        # Average across remaining models
        if per_model_dx:
            ens_dx = np.mean(per_model_dx, axis=0)
            ens_dy = np.mean(per_model_dy, axis=0)
        else:
            # Fallback: predict "stay at the last known position" when no model is left
            ens_dx = np.zeros(n_frames)
            ens_dy = np.zeros(len(dy_true))

        # Generate predictions for each frame; steps past the predicted horizon
        # reuse the last predicted displacement instead of raising IndexError.
        last_step = len(ens_dx) - 1
        for t in range(n_frames):
            tt = min(t, last_step)
            pred_x = x_last + ens_dx[tt]
            pred_y = y_last + ens_dy[tt]
            pred_rows.append({
                'id': f"{game_id}_{play_id}_{nfl_id}_{t+1}",
                'x': np.clip(pred_x, Config.FIELD_X_MIN, Config.FIELD_X_MAX),
                'y': np.clip(pred_y, Config.FIELD_Y_MIN, Config.FIELD_Y_MAX)
            })

    return pd.DataFrame(pred_rows), pd.DataFrame(true_rows)

# Infer

In [None]:

# Load the pretrained per-fold x/y axis models, feature scalers and configs.
# load_folds_xy skips folds that fail to load, so these lists may hold fewer
# than Config.N_FOLDS entries (possibly none).
print(f"Loading pretrained models from {Config.NN_PRETRAIN_DIR}")
models_x_nn, models_y_nn, scalers, cfgs = load_folds_xy(num_folds=Config.N_FOLDS, models_dir=Config.NN_PRETRAIN_DIR, device=Config.DEVICE)



In [None]:
# Build test sequences.
# With is_training=False, prepare_sequences returns (sequences, sequence_ids, feature_cols):
# one window of Config.WINDOW_SIZE rows per (game_id, play_id, nfl_id) in test_template.
test_sequences, test_seq_ids,feature_cols = prepare_sequences(
    test_input, test_template=test_template, is_training=False, window_size=Config.WINDOW_SIZE
)
# Indexing test_sequences[0] assumes at least one sequence was produced.
print(f"Prepared {len(test_sequences)} test sequences with shape: {test_sequences[0].shape}.")


In [None]:
# Use the trained per-fold axis models
submission_xy = create_ensemble_predictions_xy(
    models_x=models_x_nn,
    models_y=models_y_nn,
    scalers=scalers,
    X_test_unscaled=test_sequences,
    test_seq_ids=test_seq_ids,
    test_template=test_template,
    batch_size=1024
)
# create_ensemble_predictions_xy returns None when no fold models are available
# (or the x/y model counts differ). Fail with an explicit message instead of an
# AttributeError on None.to_csv.
if submission_xy is None:
    raise RuntimeError(
        "No submission was produced: no fold models were loaded "
        f"from {Config.NN_PRETRAIN_DIR} or the x/y model counts differ."
    )
submission_xy.to_csv('submission.csv', index=False)
print("Saved submission.csv")

In [None]:
# Bare expression: rendered as this cell's output for a quick visual check.
submission_xy