In [None]:
# =========================================================
# セル1: 基本セットアップ
# - 必要ライブラリのimport
# - フィールド定数、学習ハイパーパラメータ
# - 方向正規化や物理モデルなどの基本ユーティリティ関数
# =========================================================

import os
import warnings
warnings.filterwarnings("ignore")

# --------------------------------------
# Optional GPU acceleration: try to run pandas on the cuDF backend.
# USE_CUDF ends up True only if every statement of the try-body succeeds
# (setting the environment variable and importing pandas, numpy and cupy).
# NOTE(review): nothing here loads `cudf.pandas` explicitly; presumably the
# CUDF_PANDAS_BACKEND variable is expected to activate it -- confirm, because
# otherwise USE_CUDF=True only proves that cupy is importable.
# --------------------------------------
USE_CUDF = False
try:
    os.environ["CUDF_PANDAS_BACKEND"] = "cudf"
    import pandas as pd
    import numpy as np
    import cupy as cp  # optional
    USE_CUDF = True
    print("✅ cuDF pandas backend ENABLED (GPU DataFrame ops)")
except Exception:
    # Any failure in the block above falls back to plain (CPU) pandas/numpy.
    import pandas as pd
    import numpy as np
    print("cuDF backend not available -> using pandas (CPU)")

from pathlib import Path
from tqdm.auto import tqdm
from multiprocessing import Pool as MP_Pool, cpu_count

import pickle
from sklearn.metrics import mean_squared_error
from catboost import CatBoostRegressor, Pool as CatBoostPool

# -------------------
# Data path and constants
# -------------------
BASEDIR = Path("/kaggle/input/nfl-big-data-bowl-2026-prediction")

FIELD_LENGTH = 120.0
FIELD_WIDTH  = 53.3
FPS = 10.0  # frames/sec

N_FRAMES_KEEP = 10      # keep at most this many trailing frames per (game, play, player)
WINDOW_SIZE   = 3       # window width of the lag / rolling features
SHORT_TIME_THRESHOLD = 1.0  # rows with time_to_ball < 1.0 s are handled by the "short" models

# CatBoost hyperparameters
ITERATIONS = 20000
LEARNING_RATE = 0.045
DEPTH = 9
L2_LEAF_REG = 3.0
BOOTSTRAP_TYPE = "Bayesian"
BAGGING_TEMPERATURE = 0.7
EARLY_STOPPING = 400
VERBOSE_EVAL = 200

SEEDS = [42, 2025, 7]
# CV uses week 17 / week 18 blocks.
# NOTE(review): N_FOLDS is not referenced in the visible code; the two folds
# are hard-coded in build_week_block_2fold.
N_FOLDS = 2

def set_seed(seed=42):
    """Seed the available random number generators on a best-effort basis.

    Seeds Python's ``random`` module and NumPy's global generator, then torch
    (CPU and all CUDA devices) if it can be imported and seeded, and finally
    exports ``PYTHONHASHSEED`` into ``os.environ``.

    Args:
        seed: Integer seed shared by all generators.
    """
    import random

    random.seed(seed)
    np.random.seed(seed)

    # torch is optional: both a failed import and a failed seeding call are
    # deliberately ignored so the function never raises because of torch.
    try:
        import torch
    except Exception:
        torch = None
    if torch is not None:
        try:
            torch.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
        except Exception:
            pass

    os.environ['PYTHONHASHSEED'] = str(seed)

def normalize_orientation_inplace(df: pd.DataFrame) -> None:
    """Mirror plays whose ``play_direction`` is "left", modifying ``df`` in place.

    For rows where ``play_direction`` (compared case-insensitively as a
    string) equals "left":
      * ``x`` / ``ball_land_x`` become ``FIELD_LENGTH - value``
      * ``y`` / ``ball_land_y`` become ``FIELD_WIDTH - value``
      * ``dir`` / ``o`` become ``(value + 180) % 360``

    Columns that are absent are skipped. Without a ``play_direction`` column
    the frame is left untouched. Nothing is returned.
    """
    if 'play_direction' not in df.columns:
        return
    going_left = df['play_direction'].astype(str).str.lower().eq('left')

    def _mirror(col, extent):
        # Reflect a coordinate column across the middle of the field.
        if col in df.columns:
            df.loc[going_left, col] = extent - df.loc[going_left, col]

    def _turn_around(col):
        # Rotate an angle column (degrees) by half a turn.
        if col in df.columns:
            df.loc[going_left, col] = (df.loc[going_left, col] + 180) % 360

    # Player coordinates.
    _mirror('x', FIELD_LENGTH)
    _mirror('y', FIELD_WIDTH)

    # Movement direction and body orientation.
    for angle_col in ('dir', 'o'):
        _turn_around(angle_col)

    # Ball landing point.
    _mirror('ball_land_x', FIELD_LENGTH)
    _mirror('ball_land_y', FIELD_WIDTH)


def compute_physics_xy(x, y, vx, vy, future_frame_id):
    """Constant-velocity extrapolation of a position, clipped to the field.

    ``future_frame_id`` is coerced to numbers (unparseable values and NaN
    count as 0) and divided by ``FPS`` to obtain the elapsed time ``t`` in
    seconds.

    Returns:
        Tuple ``(px, py)`` with ``px = clip(x + vx * t, 0, FIELD_LENGTH)`` and
        ``py = clip(y + vy * t, 0, FIELD_WIDTH)``.
    """
    frames_ahead = pd.to_numeric(future_frame_id, errors='coerce')
    seconds = np.nan_to_num(frames_ahead) / FPS
    projected_x = np.clip(x + vx * seconds, 0, FIELD_LENGTH)
    projected_y = np.clip(y + vy * seconds, 0, FIELD_WIDTH)
    return projected_x, projected_y


def role_bucket_id(player_role: str) -> int:
    """Collapse a player role into one of three bucket ids.

    Returns:
        0 for "Targeted Receiver", 1 for "Defensive Coverage" and 2 for
        every other value.
    """
    known_roles = ("Targeted Receiver", "Defensive Coverage")
    for bucket, role_name in enumerate(known_roles):
        if player_role == role_name:
            return bucket
    return 2



In [None]:
# =========================================================
# セル2: 特徴量エンジニアリング & 前処理ユーティリティ
# - 全週の読み込み
# - 最終10フレーム抽出
# - 1フレームあたりの物理系/幾何系特徴量付与
# - ラグ/ローリング統計 (WINDOW_SIZE=3)
# - スナップ直前フレーム(最終フレーム)におけるQB/DFコンテキスト追加
# - 学習用テーブル(train_tbl)と推論用テーブル(test_tbl)を組み立てる
# - モデルに入れる特徴カラム選定
# - CV分割(week17/18)
# - バケット補正テーブルを作る下準備
# =========================================================

def load_week(week:int):
    """Read the input and output CSV of one 2023 training week.

    Both frames get a ``week`` column set to ``week``.

    Returns:
        Tuple ``(input_frame, output_frame)``.
    """
    csv_paths = (
        BASEDIR / f"train/input_2023_w{week:02d}.csv",
        BASEDIR / f"train/output_2023_w{week:02d}.csv",
    )
    tracking, targets = [pd.read_csv(path) for path in csv_paths]
    for frame in (tracking, targets):
        frame['week'] = week
    return tracking, targets

def load_all_train():
    """Load weeks 1-18 with a process pool and concatenate them.

    Uses at most 16 worker processes (fewer when fewer CPUs are reported).

    Returns:
        Tuple ``(inputs, outputs)`` of concatenated frames with a fresh index.
    """
    week_numbers = list(range(1, 19))
    n_workers = min(cpu_count(), 16)
    with MP_Pool(n_workers) as workers:
        # imap yields results in week order, so the concatenation is ordered.
        loaded = list(tqdm(workers.imap(load_week, week_numbers), total=len(week_numbers)))
    input_parts = [pair[0] for pair in loaded]
    output_parts = [pair[1] for pair in loaded]
    return (
        pd.concat(input_parts, ignore_index=True),
        pd.concat(output_parts, ignore_index=True),
    )

def load_test():
    """Read the public test files ``test_input.csv`` and ``test.csv``.

    Returns:
        Tuple ``(test_input, test_df)`` in that order.
    """
    test_input, test_df = [
        pd.read_csv(BASEDIR / file_name)
        for file_name in ("test_input.csv", "test.csv")
    ]
    return test_input, test_df


def keep_last_n_frames(df: pd.DataFrame, n_last=N_FRAMES_KEEP) -> pd.DataFrame:
    """Keep only the last ``n_last`` frames of every (game_id, play_id, nfl_id).

    Rows are sorted by the three keys and ``frame_id`` and the trailing
    ``n_last`` rows of each group are returned as a new frame; the original
    index labels are preserved.

    The result of ``groupby().tail()`` is returned directly instead of being
    re-selected through ``df.loc[index]``: a label-based re-selection returns
    every row carrying a selected label, so an input with duplicate index
    labels (e.g. frames concatenated without ``ignore_index``) would come back
    with duplicated rows.

    Args:
        df: Frame-level tracking rows.
        n_last: Maximum number of trailing frames kept per player and play.

    Returns:
        A copy containing the selected rows (an empty copy for empty input).
    """
    if len(df) == 0:
        return df.copy()
    player_keys = ['game_id', 'play_id', 'nfl_id']
    ordered = df.sort_values(player_keys + ['frame_id'])
    return ordered.groupby(player_keys).tail(n_last).copy()


def add_perframe_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add per-row physics, body and geometry features to frame-level rows.

    Works on a copy and returns it; an empty input is returned as an empty
    copy without any new column.

    The columns 's', 'a', 'o', 'dir', 'x', 'y', 'player_role', 'player_side',
    'player_height', 'player_weight', 'ball_land_x' and 'ball_land_y' are read
    without an existence check somewhere below, so a missing one raises
    KeyError. 'num_frames_output' is optional.
    """
    if len(df) == 0:
        return df.copy()

    out = df.copy()

    # Missing-value indicator columns (isna_<col>), only for columns present.
    for col in ['s','a','o','dir','ball_land_x','ball_land_y','player_height']:
        if col in out.columns:
            out[f'isna_{col}'] = out[col].isna().astype(np.int8)

    # Velocity / acceleration vectors along `dir` (missing values count as 0).
    # NOTE(review): x uses cos(dir) and y uses sin(dir), i.e. `dir` is treated
    # as an angle measured from the +x axis -- confirm this matches the angle
    # convention of the tracking data.
    dir_rad = np.radians(out['dir'].fillna(0.0))
    spd = out['s'].fillna(0.0)
    acc = out['a'].fillna(0.0)

    out['velocity_x']     = spd * np.cos(dir_rad)
    out['velocity_y']     = spd * np.sin(dir_rad)
    out['acceleration_x'] = acc * np.cos(dir_rad)
    out['acceleration_y'] = acc * np.sin(dir_rad)

    # Distance and angle (radians, from arctan2) to the ball landing point and
    # the velocity component pointing toward it.
    if {'ball_land_x','ball_land_y','x','y'}.issubset(out.columns):
        dx = out['ball_land_x'] - out['x']
        dy = out['ball_land_y'] - out['y']
        dist = np.sqrt(dx*dx + dy*dy)
        out['dist_to_ball'] = dist
        out['angle_to_ball'] = np.arctan2(dy, dx)
        out['velocity_toward_ball'] = (
            out['velocity_x'] * np.cos(out['angle_to_ball']) +
            out['velocity_y'] * np.sin(out['angle_to_ball'])
        )
    else:
        out['dist_to_ball'] = np.nan
        out['angle_to_ball'] = np.nan
        out['velocity_toward_ball'] = np.nan

    # time_to_ball: number of output frames converted to seconds.
    if 'num_frames_output' in out.columns:
        out['time_to_ball'] = out['num_frames_output'] / FPS
    else:
        out['time_to_ball'] = np.nan

    # Smallest angular gap (degrees) between orientation `o` and heading `dir`.
    diff = np.abs(out['o'].fillna(0.0) - out['dir'].fillna(0.0))
    out['orientation_diff'] = np.minimum(diff, 360 - diff)

    # Role / side indicator flags.
    out['role_targeted_receiver']  = (out['player_role'] == 'Targeted Receiver').astype(np.int8)
    out['role_defensive_coverage'] = (out['player_role'] == 'Defensive Coverage').astype(np.int8)
    out['role_passer']             = (out['player_role'] == 'Passer').astype(np.int8)
    out['side_offense']            = (out['player_side'] == 'Offense').astype(np.int8)

    # Body size: player_height is parsed as "<feet>-<inches>"; missing or
    # unparseable parts count as 0.
    ht = out['player_height'].fillna('0-0').astype(str).str.split('-', n=1, expand=True)
    ft  = pd.to_numeric(ht[0], errors='coerce').fillna(0)
    inc = pd.to_numeric(ht[1], errors='coerce').fillna(0)
    height_in = ft * 12 + inc
    out['height_inches'] = height_in
    # BMI as weight / height_in**2 * 703 (assumes weight in pounds -- TODO confirm).
    # NOTE(review): a missing height gives height_in == 0, which is clamped to
    # 1e-6 and therefore produces an extremely large bmi rather than NaN.
    with np.errstate(divide='ignore', invalid='ignore'):
        out['bmi'] = (out['player_weight'] / (np.maximum(height_in,1e-6)**2)) * 703

    # Energy-like quantities and alignment of the heading with the ball direction.
    out['speed_squared'] = spd**2
    out['accel_magnitude'] = np.sqrt(out['acceleration_x']**2 + out['acceleration_y']**2)
    out['velocity_alignment'] = np.cos(out['angle_to_ball'] - np.radians(out['dir'].fillna(0.0)))

    # Constant-velocity position after time_to_ball seconds and its miss
    # distance from the ball landing point.
    t_ball = out['time_to_ball'].fillna(0.0)
    out['expected_x_at_ball'] = out['x'] + out['velocity_x'] * t_ball
    out['expected_y_at_ball'] = out['y'] + out['velocity_y'] * t_ball
    out['error_from_ball_x'] = out['expected_x_at_ball'] - out['ball_land_x']
    out['error_from_ball_y'] = out['expected_y_at_ball'] - out['ball_land_y']
    out['error_from_ball']   = np.sqrt(out['error_from_ball_x']**2 + out['error_from_ball_y']**2)

    # Momentum / kinetic energy; a missing weight is replaced by 200.0.
    out['momentum_x'] = out['player_weight'].fillna(200.0) * out['velocity_x']
    out['momentum_y'] = out['player_weight'].fillna(200.0) * out['velocity_y']
    out['kinetic_energy'] = 0.5 * out['player_weight'].fillna(200.0) * out['speed_squared']

    # Gap between orientation `o` and the direction to the ball (degrees).
    # NOTE(review): degrees(arctan2) lies in [-180, 180]; if `o` is in
    # [0, 360) -- presumably, verify -- `ad` can exceed 360 and `360 - ad`
    # then becomes negative. There is no modulo here, so angle_diff is not
    # guaranteed to be the smallest non-negative gap.
    ang_ball_deg = np.degrees(out['angle_to_ball'])
    ad = np.abs(out['o'].fillna(0.0) - ang_ball_deg)
    out['angle_diff'] = np.minimum(ad, 360 - ad)

    out['time_squared'] = out['time_to_ball']**2
    out['dist_squared'] = out['dist_to_ball']**2
    # The +0.1 keeps the ratio finite when time_to_ball is 0.
    out['weighted_dist_by_time'] = out['dist_to_ball'] / (out['time_to_ball'] + 0.1)

    # Distance to the nearer of y=0 / y=FIELD_WIDTH and to x=FIELD_LENGTH.
    out['dist_to_sideline'] = np.minimum(out['y'], FIELD_WIDTH - out['y'])
    out['dist_to_endzone']  = FIELD_LENGTH - out['x']

    # sin/cos expansion of the angle features.
    for col in ['dir','o']:
        rad = np.radians(out[col].fillna(0.0))
        out[f'{col}_sin'] = np.sin(rad)
        out[f'{col}_cos'] = np.cos(rad)
    # angle_to_ball is already in radians, so no conversion here.
    rad_ball = out['angle_to_ball'].fillna(0.0)
    out['angle_to_ball_sin'] = np.sin(rad_ball)
    out['angle_to_ball_cos'] = np.cos(rad_ball)

    return out


def add_sequence_features(df: pd.DataFrame, window=WINDOW_SIZE) -> pd.DataFrame:
    """Add lag, rolling and delta features per (game_id, play_id, nfl_id).

    The rows are sorted by the three keys and ``frame_id`` first. Added
    columns (only for source columns that exist):
      * ``<col>_lag<k>`` for k in the first ``window`` entries of [1, 2, 3]
      * ``<col>_roll<window>`` / ``<col>_std<window>``: rolling mean and
        standard deviation with ``min_periods=1``
      * ``velocity_x_delta`` / ``velocity_y_delta``: frame-to-frame difference

    Finally every column whose name contains ``_lag``, ``_roll<window>`` or
    ``_std<window>``, or ends with ``_delta``, has its NaN replaced by 0.0.

    Returns:
        A sorted copy with the new columns (an empty copy for empty input).
    """
    if len(df) == 0:
        return df.copy()

    player_keys = ['game_id', 'play_id', 'nfl_id']
    key_levels = list(range(len(player_keys)))
    out = df.sort_values(player_keys + ['frame_id']).copy()

    def _present(names):
        # Restrict a list of source columns to those that actually exist.
        return [name for name in names if name in out.columns]

    # Lagged values.
    lag_sources = _present(['x', 'y', 'velocity_x', 'velocity_y', 's', 'a'])
    for lag in [1, 2, 3][:window]:
        for col in lag_sources:
            out[f'{col}_lag{lag}'] = out.groupby(player_keys)[col].shift(lag)

    # Rolling mean / standard deviation. The grouped rolling result carries
    # the group keys as extra index levels; dropping them restores the row
    # index so the assignment aligns with `out`.
    for col in _present(['x', 'y', 'velocity_x', 'velocity_y', 's']):
        rolling = out.groupby(player_keys)[col].rolling(window, min_periods=1)
        out[f'{col}_roll{window}'] = rolling.mean().reset_index(level=key_levels, drop=True)
        out[f'{col}_std{window}'] = (
            rolling.std().reset_index(level=key_levels, drop=True).fillna(0.0)
        )

    # Frame-to-frame velocity change.
    for col in _present(['velocity_x', 'velocity_y']):
        out[f'{col}_delta'] = out.groupby(player_keys)[col].diff()

    # Fill the NaN that the sequence operations leave at group starts.
    markers = ('_lag', f'_roll{window}', f'_std{window}')
    derived = [
        name for name in out.columns
        if any(marker in name for marker in markers) or name.endswith('_delta')
    ]
    out[derived] = out[derived].fillna(0.0)

    return out


def _approach_speed(toward, rel_vel):
    """Return (|toward|, rel_vel projected on the unit vectors of toward).

    Rows of ``toward`` shorter than 1e-6 get a zero unit vector, so their
    projected speed is 0.
    """
    length = np.linalg.norm(toward, axis=1)
    unit = np.zeros_like(toward)
    usable = length > 1e-6
    unit[usable] = toward[usable] / length[usable, None]
    return length, (rel_vel * unit).sum(axis=1)


def add_context_on_finalframe(final_snap: pd.DataFrame) -> pd.DataFrame:
    """Add passer / defender context features to a one-row-per-player snapshot.

    Computed per (game_id, play_id) among the rows present in ``final_snap``:
      * ``dist_to_qb`` / ``rel_speed_to_qb``: distance to the first row whose
        role is "Passer" and the player's velocity relative to the passer,
        projected on the direction toward the passer. Both are 0 when the play
        has no passer row or the passer's x is NaN.
      * ``nearest_defender_dist`` / ``nearest_defender_rel_speed``: the same
        for the closest defender (side "Defense" or role "Defensive
        Coverage"), never matching a player with itself (same nfl_id). A
        defender with no other defender in the play gets an infinite
        distance. Without any defender both are 0.
      * ``def_count_r3`` / ``def_count_r5`` / ``def_count_r7``: number of
        defenders within 3 / 5 / 7 distance units.

    Missing input columns are created as NaN; remaining NaN in the new
    columns are replaced by 0.0.

    Returns:
        A copy of ``final_snap`` with the seven context columns.
    """
    context_cols = [
        'dist_to_qb', 'rel_speed_to_qb',
        'nearest_defender_dist', 'nearest_defender_rel_speed',
        'def_count_r3', 'def_count_r5', 'def_count_r7',
    ]
    required_cols = [
        'game_id', 'play_id', 'nfl_id', 'x', 'y',
        'velocity_x', 'velocity_y', 'player_role', 'player_side',
    ]

    snap = final_snap.copy()
    if len(snap) == 0:
        for name in context_cols:
            snap[name] = 0.0
        return snap

    # Make sure every column read below exists, then pre-create the outputs.
    for name in required_cols:
        if name not in snap.columns:
            snap[name] = np.nan
    for name in context_cols:
        snap[name] = np.nan

    # One pass per play; all per-player work inside is vectorised.
    for _, play in snap.groupby(['game_id', 'play_id']):
        rows = play.index.values
        n_rows = len(rows)
        roles = play['player_role'].values
        sides = play['player_side'].values

        # Passer state (NaN when the play has no passer row).
        is_qb = (roles == 'Passer')
        if is_qb.any():
            passer = play[is_qb].iloc[0]
            qb_pos = np.array([passer['x'], passer['y']])
            qb_vel = np.array([passer['velocity_x'], passer['velocity_y']])
        else:
            qb_pos = np.array([np.nan, np.nan])
            qb_vel = np.array([np.nan, np.nan])

        is_def = (sides == 'Defense') | (roles == 'Defensive Coverage')

        pos = play[['x', 'y']].to_numpy(dtype=float)
        vel = play[['velocity_x', 'velocity_y']].to_numpy(dtype=float)
        ids = play['nfl_id'].to_numpy(dtype=float)
        def_pos, def_vel, def_ids = pos[is_def], vel[is_def], ids[is_def]

        # Distance to the passer and closing speed toward him.
        if not np.isnan(qb_pos[0]):
            dist_qb, speed_qb = _approach_speed(qb_pos - pos, vel - qb_vel)
        else:
            dist_qb = np.zeros(n_rows)
            speed_qb = np.zeros(n_rows)

        # Nearest defender, closing speed toward him and defender density.
        if def_pos.shape[0] > 0:
            offsets = pos[:, None, :] - def_pos[None, :, :]      # [players, defenders, 2]
            gaps = np.sqrt((offsets ** 2).sum(axis=2))           # [players, defenders]

            # A defender must not be its own nearest defender.
            gaps[ids[:, None] == def_ids[None, :]] = np.inf

            nearest = np.argmin(gaps, axis=1)
            min_dist = gaps[np.arange(n_rows), nearest]
            _, speed_df = _approach_speed(def_pos[nearest] - pos, vel - def_vel[nearest])
            count_r3, count_r5, count_r7 = [
                (gaps <= radius).sum(axis=1) for radius in (3.0, 5.0, 7.0)
            ]
        else:
            min_dist = np.zeros(n_rows)
            speed_df = np.zeros(n_rows)
            count_r3 = np.zeros(n_rows)
            count_r5 = np.zeros(n_rows)
            count_r7 = np.zeros(n_rows)

        computed = {
            'dist_to_qb': dist_qb,
            'rel_speed_to_qb': speed_qb,
            'nearest_defender_dist': min_dist,
            'nearest_defender_rel_speed': speed_df,
            'def_count_r3': count_r3,
            'def_count_r5': count_r5,
            'def_count_r7': count_r7,
        }
        for name, values in computed.items():
            snap.loc[rows, name] = values

    # NaN left by the computation (or by rows no group covered) become 0.
    for name in context_cols:
        snap[name] = snap[name].fillna(0.0)

    return snap


def build_training_table(frame_df: pd.DataFrame,
                         out_df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the training table: one row per (player, future output frame).

    Steps:
      1. Take each player's last input frame (largest numeric ``frame_id``
         per game_id / play_id / nfl_id) as the snapshot.
      2. Add passer / defender context with add_context_on_finalframe using
         ALL players of the play, and only afterwards keep the rows with
         ``player_to_predict == True``. Filtering first would compute the
         context among scored players only: the passer and every non-scored
         defender would be invisible, and the features would differ from the
         ones build_test_snapshot produces, which never filters before the
         context step.
      3. Left-join the snapshot onto ``out_df`` (its x / y become
         ``target_x`` / ``target_y``; clashing snapshot columns get the
         ``_snap`` suffix, so ``frame_id`` is the output frame id).
      4. Mirror the targets of plays whose ``play_direction`` is "left".
      5. Add ``role_bucket`` via role_bucket_id.

    Args:
        frame_df: Feature-enriched input frames.
        out_df: Output tracking rows with game_id, play_id, nfl_id, x, y.

    Returns:
        The merged table with a fresh index.
    """
    player_keys = ['game_id', 'play_id', 'nfl_id']

    # Only the key columns are needed to locate the last frame, so avoid
    # copying the whole (wide) feature frame.
    locator = frame_df[player_keys].copy()
    locator['__fid__'] = pd.to_numeric(frame_df['frame_id'], errors='coerce')
    idx_last = locator.groupby(player_keys)['__fid__'].idxmax()

    final_snap = frame_df.loc[idx_last].copy().reset_index(drop=True)

    # Context first (needs every player of the play) ...
    final_snap = add_context_on_finalframe(final_snap)

    # ... then restrict to the players that are actually scored.
    if 'player_to_predict' in final_snap.columns:
        final_snap = final_snap[final_snap['player_to_predict'] == True].copy()

    # Join the observed future positions (x, y -> target_x, target_y).
    outc = out_df.rename(columns={'x': 'target_x', 'y': 'target_y'})
    merged = outc.merge(
        final_snap,
        on=player_keys,
        how='left',
        suffixes=('', '_snap')
    )

    # The output rows are not direction-normalised, so mirror left plays.
    if 'play_direction' in merged.columns:
        mask_left = merged['play_direction'].astype(str).str.lower().eq('left')
        merged['target_x'] = np.where(mask_left, FIELD_LENGTH - merged['target_x'], merged['target_x'])
        merged['target_y'] = np.where(mask_left, FIELD_WIDTH  - merged['target_y'], merged['target_y'])

    # role_bucket is one of the keys of the bucket offset table.
    merged['role_bucket'] = merged['player_role'].apply(role_bucket_id).astype(np.int8)

    return merged.reset_index(drop=True)


def build_test_snapshot(test_input_df: pd.DataFrame,
                        test_df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the inference table for the public test files (no target columns).

    A copy of ``test_input_df`` is direction-normalised, cut to the last
    N_FRAMES_KEEP frames and enriched with per-frame and sequence features.
    Each player's last frame becomes the snapshot, gets the passer / defender
    context and is left-joined onto ``test_df``.

    Column naming of the result: the frame id of ``test_df`` is
    ``frame_id_x``, the snapshot's frame id is ``frame_id_y`` (there is no
    plain ``frame_id`` column), and ``play_direction`` is a copy of
    ``play_direction_snap``. NaN in ``time_to_ball`` and
    ``nearest_defender_dist`` are replaced by 0.0.
    """
    player_keys = ['game_id', 'play_id', 'nfl_id']

    frames = test_input_df.copy()
    normalize_orientation_inplace(frames)
    frames = keep_last_n_frames(frames, N_FRAMES_KEEP)
    feats = add_sequence_features(add_perframe_features(frames), window=WINDOW_SIZE)

    # Locate the last frame of every player.
    locator = feats.copy()
    locator['__fid__'] = pd.to_numeric(locator['frame_id'], errors='coerce')
    last_rows = locator.groupby(player_keys)['__fid__'].idxmax()
    snapshot = feats.loc[last_rows].copy().reset_index(drop=True)

    # Rename the columns that would otherwise clash with test_df's.
    snapshot = snapshot.rename(columns={
        'frame_id': 'frame_id_y',
        'play_direction': 'play_direction_snap',
    })

    # Passer / defender context.
    snapshot = add_context_on_finalframe(snapshot)

    # Join onto the rows to predict; frame_id_x is the future frame id.
    future_rows = test_df.copy().rename(columns={'frame_id': 'frame_id_x'})
    merged = future_rows.merge(snapshot, on=player_keys, how='left')

    merged['role_bucket'] = merged['player_role'].apply(role_bucket_id).astype(np.int8)

    # Keep the play direction so the mirroring can be undone later.
    merged['play_direction'] = merged['play_direction_snap']

    for name in ('time_to_ball', 'nearest_defender_dist'):
        if name in merged.columns:
            merged[name] = merged[name].fillna(0.0)

    return merged


def get_feature_columns(df_cols):
    """
    Return the model input columns that are present in ``df_cols``.

    The candidate list is: the snapshot features, the lag features for the
    first WINDOW_SIZE entries of [1, 2, 3], the rolling mean / std features
    for WINDOW_SIZE, the two velocity deltas and the ``isna_*`` flags -- in
    that order. Candidates missing from ``df_cols`` are dropped.
    """
    snapshot_cols = [
        'x','y','s','a','o','dir','velocity_x','velocity_y','acceleration_x','acceleration_y',
        'dist_to_ball','angle_to_ball','velocity_toward_ball','time_to_ball','orientation_diff',
        'role_targeted_receiver','role_defensive_coverage','role_passer',
        'side_offense','height_inches','player_weight','bmi',
        'ball_land_x','ball_land_y','num_frames_output','frame_id',
        'speed_squared','accel_magnitude','velocity_alignment',
        'expected_x_at_ball','expected_y_at_ball',
        'error_from_ball_x','error_from_ball_y','error_from_ball',
        'momentum_x','momentum_y','kinetic_energy',
        'angle_diff','time_squared','dist_squared','weighted_dist_by_time',
        'dist_to_sideline','dist_to_endzone',
        'dist_to_qb','rel_speed_to_qb',
        'nearest_defender_dist','nearest_defender_rel_speed',
        'def_count_r3','def_count_r5','def_count_r7',
        'dir_sin','dir_cos','o_sin','o_cos',
        'angle_to_ball_sin','angle_to_ball_cos',
    ]

    lag_cols = [
        f'{src}_lag{lag}'
        for lag in [1, 2, 3][:WINDOW_SIZE]
        for src in ['x', 'y', 'velocity_x', 'velocity_y', 's', 'a']
    ]

    window_cols = []
    for src in ['x', 'y', 'velocity_x', 'velocity_y', 's']:
        window_cols += [f'{src}_roll{WINDOW_SIZE}', f'{src}_std{WINDOW_SIZE}']

    delta_cols = ['velocity_x_delta', 'velocity_y_delta']

    na_flag_cols = [
        f'isna_{src}'
        for src in ['s', 'a', 'o', 'dir', 'ball_land_x', 'ball_land_y', 'player_height']
    ]

    candidates = snapshot_cols + lag_cols + window_cols + delta_cols + na_flag_cols
    return [name for name in candidates if name in df_cols]


def build_week_block_2fold(df_local: pd.DataFrame):
    """
    Build the two week-block CV folds.

    Fold 1 validates on week 17 and fold 2 on week 18. Rows are assigned
    per play (game_id, play_id): a play belongs to the week found on its
    first row, so one play never straddles train and validation.

    Returns:
        List of two ``(train_idx, val_idx)`` tuples of positional indices
        (positions after resetting the index of ``df_local``).
    """
    frame = df_local.reset_index(drop=True)
    play_key = frame['game_id'].astype(str) + '_' + frame['play_id'].astype(str)

    # Week of each play = week on the play's first row.
    is_first_row = ~play_key.duplicated()
    week_by_play = {
        key: int(week)
        for key, week in zip(play_key[is_first_row], frame.loc[is_first_row, 'week'])
    }

    positions = np.arange(len(frame))
    folds = []
    for held_out_weeks in ({17}, {18}):
        val_plays = {key for key, week in week_by_play.items() if week in held_out_weeks}
        in_val = play_key.isin(val_plays).values
        folds.append((positions[~in_val], positions[in_val]))
    return folds


def _bucket_index(values, edges):
    """Left-closed bin index of each value, clipped to the valid bin range.

    Uses the same rule as apply_bucket_offset_batch:
    ``clip(np.digitize(value, edges, right=False) - 1, 0, len(edges) - 2)``.
    """
    idx = np.digitize(np.asarray(values, dtype=float), edges, right=False) - 1
    return np.clip(idx, 0, len(edges) - 2)


def build_bucket_offset(oof_df: pd.DataFrame):
    """
    Learn the mean out-of-fold error per bucket as a correction table.

    Buckets are ``(time_to_ball bin, nearest_defender_dist bin, role_bucket)``.
    For every bucket that occurs in ``oof_df`` the table stores the mean of
    ``target - pred`` for x and y.

    The bins are left-closed (``edges[i] <= value < edges[i+1]``) and
    out-of-range values are clipped into the first / last bin, exactly like
    the lookup in apply_bucket_offset_batch. Binning with ``pd.cut`` defaults
    (right-closed intervals) would put values lying on an edge -- e.g.
    time_to_ball of 0.6, 1.2 or 2.0 -- into the bucket below the one the
    lookup reads, so the learned offset would be applied to the wrong rows.

    Missing ``time_to_ball`` counts as 0 and missing
    ``nearest_defender_dist`` as 99.

    Args:
        oof_df: Needs time_to_ball, nearest_defender_dist, role_bucket,
            target_x, target_y, pred_x and pred_y.

    Returns:
        Tuple ``(offset_dict, tbins, dbins)`` where ``offset_dict`` maps
        ``(t_bin, d_bin, role_bucket)`` to ``(mean_err_x, mean_err_y)``.
    """
    tbins = [0.0, 0.6, 1.2, 2.0, np.inf]
    dbins = [0.0, 2.0, 4.0, 6.0, np.inf]

    d = oof_df.copy()
    d['t_bin'] = _bucket_index(d['time_to_ball'].fillna(0), tbins)
    d['d_bin'] = _bucket_index(d['nearest_defender_dist'].fillna(99), dbins)

    d['err_x'] = d['target_x'] - d['pred_x']
    d['err_y'] = d['target_y'] - d['pred_y']

    tab = (
        d.groupby(['t_bin', 'd_bin', 'role_bucket'])[['err_x', 'err_y']]
        .mean()
        .reset_index()
    )

    offset_dict = {
        (int(t_bin), int(d_bin), int(role)): (float(err_x), float(err_y))
        for t_bin, d_bin, role, err_x, err_y in zip(
            tab['t_bin'], tab['d_bin'], tab['role_bucket'], tab['err_x'], tab['err_y']
        )
    }

    return offset_dict, tbins, dbins


In [None]:
# =========================================================
# セル3: 学習本体
# - 物理モデル(等速予測)との差分(残差)をCatBoostで学習
# - time_to_ball<1秒をSHORTモデル、それ以外をLONGモデルとして別々に学習
# - CVはweek17/18ブロック×複数seed、OOFからバケット補正テーブル作成
# - すべてまとめてbundle.pklに保存 (推論ノートブックで読む)
# =========================================================

def train_models_for_split(df_split: pd.DataFrame,
                           feature_cols: list):
    """
    Train the CatBoost residual ensemble for one (short or long) split.

    The constant-velocity prediction from compute_physics_xy is the baseline;
    one model per axis learns ``target - baseline``. For every seed in SEEDS
    and every fold of build_week_block_2fold an x-model and a y-model are
    fitted with early stopping on the validation fold. The clipped
    out-of-fold predictions are collected and passed to build_bucket_offset.

    Args:
        df_split: Training rows of the split (needs x, y, velocity_x,
            velocity_y, frame_id, target_x, target_y, the feature columns and
            the bucket columns).
        feature_cols: Names of the model input columns.

    Returns:
        Dict with ``models_x``, ``models_y`` (one model per seed and fold),
        ``cv_rmse`` (mean fold RMSE), ``physics_rmse`` and ``bucket_offset``
        (``dict``, ``tbins``, ``dbins``).
    """
    def _zero_filled(col):
        return df_split[col].fillna(0).values

    def _rmse_xy(true_x, true_y, est_x, est_y):
        # RMSE over both axes: sqrt of the mean of the two per-axis MSEs.
        return np.sqrt(0.5 * (
            mean_squared_error(true_x, est_x) + mean_squared_error(true_y, est_y)
        ))

    def _fit_residual_model(seed, X_tr, y_tr, X_va, y_va):
        # One GPU CatBoost regressor with early stopping on the validation fold.
        model = CatBoostRegressor(
            iterations=ITERATIONS,
            learning_rate=LEARNING_RATE,
            depth=DEPTH,
            l2_leaf_reg=L2_LEAF_REG,
            random_seed=seed,
            task_type='GPU', devices='0',
            bootstrap_type=BOOTSTRAP_TYPE,
            bagging_temperature=BAGGING_TEMPERATURE,
            loss_function='RMSE',
            early_stopping_rounds=EARLY_STOPPING,
            verbose=VERBOSE_EVAL
        )
        model.fit(CatBoostPool(X_tr, y_tr), eval_set=CatBoostPool(X_va, y_va),
                  verbose=VERBOSE_EVAL)
        return model

    # Constant-velocity (physics) baseline.
    fr_future = pd.to_numeric(df_split['frame_id'], errors='coerce').fillna(0).values
    phys_x, phys_y = compute_physics_xy(
        _zero_filled('x'), _zero_filled('y'),
        _zero_filled('velocity_x'), _zero_filled('velocity_y'),
        fr_future,
    )

    tx = df_split['target_x'].values
    ty = df_split['target_y'].values

    # Residual targets learned by the models.
    yres_x = tx - phys_x
    yres_y = ty - phys_y

    base_rmse = _rmse_xy(tx, ty, phys_x, phys_y)
    print(f"[split] Physics baseline RMSE: {base_rmse:.4f}")

    Xmat = df_split[feature_cols].fillna(0).to_numpy(dtype=np.float32)
    folds = build_week_block_2fold(df_split)

    models_x_all, models_y_all = [], []
    val_scores, oof_parts = [], []
    oof_columns = [
        'time_to_ball','nearest_defender_dist','role_bucket',
        'target_x','target_y'
    ]

    for seed in SEEDS:
        print(f"  >> SEED {seed}")
        set_seed(seed)

        for fi, (tr_idx, va_idx) in enumerate(folds, start=1):
            print(f"     Fold {fi}/{len(folds)} tr={len(tr_idx)} va={len(va_idx)}")

            Xtr, Xva = Xmat[tr_idx], Xmat[va_idx]

            # x residual model, then y residual model.
            model_x = _fit_residual_model(seed, Xtr, yres_x[tr_idx], Xva, yres_x[va_idx])
            models_x_all.append(model_x)

            model_y = _fit_residual_model(seed, Xtr, yres_y[tr_idx], Xva, yres_y[va_idx])
            models_y_all.append(model_y)

            # Validation prediction = baseline + predicted residual, clipped to the field.
            pred_x_full = np.clip(phys_x[va_idx] + model_x.predict(Xva), 0, FIELD_LENGTH)
            pred_y_full = np.clip(phys_y[va_idx] + model_y.predict(Xva), 0, FIELD_WIDTH)

            fold_rmse = _rmse_xy(tx[va_idx], ty[va_idx], pred_x_full, pred_y_full)
            print(f"        Fold {fi} RMSE={fold_rmse:.4f}")
            val_scores.append(fold_rmse)

            # Collect the OOF rows for the bucket offset table.
            part = df_split.iloc[va_idx][oof_columns].copy()
            part['pred_x'] = pred_x_full
            part['pred_y'] = pred_y_full
            oof_parts.append(part)

    cv_rmse = float(np.mean(val_scores))
    print(f"[split] CV RMSE folds×seeds: {cv_rmse:.4f} (physics {base_rmse:.4f})")

    oof_df = pd.concat(oof_parts, ignore_index=True)
    offset_dict, tbins, dbins = build_bucket_offset(oof_df)
    print(f"[split] bucket_offset entries: {len(offset_dict)}")

    return {
        "models_x": models_x_all,
        "models_y": models_y_all,
        "cv_rmse": cv_rmse,
        "physics_rmse": base_rmse,
        "bucket_offset": {
            "dict": offset_dict,
            "tbins": tbins,
            "dbins": dbins
        }
    }


def create_and_save_bundle(output_path="/kaggle/working/bundle.pkl"):
    """
    Run the full training pipeline and pickle the resulting bundle.

      1. Load the train and test files.
      2. Normalise the play direction of the training input.
      3. Keep the last N_FRAMES_KEEP frames and build per-frame and sequence
         features.
      4. Build the training table (train_tbl).
      5. Split by time_to_ball into SHORT / LONG and train CatBoost on each.
      6. Merge the SHORT and LONG bucket offset tables.
      7. Save everything to ``output_path``.

    The raw ``test_input`` is handed to build_test_snapshot untouched: that
    function normalises its own copy, so normalising here as well would
    mirror left plays twice and leave them in their original orientation.

    Bundle keys: ``feature_cols``, ``short_models_x/y``, ``long_models_x/y``,
    ``bucket_offset_3d`` (LONG table updated with the SHORT table, so SHORT
    wins on shared keys), ``bucket_offset_short`` / ``bucket_offset_long``
    (the two tables before merging, for consumers that want the table of the
    split a row belongs to), ``tbins``, ``dbins`` and ``role_map``.

    Args:
        output_path: Destination of the pickle file.

    Returns:
        The bundle dict that was written.
    """
    print(f"CPU cores visible: {cpu_count()} | cuDF backend: {USE_CUDF}")

    # --- load data ---
    print("Loading train & test...")
    train_input, train_output = load_all_train()
    test_input,  test_meta    = load_test()
    print("Shapes:")
    print("  train_input :", train_input.shape)
    print("  train_output:", train_output.shape)
    print("  test_input  :", test_input.shape)
    print("  test_meta   :", test_meta.shape)

    # Align the attack direction of the training input. test_input is left
    # as loaded on purpose (see docstring).
    normalize_orientation_inplace(train_input)

    # Keep only the last N frames; test_cut is only used for the shape log.
    train_cut = keep_last_n_frames(train_input, N_FRAMES_KEEP)
    test_cut  = keep_last_n_frames(test_input , N_FRAMES_KEEP)
    print("[CUT] train_cut:", train_cut.shape, " test_cut:", test_cut.shape)

    # Per-frame and sequence features.
    train_feat = add_perframe_features(train_cut)
    train_feat = add_sequence_features(train_feat, window=WINDOW_SIZE)

    # Training table (snapshot + future targets).
    train_tbl = build_training_table(train_feat, train_output)
    print("[TRAIN_TBL]", train_tbl.shape)

    # Test snapshot, built only as a smoke test on the public data.
    test_tbl  = build_test_snapshot(test_input, test_meta)
    print("[TEST_TBL ]", test_tbl.shape)

    # Model input columns.
    feature_cols = get_feature_columns(train_tbl.columns)
    print(f"feature_cols: {len(feature_cols)} cols")

    # Split by time_to_ball (rows with NaN time_to_ball end up in LONG).
    short_mask = train_tbl['time_to_ball'] < SHORT_TIME_THRESHOLD
    train_short = train_tbl[short_mask].reset_index(drop=True)
    train_long  = train_tbl[~short_mask].reset_index(drop=True)
    print("[SPLIT] short:", len(train_short), " long:", len(train_long))

    print("\n=== TRAIN SHORT (time_to_ball < 1.0s) ===")
    short_bundle = train_models_for_split(train_short, feature_cols)

    print("\n=== TRAIN LONG (time_to_ball >= 1.0s) ===")
    long_bundle  = train_models_for_split(train_long , feature_cols)

    off_s = short_bundle["bucket_offset"]["dict"]
    off_l = long_bundle["bucket_offset"]["dict"]

    # Merged table, unchanged for existing consumers: LONG first, then SHORT,
    # so SHORT entries overwrite LONG entries that share a key.
    merged_off = {}
    merged_off.update(off_l)
    merged_off.update(off_s)

    final_bundle = {
        "feature_cols": feature_cols,

        # Trained CatBoost models (short / long, x / y).
        "short_models_x": short_bundle["models_x"],
        "short_models_y": short_bundle["models_y"],
        "long_models_x":  long_bundle["models_x"],
        "long_models_y":  long_bundle["models_y"],

        # Bucket offset tables.
        "bucket_offset_3d": merged_off,
        # The same information per split. Both splits can own the same
        # (t_bin, d_bin, role_bucket) key, and in the merged table the LONG
        # value of such a key is lost.
        "bucket_offset_short": dict(off_s),
        "bucket_offset_long": dict(off_l),
        "tbins": short_bundle["bucket_offset"]["tbins"],
        "dbins": short_bundle["bucket_offset"]["dbins"],

        # player_role -> role_bucket id mapping (also needed at inference).
        "role_map": {
            "Targeted Receiver": 0,
            "Defensive Coverage": 1,
            "__other__": 2
        }
    }

    with open(output_path, "wb") as f:
        pickle.dump(final_bundle, f)

    print(f"[SAVED] bundle -> {output_path}")
    return final_bundle


# Run at cell/module execution time: trains the models and writes
# /kaggle/working/bundle.pkl; the returned bundle is kept in BUNDLE.
BUNDLE = create_and_save_bundle("/kaggle/working/bundle.pkl")
print("Done. Bundle keys:", list(BUNDLE.keys()))


In [None]:
# =========================================================
# セル4: ローカル検証(任意)
# - 学習済みbundleを用いて、推論の後処理(バケット補正・左右反転戻し)まで
#   1プレー分で通す検証ヘルパー。
# - 提出セルと同じロジックをここでもう一度再現して
#   "少なくともローカルで動く"ことを確認する目的。
# - 本番Submitには不要。
# =========================================================

def apply_bucket_offset_batch(px_raw, py_raw,
                              t_ball_arr, def_dist_arr, role_b_arr,
                              offset_dict, tbins, dbins):
    """Shift raw predictions by the offset of the bucket each row falls into.

    The bucket key is ``(t_bin, d_bin, role_bucket)``. Bins are left-closed
    (``np.digitize(..., right=False) - 1``) and clipped to the valid range;
    a NaN time counts as 0 and a NaN defender distance as 99. Rows whose key
    is not in ``offset_dict`` are returned unchanged.

    Returns:
        Tuple ``(adj_x, adj_y)`` of float arrays shaped like the inputs.
    """
    adj_x = np.empty_like(px_raw, dtype=float)
    adj_y = np.empty_like(py_raw, dtype=float)

    # Bin every row at once instead of one np.digitize call per row.
    t_clean = np.nan_to_num(np.asarray(t_ball_arr, dtype=float))
    d_clean = np.nan_to_num(np.asarray(def_dist_arr, dtype=float), nan=99.0)
    t_bins = np.clip(np.digitize(t_clean, tbins, right=False) - 1, 0, len(tbins) - 2)
    d_bins = np.clip(np.digitize(d_clean, dbins, right=False) - 1, 0, len(dbins) - 2)

    no_shift = (0.0, 0.0)
    for i, (raw_x, raw_y) in enumerate(zip(px_raw, py_raw)):
        key = (int(t_bins[i]), int(d_bins[i]), int(role_b_arr[i]))
        shift_x, shift_y = offset_dict.get(key, no_shift)
        adj_x[i] = raw_x + shift_x
        adj_y[i] = raw_y + shift_y
    return adj_x, adj_y


def run_catboost_ensemble_for_rows(df_rows: pd.DataFrame,
                                   bundle,
                                   is_short_split: bool):
    """
    Predict one subset of rows (either the SHORT or the LONG split) in bulk.

    The constant-velocity baseline from ``compute_physics_xy`` is added to
    the mean residual of the split's CatBoost models, clipped to the field,
    and finally corrected with the bucket offsets stored in the bundle.

    Returns:
        ``(adj_x, adj_y)`` arrays; two empty float arrays for empty input.
    """
    if len(df_rows) == 0:
        return np.array([],dtype=float), np.array([],dtype=float)

    feature_names = bundle["feature_cols"]
    frame = df_rows.copy()
    # Feature columns absent from the input are created as all-zero columns.
    for name in feature_names:
        if name not in frame.columns:
            frame[name] = 0.0
    design = frame[feature_names].fillna(0.0).to_numpy(dtype=np.float32)

    def filled(column, default=0.0):
        """Return one column as an ndarray with NaN replaced by ``default``."""
        return frame[column].fillna(default).to_numpy()

    start_x = filled('x')
    start_y = filled('y')
    vel_x = filled('velocity_x')
    vel_y = filled('velocity_y')
    future_frames = pd.to_numeric(frame['frame_id'], errors='coerce').fillna(0.0).to_numpy()
    base_x, base_y = compute_physics_xy(start_x, start_y, vel_x, vel_y, future_frames)

    if is_short_split:
        x_models, y_models = bundle["short_models_x"], bundle["short_models_y"]
    else:
        x_models, y_models = bundle["long_models_x"], bundle["long_models_y"]

    # Ensemble = plain average of every model's residual prediction.
    resid_x = np.mean([model.predict(design) for model in x_models], axis=0)
    resid_y = np.mean([model.predict(design) for model in y_models], axis=0)

    raw_x = np.clip(base_x + resid_x, 0, FIELD_LENGTH)
    raw_y = np.clip(base_y + resid_y, 0, FIELD_WIDTH)

    offsets = bundle["bucket_offset_3d"]
    time_edges = bundle["tbins"]
    dist_edges = bundle["dbins"]

    time_to_ball = filled('time_to_ball')
    defender_dist = filled('nearest_defender_dist', 99.0)
    role_ids = filled('role_bucket', bundle["role_map"]["__other__"])

    return apply_bucket_offset_batch(
        raw_x, raw_y, time_to_ball, defender_dist, role_ids,
        offsets, time_edges, dist_edges
    )

# Notify that the optional validation helpers of cell 4 are now defined.
print("セル4: 検証用ヘルパー定義完了（任意で利用）")


In [None]:
# =========================================================
# セル5: 提出用ノートブック
# - Kaggleの評価APIに合わせてpredict()とサーバを定義するセル
# - 学習済みbundle.pklを読み込み、time_to_ballでSHORT/LONGに分岐して予測
# - 最後に攻撃方向を元に戻して(x,y)を返す
#
# 実際のSubmitノートブックでは、このセルだけを使用する想定。
# あなたの自作dataset(=bundle.pklを含む)をAdd dataして、
# BUNDLE_CANDIDATESパスを合わせておくこと。
# run_local_gateway() でsubmission.csvをその場で生成できる。
# =========================================================

import os
import warnings
warnings.filterwarnings("ignore")

# cuDF accelerator (optional; things work without it but it can be faster).
# NOTE(review): USE_CUDF ends up True whenever `cupy` imports successfully;
# nothing in this block imports or installs `cudf.pandas`, so the "ENABLED"
# message may overstate what is active. TODO confirm that the
# CUDF_PANDAS_BACKEND environment variable is honored by the installed
# RAPIDS version.
USE_CUDF = False
try:
    os.environ["CUDF_PANDAS_BACKEND"] = "cudf"
    import pandas as pd
    import numpy as np
    import cupy as cp
    USE_CUDF = True
    print("✅ cuDF pandas backend ENABLED (GPU DataFrame ops)")
except Exception:
    # Any failure above (e.g. cupy missing) falls back to plain CPU pandas.
    import pandas as pd
    import numpy as np
    print("cuDF backend not available -> using pandas (CPU)")

import pickle
import polars as pl
from kaggle_evaluation import nfl_inference_server

# Field extents used for mirroring and clipping, plus the settings that are
# re-declared here so this cell can run without the training cells.
FIELD_LENGTH = 120.0
FIELD_WIDTH  = 53.3
FPS = 10.0                  # frames per second
N_FRAMES_KEEP = 10          # keep at most this many trailing input frames per player
WINDOW_SIZE   = 3           # window width for lag / rolling features
SHORT_TIME_THRESHOLD = 1.0  # time_to_ball below this routes a row to the SHORT models

# Candidate locations of the trained bundle.pkl, tried in order.
BUNDLE_CANDIDATES = [
    "/kaggle/input/nfl-bdb2026-bundle/bundle.pkl",   # <- update to match your own Dataset name
    "/kaggle/input/nfl-bdb2026-final/bundle.pkl",
    "/kaggle/input/bundle/bundle.pkl",
    "/kaggle/input/bundle.pkl",
    "/kaggle/working/bundle.pkl",  # for local test runs
]

# Filled by load_bundle_once() so the pickle is read only once per process.
_BUNDLE_CACHE = None


def _find_bundle_path():
    """
    Return the first path in BUNDLE_CANDIDATES that exists on disk.

    Raises:
        RuntimeError: if none of the candidate paths exists.
    """
    found = next(
        (path for path in BUNDLE_CANDIDATES if os.path.exists(path)),
        None,
    )
    if found is None:
        raise RuntimeError(
            "bundle.pkl が見つからない。'Add data' で学習済みモデル束Datasetを追加し、"
            "BUNDLE_CANDIDATES を正しいパスにして下さい。"
        )
    print(f"[INFO] Found bundle at: {found}")
    return found


def load_bundle_once():
    """
    Load bundle.pkl a single time and keep it in the module-level cache.

    The unpickled object is validated *before* it is cached, so a bundle
    that lacks a required key is never stored and every call keeps raising
    instead of silently returning the invalid object on the second call.

    Returns:
        The bundle dict (the same object on every call).

    Raises:
        RuntimeError: if no bundle file is found (from _find_bundle_path)
            or a required key is missing from the bundle.
    """
    global _BUNDLE_CACHE
    if _BUNDLE_CACHE is not None:
        return _BUNDLE_CACHE

    bundle_path = _find_bundle_path()
    # NOTE: pickle.load can execute arbitrary code; only point
    # BUNDLE_CANDIDATES at bundle files you produced yourself.
    with open(bundle_path, "rb") as f:
        loaded = pickle.load(f)
    print("[INFO] bundle loaded. keys:", list(loaded.keys()))

    required_keys = [
        "feature_cols",
        "short_models_x", "short_models_y",
        "long_models_x", "long_models_y",
        "bucket_offset_3d", "tbins", "dbins",
        "role_map",
    ]
    for k in required_keys:
        if k not in loaded:
            raise RuntimeError(f"Missing key '{k}' in bundle.pkl")

    # Publish to the cache only after validation succeeded.
    _BUNDLE_CACHE = loaded
    return _BUNDLE_CACHE


def normalize_orientation_inplace(df: pd.DataFrame):
    """
    Mirror plays whose play_direction is 'left', modifying ``df`` in place.

    For those rows x / ball_land_x become FIELD_LENGTH - value,
    y / ball_land_y become FIELD_WIDTH - value, and the angles dir / o are
    rotated by 180 degrees (mod 360). Columns that are absent are skipped,
    and nothing happens when there is no play_direction column.
    """
    if 'play_direction' not in df.columns:
        return

    is_left = df['play_direction'].astype(str).str.lower().eq('left')

    # (column, field extent to mirror against)
    mirrored_axes = (
        ('x', FIELD_LENGTH),
        ('y', FIELD_WIDTH),
        ('ball_land_x', FIELD_LENGTH),
        ('ball_land_y', FIELD_WIDTH),
    )
    for column, extent in mirrored_axes:
        if column in df.columns:
            df.loc[is_left, column] = extent - df.loc[is_left, column]

    for angle in ('dir', 'o'):
        if angle in df.columns:
            df.loc[is_left, angle] = (df.loc[is_left, angle] + 180) % 360


def compute_physics_xy(x, y, vx, vy, future_frame_id):
    """
    Constant-velocity projection of a position.

    ``future_frame_id`` is coerced to numbers (non-numeric -> NaN -> 0) and
    divided by FPS to obtain the elapsed time; the projected point is
    clipped to the field rectangle.

    Returns:
        ``(px, py)`` projected coordinates.
    """
    frames = pd.to_numeric(future_frame_id, errors='coerce')
    elapsed = np.nan_to_num(frames) / FPS
    projected_x = np.clip(x + vx * elapsed, 0, FIELD_LENGTH)
    projected_y = np.clip(y + vy * elapsed, 0, FIELD_WIDTH)
    return projected_x, projected_y


def role_bucket_id_inference(player_role: str, role_map: dict) -> int:
    """
    Collapse a player role into one of three bucket ids.

    "Targeted Receiver" and "Defensive Coverage" use their own entry of
    ``role_map`` (defaults 0 and 1 when the entry is missing); every other
    role uses the "__other__" entry (default 2).
    """
    named_buckets = (
        ("Targeted Receiver", 0),
        ("Defensive Coverage", 1),
    )
    for role_name, default_id in named_buckets:
        if player_role == role_name:
            return role_map.get(role_name, default_id)
    return role_map.get("__other__", 2)


def keep_last_n_frames(df: pd.DataFrame, n_last=N_FRAMES_KEEP) -> pd.DataFrame:
    """
    Keep only the trailing ``n_last`` frames of every (game, play, nfl_id).

    Rows are ordered by the player key and frame_id first, so "trailing"
    means the highest frame_id values. Always returns a copy.
    """
    if len(df) == 0:
        return df.copy()

    player_keys = ['game_id', 'play_id', 'nfl_id']
    ordered = df.sort_values(player_keys + ['frame_id'])
    kept_labels = ordered.groupby(player_keys).tail(n_last).index
    return ordered.loc[kept_labels].copy()


def add_perframe_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Attach single-frame features, intended to mirror the training-time ones.

    Works on a copy of ``df`` and returns it; an empty frame is returned
    unchanged. The columns 'dir', 's', 'a', 'o', 'x', 'y', 'player_role',
    'player_side', 'player_height' and 'player_weight' are read
    unconditionally, so they must be present for non-empty input.

    NOTE(review): the formulas below are kept exactly as written because the
    trained models presumably depend on them; verify against the training
    cell before changing any of them.
    """
    out = df.copy()
    if len(out)==0:
        return out

    # Missing-value indicator flags, created only for columns that exist.
    for col in ['s','a','o','dir','ball_land_x','ball_land_y','player_height']:
        if col in out.columns:
            out[f'isna_{col}'] = out[col].isna().astype(np.int8)

    # NaN direction / speed / acceleration are treated as 0.
    dir_rad = np.radians(out['dir'].fillna(0.0))
    spd = out['s'].fillna(0.0)
    acc = out['a'].fillna(0.0)

    # Decompose speed and acceleration along `dir` (cos -> x, sin -> y).
    out['velocity_x']     = spd * np.cos(dir_rad)
    out['velocity_y']     = spd * np.sin(dir_rad)
    out['acceleration_x'] = acc * np.cos(dir_rad)
    out['acceleration_y'] = acc * np.sin(dir_rad)

    # Geometry relative to the ball landing point (NaN when unavailable).
    if {'ball_land_x','ball_land_y','x','y'}.issubset(out.columns):
        dx = out['ball_land_x'] - out['x']
        dy = out['ball_land_y'] - out['y']
        dist = np.sqrt(dx*dx + dy*dy)
        out['dist_to_ball'] = dist
        out['angle_to_ball'] = np.arctan2(dy, dx)
        # Velocity component along the player -> ball direction.
        out['velocity_toward_ball'] = (
            out['velocity_x'] * np.cos(out['angle_to_ball']) +
            out['velocity_y'] * np.sin(out['angle_to_ball'])
        )
    else:
        out['dist_to_ball'] = np.nan
        out['angle_to_ball'] = np.nan
        out['velocity_toward_ball'] = np.nan

    # Number of output frames converted to seconds via FPS.
    if 'num_frames_output' in out.columns:
        out['time_to_ball'] = out['num_frames_output'] / FPS
    else:
        out['time_to_ball'] = np.nan

    # Absolute difference between orientation and direction, folded at 180.
    diff = np.abs(out['o'].fillna(0.0) - out['dir'].fillna(0.0))
    out['orientation_diff'] = np.minimum(diff, 360 - diff)

    # One-hot style role / side flags.
    out['role_targeted_receiver']  = (out['player_role'] == 'Targeted Receiver').astype(np.int8)
    out['role_defensive_coverage'] = (out['player_role'] == 'Defensive Coverage').astype(np.int8)
    out['role_passer']             = (out['player_role'] == 'Passer').astype(np.int8)
    out['side_offense']            = (out['player_side'] == 'Offense').astype(np.int8)

    # player_height is split on the first '-' into two numbers and combined
    # as first*12 + second (presumably "feet-inches"); missing -> '0-0'.
    ht = out['player_height'].fillna('0-0').astype(str).str.split('-', n=1, expand=True)
    ft  = pd.to_numeric(ht[0], errors='coerce').fillna(0)
    inc = pd.to_numeric(ht[1], errors='coerce').fillna(0)
    height_in = ft * 12 + inc
    out['height_inches'] = height_in
    # The denominator is floored at 1e-6 so a zero height does not divide by 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        out['bmi'] = (out['player_weight'] / (np.maximum(height_in,1e-6)**2)) * 703

    out['speed_squared'] = spd**2
    out['accel_magnitude'] = np.sqrt(out['acceleration_x']**2 + out['acceleration_y']**2)
    out['velocity_alignment'] = np.cos(out['angle_to_ball'] - np.radians(out['dir'].fillna(0.0)))

    # Constant-velocity position after time_to_ball and its miss distance
    # from the ball landing point.
    t_ball = out['time_to_ball'].fillna(0.0)
    out['expected_x_at_ball'] = out['x'] + out['velocity_x'] * t_ball
    out['expected_y_at_ball'] = out['y'] + out['velocity_y'] * t_ball
    out['error_from_ball_x'] = out['expected_x_at_ball'] - out['ball_land_x']
    out['error_from_ball_y'] = out['expected_y_at_ball'] - out['ball_land_y']
    out['error_from_ball']   = np.sqrt(out['error_from_ball_x']**2 + out['error_from_ball_y']**2)

    # Missing weights fall back to 200.0 for the momentum / energy features.
    out['momentum_x'] = out['player_weight'].fillna(200.0) * out['velocity_x']
    out['momentum_y'] = out['player_weight'].fillna(200.0) * out['velocity_y']
    out['kinetic_energy'] = 0.5 * out['player_weight'].fillna(200.0) * out['speed_squared']

    # NOTE(review): arctan2 yields degrees in [-180, 180]; if `o` lies in
    # [0, 360) then `ad` can exceed 360 and angle_diff becomes negative.
    # Left as is to stay aligned with training - confirm.
    ang_ball_deg = np.degrees(out['angle_to_ball'])
    ad = np.abs(out['o'].fillna(0.0) - ang_ball_deg)
    out['angle_diff'] = np.minimum(ad, 360 - ad)

    out['time_squared'] = out['time_to_ball']**2
    out['dist_squared'] = out['dist_to_ball']**2
    # +0.1 keeps the ratio finite when time_to_ball is 0.
    out['weighted_dist_by_time'] = out['dist_to_ball'] / (out['time_to_ball'] + 0.1)

    out['dist_to_sideline'] = np.minimum(out['y'], FIELD_WIDTH - out['y'])
    out['dist_to_endzone']  = FIELD_LENGTH - out['x']

    # Sine / cosine encodings of the angular columns.
    for col in ['dir','o']:
        rad = np.radians(out[col].fillna(0.0))
        out[f'{col}_sin'] = np.sin(rad)
        out[f'{col}_cos'] = np.cos(rad)
    rad_ball = out['angle_to_ball'].fillna(0.0)
    out['angle_to_ball_sin'] = np.sin(rad_ball)
    out['angle_to_ball_cos'] = np.cos(rad_ball)

    return out


def add_sequence_features(df: pd.DataFrame, window=WINDOW_SIZE) -> pd.DataFrame:
    """
    Attach per-player lag, rolling and delta features.

    Rows are sorted by (game_id, play_id, nfl_id, frame_id) and every
    statistic is computed within one player's frames of one play:
    lags 1..3 (truncated by ``window``), rolling mean / std over ``window``
    frames, and the first difference of the velocity components. NaNs in
    the generated columns are replaced by 0. Returns a new frame.
    """
    if len(df) == 0:
        return df.copy()

    player_keys = ['game_id', 'play_id', 'nfl_id']
    out = df.sort_values(player_keys + ['frame_id']).copy()
    key_levels = list(range(len(player_keys)))

    def per_player(name):
        """Group one column of ``out`` by player within a play."""
        return out.groupby(player_keys)[name]

    def rolling_stat(name, stat):
        """Rolling statistic re-aligned to the row index of ``out``."""
        rolled = getattr(per_player(name).rolling(window, min_periods=1), stat)()
        return rolled.reset_index(level=key_levels, drop=True)

    # Lagged copies of the kinematic columns.
    lag_sources = [
        name for name in ('x', 'y', 'velocity_x', 'velocity_y', 's', 'a')
        if name in out.columns
    ]
    for lag in (1, 2, 3)[:window]:
        for name in lag_sources:
            out[f'{name}_lag{lag}'] = per_player(name).shift(lag)

    # Rolling mean / std (std is NaN for a single observation -> 0).
    for name in ('x', 'y', 'velocity_x', 'velocity_y', 's'):
        if name not in out.columns:
            continue
        means = rolling_stat(name, 'mean')
        stds = rolling_stat(name, 'std')
        out[f'{name}_roll{window}'] = means
        out[f'{name}_std{window}'] = stds.fillna(0.0)

    # Frame-to-frame change of the velocity components.
    for name in ('velocity_x', 'velocity_y'):
        if name in out.columns:
            out[f'{name}_delta'] = per_player(name).diff()

    roll_tag = f'_roll{window}'
    std_tag = f'_std{window}'
    seq_cols = [
        name for name in out.columns
        if '_lag' in name or roll_tag in name or std_tag in name or name.endswith('_delta')
    ]
    out[seq_cols] = out[seq_cols].fillna(0.0)

    return out


def add_context_on_finalframe(final_snap: pd.DataFrame) -> pd.DataFrame:
    """
    Attach passer / defender spatial context to a one-row-per-player frame.

    For every (game_id, play_id) group the following columns are filled:
    distance and closing speed to the passer, distance and closing speed
    to the nearest defender (a defender never counts as its own neighbour),
    and the number of defenders within 3 / 5 / 7 units. Missing inputs are
    created as NaN columns and remaining NaNs in the context columns become 0.
    Works on a copy; an empty input gets the context columns added.
    """
    context_cols = [
        'dist_to_qb', 'rel_speed_to_qb',
        'nearest_defender_dist', 'nearest_defender_rel_speed',
        'def_count_r3', 'def_count_r5', 'def_count_r7',
    ]

    snap = final_snap.copy()
    if len(snap) == 0:
        for name in context_cols:
            snap[name] = 0.0
        return snap

    required = [
        'game_id', 'play_id', 'nfl_id', 'x', 'y',
        'velocity_x', 'velocity_y', 'player_role', 'player_side',
    ]
    for name in required:
        if name not in snap.columns:
            snap[name] = np.nan
    for name in context_cols:
        snap[name] = np.nan

    for _, play in snap.groupby(['game_id', 'play_id']):
        row_labels = play.index.values
        n_rows = len(row_labels)
        roles = play['player_role'].values
        sides = play['player_side'].values

        # --- passer (first row whose role is 'Passer', if any) ---
        qb_x = qb_y = qb_vx = qb_vy = np.nan
        passer_mask = (roles == 'Passer')
        if passer_mask.any():
            passer = play[passer_mask].iloc[0]
            qb_x, qb_y = passer['x'], passer['y']
            qb_vx, qb_vy = passer['velocity_x'], passer['velocity_y']

        # --- defenders ---
        defender_mask = (sides == 'Defense') | (roles == 'Defensive Coverage')
        defenders = play[defender_mask]
        has_defenders = len(defenders) > 0

        pos = play[['x', 'y']].to_numpy(dtype=float)
        vel = play[['velocity_x', 'velocity_y']].to_numpy(dtype=float)
        ids = play['nfl_id'].to_numpy(dtype=float)

        if np.isnan(qb_x):
            dist_qb = np.zeros(n_rows)
            rel_speed_qb = np.zeros(n_rows)
        else:
            to_qb = np.stack([qb_x - pos[:, 0], qb_y - pos[:, 1]], axis=1)
            dist_qb = np.linalg.norm(to_qb, axis=1)
            # Unit vector toward the passer; left at zero for coincident points.
            unit_qb = np.zeros_like(to_qb)
            apart = dist_qb > 1e-6
            unit_qb[apart] = to_qb[apart] / dist_qb[apart, None]
            rel_vel_qb = vel - np.array([qb_vx, qb_vy])
            rel_speed_qb = (rel_vel_qb * unit_qb).sum(axis=1)

        if has_defenders:
            def_pos = defenders[['x', 'y']].to_numpy(dtype=float)
            def_vel = defenders[['velocity_x', 'velocity_y']].to_numpy(dtype=float)
            def_ids = defenders['nfl_id'].to_numpy(dtype=float)

            # Pairwise player -> defender distances, shape (players, defenders).
            delta = pos[:, None, :] - def_pos[None, :, :]
            dists = np.sqrt((delta ** 2).sum(axis=2))
            # Exclude each defender from being its own nearest defender.
            dists[ids[:, None] == def_ids[None, :]] = np.inf

            nearest = np.argmin(dists, axis=1)
            min_dist = dists[np.arange(len(nearest)), nearest]

            to_def = def_pos[nearest] - pos
            to_def_len = np.linalg.norm(to_def, axis=1)
            unit_def = np.zeros_like(to_def)
            apart_def = to_def_len > 1e-6
            unit_def[apart_def] = to_def[apart_def] / to_def_len[apart_def, None]

            rel_vel_def = vel - def_vel[nearest]
            rel_speed_df = (rel_vel_def * unit_def).sum(axis=1)

            def_counts_r3 = (dists <= 3.0).sum(axis=1)
            def_counts_r5 = (dists <= 5.0).sum(axis=1)
            def_counts_r7 = (dists <= 7.0).sum(axis=1)
        else:
            min_dist = np.zeros(n_rows)
            rel_speed_df = np.zeros(n_rows)
            def_counts_r3 = np.zeros(n_rows)
            def_counts_r5 = np.zeros(n_rows)
            def_counts_r7 = np.zeros(n_rows)

        per_play_values = (
            ('dist_to_qb', dist_qb),
            ('rel_speed_to_qb', rel_speed_qb),
            ('nearest_defender_dist', min_dist),
            ('nearest_defender_rel_speed', rel_speed_df),
            ('def_count_r3', def_counts_r3),
            ('def_count_r5', def_counts_r5),
            ('def_count_r7', def_counts_r7),
        )
        for name, values in per_play_values:
            snap.loc[row_labels, name] = values

    for name in context_cols:
        snap[name] = snap[name].fillna(0.0)

    return snap


def build_snapshot_and_merge(test_df: pd.DataFrame,
                             test_input_df: pd.DataFrame,
                             role_map: dict) -> pd.DataFrame:
    """
    Turn one gateway batch (test, test_input) into the snapshot-feature
    table used for prediction.

    The input tracking rows are direction-normalized, trimmed to the last
    N_FRAMES_KEEP frames and featurized; each player's row with the highest
    frame_id is kept as the snapshot, enriched with context features, and
    left-joined onto ``test_df`` (one row per future frame).

    In the result ``frame_id_x`` is the frame id from ``test_df`` and
    ``frame_id_y`` the snapshot frame id.
    """
    player_keys = ['game_id', 'play_id', 'nfl_id']

    # Normalize to a rightward offense, then keep only the trailing frames.
    tracking = test_input_df.copy()
    normalize_orientation_inplace(tracking)
    tracking = keep_last_n_frames(tracking, N_FRAMES_KEEP)
    features = add_sequence_features(
        add_perframe_features(tracking),
        window=WINDOW_SIZE,
    )

    # One row per player: the one with the largest numeric frame_id.
    with_numeric_fid = features.assign(
        **{'__fid__': pd.to_numeric(features['frame_id'], errors='coerce')}
    )
    last_labels = with_numeric_fid.groupby(player_keys)['__fid__'].idxmax()
    snapshot = features.loc[last_labels].copy().reset_index(drop=True)

    # Rename so the snapshot columns stay distinguishable after the merge.
    snapshot = snapshot.rename(columns={
        'frame_id': 'frame_id_y',
        'play_direction': 'play_direction_snap',
    })

    # Inject the spatial context (defender density etc.) on the snapshot rows.
    snapshot = add_context_on_finalframe(snapshot)

    # Join onto the gateway's per-future-frame rows.
    targets = test_df.copy().rename(columns={'frame_id': 'frame_id_x'})
    merged = targets.merge(snapshot, on=player_keys, how='left')

    # role_bucket is one component of the bucket-offset lookup key.
    bucket_ids = merged['player_role'].apply(
        lambda role: role_bucket_id_inference(role, role_map)
    )
    merged['role_bucket'] = bucket_ids.astype(np.int8)

    # Keep the original direction so predictions can be flipped back later.
    merged['play_direction'] = merged['play_direction_snap']

    # A few features get their NaNs replaced by 0.
    for name in ('time_to_ball', 'nearest_defender_dist'):
        if name in merged.columns:
            merged[name] = merged[name].fillna(0.0)

    return merged


def apply_bucket_offset_batch(px_raw, py_raw,
                              t_ball_arr, def_dist_arr, role_b_arr,
                              offset_dict, tbins, dbins):
    """
    Apply the final systematic-error correction to raw (x, y) predictions.

    Every row is keyed by (time_to_ball bin, defender-distance bin,
    role bucket). The (dx, dy) stored under that key in ``offset_dict`` is
    added to the raw prediction; rows whose key is absent are unchanged.

    Args:
        px_raw, py_raw: 1-D raw predictions (any array-like).
        t_ball_arr: time-to-ball per row; NaN is treated as 0.
        def_dist_arr: nearest-defender distance per row; NaN is treated
            as 99.0.
        role_b_arr: role bucket id per row (must be convertible to int).
        offset_dict: mapping ``(tbin, dbin, role_bucket) -> (dx, dy)``.
        tbins, dbins: monotonically increasing bin edges for the two
            continuous keys.

    Returns:
        ``(adj_x, adj_y)`` as float numpy arrays of the same length as the
        inputs.
    """
    px = np.asarray(px_raw, dtype=float)
    py = np.asarray(py_raw, dtype=float)

    # Sanitize once for the whole batch instead of once per row.
    t_vals = np.nan_to_num(np.asarray(t_ball_arr, dtype=float))
    d_vals = np.nan_to_num(np.asarray(def_dist_arr, dtype=float), nan=99.0)

    # digitize() returns 1-based interval indices; shift to 0-based and clamp
    # so values outside the edges fall into the first / last bin.
    t_idx = np.clip(np.digitize(t_vals, tbins, right=False) - 1, 0, len(tbins) - 2)
    d_idx = np.clip(np.digitize(d_vals, dbins, right=False) - 1, 0, len(dbins) - 2)

    # Only the dict lookup remains per-row; keys are plain ints so they match
    # the tuples stored in offset_dict.
    no_offset = (0.0, 0.0)
    offsets = [
        offset_dict.get((int(tb), int(db), int(rb)), no_offset)
        for tb, db, rb in zip(t_idx, d_idx, role_b_arr)
    ]
    # reshape keeps the (n, 2) layout even for an empty batch.
    offsets = np.asarray(offsets, dtype=float).reshape(-1, 2)

    return px + offsets[:, 0], py + offsets[:, 1]


def run_catboost_ensemble_for_rows(df_rows: pd.DataFrame,
                                   bundle,
                                   is_short_split: bool):
    """
    Predict one subset of rows (either the SHORT or the LONG split) in bulk.

    Prediction = constant-velocity baseline + mean CatBoost residual,
    clipped to the field, then shifted by the bucket offsets in the bundle.

    The baseline horizon is the *target* frame id (``frame_id_x``, falling
    back to ``frame_id``), not the snapshot frame id (``frame_id_y``).
    Using the snapshot frame gave every future frame of a player the same
    baseline.
    NOTE(review): this assumes the residual targets were built at training
    time with the future frame id as horizon - confirm against the
    training cell.

    Args:
        df_rows: merged snapshot rows for one split.
        bundle: dict with feature_cols, the four model lists,
            bucket_offset_3d, tbins, dbins and role_map.
        is_short_split: True to use the short_* models, False for long_*.

    Returns:
        ``(adj_x, adj_y)`` arrays; two empty float arrays for empty input.
    """
    if len(df_rows) == 0:
        return np.array([], dtype=float), np.array([], dtype=float)

    feat_cols = bundle["feature_cols"]
    local = df_rows.copy()
    # NOTE(review): features missing from df_rows are silently zero-filled;
    # make sure feature_cols uses the column names produced at inference.
    for c in feat_cols:
        if c not in local.columns:
            local[c] = 0.0
    Xtest = local[feat_cols].fillna(0.0).to_numpy(dtype=np.float32)

    # Constant-velocity physics baseline inputs.
    bx  = local['x'].fillna(0.0).to_numpy()
    by  = local['y'].fillna(0.0).to_numpy()
    bvx = local['velocity_x'].fillna(0.0).to_numpy()
    bvy = local['velocity_y'].fillna(0.0).to_numpy()

    # Horizon = frame to predict. Always go through a Series so the
    # coercion / fillna chain also works when no frame column is present.
    if 'frame_id_x' in local.columns:
        horizon_src = local['frame_id_x']
    elif 'frame_id' in local.columns:
        horizon_src = local['frame_id']
    else:
        horizon_src = pd.Series(0.0, index=local.index)
    horizon_frames = pd.to_numeric(horizon_src, errors='coerce').fillna(0.0).to_numpy()

    phys_x, phys_y = compute_physics_xy(bx, by, bvx, bvy, horizon_frames)

    if is_short_split:
        models_x = bundle["short_models_x"]
        models_y = bundle["short_models_y"]
    else:
        models_x = bundle["long_models_x"]
        models_y = bundle["long_models_y"]

    # Ensemble = plain average of every model's residual prediction.
    pred_x_resid = np.mean([m.predict(Xtest) for m in models_x], axis=0)
    pred_y_resid = np.mean([m.predict(Xtest) for m in models_y], axis=0)

    pred_x_raw = np.clip(phys_x + pred_x_resid, 0, FIELD_LENGTH)
    pred_y_raw = np.clip(phys_y + pred_y_resid, 0, FIELD_WIDTH)

    # Bucket correction for systematic bias.
    off_dict = bundle["bucket_offset_3d"]
    tbins    = bundle["tbins"]
    dbins    = bundle["dbins"]

    ttb = local['time_to_ball'].fillna(0.0).to_numpy()
    ndd = local['nearest_defender_dist'].fillna(99.0).to_numpy()
    rbk = local['role_bucket'].fillna(bundle["role_map"]["__other__"]).to_numpy()

    adj_x, adj_y = apply_bucket_offset_batch(
        pred_x_raw, pred_y_raw,
        ttb, ndd, rbk,
        off_dict, tbins, dbins
    )

    return adj_x, adj_y


def infer_batch(test_pl: pl.DataFrame,
                test_input_pl: pl.DataFrame,
                bundle) -> pd.DataFrame:
    """
    Produce the final (x, y) predictions for one gateway batch.

    Rows are routed to the SHORT or LONG model family by comparing
    time_to_ball with SHORT_TIME_THRESHOLD, predicted in the
    right-normalized frame, and mirrored back for plays whose original
    direction was 'left'.

    Returns:
        DataFrame with columns "x" and "y", reindexed to the index of the
        pandas conversion of ``test_pl``.
    """
    test_df = test_pl.to_pandas()
    merged = build_snapshot_and_merge(
        test_df=test_df,
        test_input_df=test_input_pl.to_pandas(),
        role_map=bundle["role_map"],
    )

    # Route by time_to_ball.
    is_short = merged['time_to_ball'] < SHORT_TIME_THRESHOLD
    is_long = ~is_short

    short_rows = merged[is_short].copy()
    long_rows = merged[is_long].copy()
    short_xy = run_catboost_ensemble_for_rows(short_rows, bundle, is_short_split=True)
    long_xy = run_catboost_ensemble_for_rows(long_rows, bundle, is_short_split=False)

    # Scatter both splits back into the merged frame (index 0 = x, 1 = y).
    for axis_pos, column in enumerate(('pred_x_right', 'pred_y_right')):
        merged[column] = np.nan
        merged.loc[is_short, column] = short_xy[axis_pos]
        merged.loc[is_long, column] = long_xy[axis_pos]

    # Predictions live in the right-normalized frame; undo the mirroring
    # for plays that originally went left.
    flip_back = merged['play_direction'].astype(str).str.lower().eq('left')
    right_x = merged['pred_x_right']
    right_y = merged['pred_y_right']
    out_df = pd.DataFrame(
        {
            "x": np.where(flip_back, FIELD_LENGTH - right_x, right_x),
            "y": np.where(flip_back, FIELD_WIDTH - right_y, right_y),
        },
        index=merged.index,
    )

    # The Kaggle API expects the same length and order as test.
    return out_df.reindex(test_df.index)


def predict(test: pl.DataFrame, test_input: pl.DataFrame) -> pl.DataFrame | pd.DataFrame:
    """
    Entry point invoked by the Kaggle evaluation server.

    ``test`` and ``test_input`` are Polars DataFrames. The result is a
    pandas DataFrame of x / y predictions with one row per row of ``test``,
    in the same order.
    """
    predictions = infer_batch(test, test_input, load_bundle_once())
    # Sanity checks on the contract stated above.
    assert isinstance(predictions, pd.DataFrame)
    assert len(predictions) == len(test)
    return predictions


# Interface required by Kaggle:
# - in the competition environment inference_server.serve() is called
# - for a local test inside a notebook, run_local_gateway() writes submission.csv
inference_server = nfl_inference_server.NFLInferenceServer(predict)

# Startup check: load (and validate) the bundle before serving.
_ = load_bundle_once()

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Competition evaluation mode.
    inference_server.serve()
else:
    # Local test mode: run predict() against the public data so that
    # submission.csv is generated in the working directory.
    inference_server.run_local_gateway(("/kaggle/input/nfl-big-data-bowl-2026-prediction/",))
