In [None]:
# ============================================================
# NFL Big Data Bowl 2026 — Daniel GPU V10.3 (Hybrid Directional Blend)
#  - Base features: physics + dual rolling (3,7) + QB/Ball relative + bearings
#  - Two target streams:
#       ABS: dx = target_x - x_last, dy = target_y - y_last
#       DIR: dx_dir = dx * sign(play_direction), dy_dir = dy * sign(play_direction)
#  - Base models (GPU): LGB / XGB / CAT (per stream)
#  - Meta per stream: Ridge(positive, α tuned) + LGB (nonlinear)
#  - Final: learn non-negative blend ABS vs DIR using OOF (after inverting DIR→ABS)
#  - Role-bias on the final blended OOF (absolute space)
#  - Output: /kaggle/working/submission.csv
# ============================================================

import os, gc, pickle, warnings
import numpy as np
import pandas as pd
import polars as pl
from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

warnings.filterwarnings("ignore")

# ----------------------------
# Config
# ----------------------------
class Cfg:
    """Static configuration for the training / inference pipeline.

    All values are class attributes and are read as ``Cfg.NAME``.
    """

    # Root folder of the competition data (a ``train`` sub-folder and the test CSVs).
    DATA_DIR  = "/kaggle/input/nfl-big-data-bowl-2026-prediction/"
    # Root folder for everything this run writes.
    OUT_DIR   = "/kaggle/working/daniel-gpu-v10_3/"
    # Derived from OUT_DIR so the two locations cannot drift apart when one is edited.
    MODEL_DIR = os.path.join(OUT_DIR, "models")
    # Seed passed to the RNGs and to every model's random_state / random_seed.
    SEED      = 1339
    # Number of cross-validation folds (also the number of saved fold models).
    N_FOLDS   = 5
    # True: CV groups are game_id + play_id; False: CV groups are game_id only.
    USE_GROUP_KF = True
    # Pseudo-count used to shrink per-role residual means toward the global mean.
    ROLE_BIAS_SHRINK = 50.0
    # When True, print the RMSE of each meta learner per stream.
    META_COMPARE = True

# Create the output folders up front so later pickle dumps do not fail on a
# missing directory; exist_ok makes re-running the cell harmless.
os.makedirs(Cfg.OUT_DIR, exist_ok=True)
os.makedirs(Cfg.MODEL_DIR, exist_ok=True)

def seed_everything(seed=1339):
    """Seed Python's ``random`` module and NumPy's global RNG with ``seed``.

    Only these two generators are touched; model libraries receive the seed
    through their own parameters.
    """
    import random as _py_random

    for seeder in (_py_random.seed, np.random.seed):
        seeder(seed)
# Seed the global RNGs once, before any data is loaded or any model is built.
seed_everything(Cfg.SEED)

def rmse(a, b):
    """Return the root-mean-squared error between ``a`` and ``b`` as a plain float."""
    mse = mean_squared_error(a, b)
    return float(np.sqrt(mse))

In [None]:
# ============================================================
# Helpers
# ============================================================
def detect_target_cols(df_out_pd: pd.DataFrame):
    """Pick the pair of columns in the output frame that hold the targets.

    Candidate pairs are tried in priority order: ``target_x``/``target_y``,
    then ``ball_land_x``/``ball_land_y``, then ``x``/``y``.

    Returns:
        A ``(x_column, y_column)`` tuple of column names.

    Raises:
        RuntimeError: if none of the candidate pairs is fully present.
    """
    available = set(df_out_pd.columns)
    candidate_pairs = (
        ("target_x", "target_y"),
        ("ball_land_x", "ball_land_y"),
        ("x", "y"),
    )
    for x_name, y_name in candidate_pairs:
        if x_name in available and y_name in available:
            return x_name, y_name
    raise RuntimeError(f"Could not detect targets in output; columns={list(available)[:20]}")

def to_seconds_frames(df: pl.DataFrame) -> pl.DataFrame:
    """Attach frame-timing columns to a per-frame table.

    Adds (or reuses) ``frame_offset``, adds ``T`` and then derives:

    * ``time_offset``  = ``frame_offset / 10``
      (presumably seconds at 10 frames per second — confirm against the data spec)
    * ``frame_ratio``  = ``frame_offset / T``

    ``frame_offset`` is taken from ``frame_id`` when it is not already present.
    ``T`` is ``num_frames_output`` clipped to a minimum of 1 when that column
    exists, otherwise the constant 5.0.
    """
    present = df.columns

    if "frame_offset" not in present:
        offset_expr = pl.col("frame_id").cast(pl.Float64).alias("frame_offset")
        df = df.with_columns(offset_expr)

    if "num_frames_output" in present:
        horizon_expr = pl.col("num_frames_output").cast(pl.Float64).clip(1.0, None)
    else:
        horizon_expr = pl.lit(5.0)
    df = df.with_columns(horizon_expr.alias("T"))

    offset = pl.col("frame_offset")
    timing_exprs = [
        (offset / 10.0).alias("time_offset"),
        (offset / pl.col("T")).alias("frame_ratio"),
    ]
    return df.with_columns(timing_exprs)

In [None]:
# ============================================================
# Feature Engineering
# ============================================================
def derive_temporal_features(df: pl.DataFrame) -> pl.DataFrame:
    """Add frame-to-frame displacement, speed and acceleration estimates.

    When both ``x`` and ``y`` are present the following columns are added,
    all computed per (game_id, play_id, nfl_id):

    * ``dx_frame`` / ``dy_frame``       – difference to the previous row
    * ``speed_x_est`` / ``speed_y_est`` – the displacement multiplied by 10
    * ``accel_x_est`` / ``accel_y_est`` – difference of the speed estimate
    * ``v_mag`` / ``a_mag_est``         – Euclidean norms of the two vectors

    The differences leave nulls on the first row(s) of every player. These are
    filled forward then backward *within the same player*, so a player's
    leading nulls are never filled from a neighbouring player's rows. A player
    with too few rows to form a difference keeps nulls.

    If ``x`` or ``y`` is missing the frame is returned unchanged.

    NOTE(review): ``diff`` follows the incoming row order inside each group;
    this presumably expects rows already ordered by frame — confirm at the
    call site.
    """
    if not {"x", "y"}.issubset(df.columns):
        return df

    keys = ["game_id", "play_id", "nfl_id"]

    df = df.with_columns([
        pl.col("x").diff().over(keys).alias("dx_frame"),
        pl.col("y").diff().over(keys).alias("dy_frame"),
    ])
    df = df.with_columns([
        (pl.col("dx_frame") * 10.0).alias("speed_x_est"),
        (pl.col("dy_frame") * 10.0).alias("speed_y_est"),
    ])
    df = df.with_columns([
        pl.col("speed_x_est").diff().over(keys).alias("accel_x_est"),
        pl.col("speed_y_est").diff().over(keys).alias("accel_y_est"),
    ])

    derived = ["dx_frame", "dy_frame", "speed_x_est", "speed_y_est",
               "accel_x_est", "accel_y_est"]
    # Fill inside each player's window only: an un-partitioned fill would pull
    # values across the boundary between two players.
    df = df.with_columns([
        pl.col(c)
          .fill_null(strategy="forward")
          .fill_null(strategy="backward")
          .over(keys)
        for c in derived
    ])

    df = df.with_columns([
        ((pl.col("speed_x_est")**2 + pl.col("speed_y_est")**2).sqrt()).alias("v_mag"),
        ((pl.col("accel_x_est")**2 + pl.col("accel_y_est")**2).sqrt()).alias("a_mag_est"),
    ])
    return df

def add_dual_rolling(df: pl.DataFrame, ks=(3,7)) -> pl.DataFrame:
    """Append per-player rolling means for every window size in ``ks``.

    The frame is first sorted by (game_id, play_id, nfl_id, frame_id). For each
    available signal among x, y, s, a, dir, o, v_mag and a_mag_est, and for each
    window ``k``, a column ``<signal>_roll<k>`` is added holding the rolling
    mean computed inside that player's rows (a single sample is enough to
    produce a value). The sorted frame is returned even when no signal column
    is present.
    """
    player_keys = ["game_id", "play_id", "nfl_id"]
    df = df.sort(player_keys + ["frame_id"])

    candidates = ("x", "y", "s", "a", "dir", "o", "v_mag", "a_mag_est")
    signals = [name for name in candidates if name in df.columns]
    if len(signals) == 0:
        return df

    for window in ks:
        rolled = []
        for name in signals:
            smoothed = pl.col(name).rolling_mean(window_size=window, min_periods=1)
            rolled.append(smoothed.over(player_keys).alias(f"{name}_roll{window}"))
        df = df.with_columns(rolled)
    return df

def aggregate_per_player(df: pl.DataFrame) -> pl.DataFrame:
    """Collapse per-frame tracking rows into one feature row per player and play.

    Steps:
      1. Add row-level physics features when their inputs exist (momentum,
         accel_ratio, dir_diff, angular_vel, kinetic_energy, yard_norm,
         side_left).
      2. Add temporal estimates (``derive_temporal_features``) and rolling
         means with windows 3 and 7 (``add_dual_rolling``, which also sorts the
         rows by player and frame_id).
      3. For every available feature compute mean / std / min / max per
         (game_id, play_id, nfl_id), plus ``num_frames`` and ``max_frame_id``.
      4. Attach the last row's ``x`` / ``y`` per player as ``x_last`` / ``y_last``.

    Returns:
        One row per (game_id, play_id, nfl_id).

    NOTE(review): ``dir_diff`` and ``angular_vel`` are plain absolute
    differences; if ``dir`` / ``o`` are compass angles this does not handle
    wrap-around at the 0/360 boundary — confirm the units.
    """
    keys = ["game_id", "play_id", "nfl_id"]

    if "s" in df.columns and "player_weight" in df.columns:
        df = df.with_columns((pl.col("s") * pl.col("player_weight")).alias("momentum"))
    if "a" in df.columns and "s" in df.columns:
        # The small constant keeps the ratio finite when speed is zero.
        df = df.with_columns((pl.col("a") / (pl.col("s") + 1e-3)).alias("accel_ratio"))
    if {"dir", "o"}.issubset(df.columns):
        df = df.with_columns((pl.col("dir") - pl.col("o")).abs().alias("dir_diff"))
    if "dir" in df.columns:
        df = df.with_columns(
            pl.col("dir").diff().over(keys).abs().alias("angular_vel")
        ).with_columns(pl.col("angular_vel").fill_null(0.0))
    if {"s", "player_weight"}.issubset(df.columns):
        # The alias must wrap the WHOLE product. Aliasing only the right-hand
        # operand leaves the result named after the left-hand literal, so no
        # "kinetic_energy" column would exist for the feature list below.
        df = df.with_columns(
            (0.5 * (pl.col("player_weight") * (pl.col("s")**2))).alias("kinetic_energy")
        )
    if "absolute_yardline_number" in df.columns:
        df = df.with_columns((pl.col("absolute_yardline_number")/100.0).alias("yard_norm"))
    if "player_side" in df.columns:
        df = df.with_columns((pl.col("player_side")=="left").cast(pl.Int8).alias("side_left"))

    df = derive_temporal_features(df)
    df = add_dual_rolling(df, ks=(3,7))

    feats = ["x","y","s","a","o","dir",
             "dx_frame","dy_frame","speed_x_est","speed_y_est",
             "accel_x_est","accel_y_est","v_mag","a_mag_est",
             "x_roll3","y_roll3","s_roll3","a_roll3","dir_roll3","o_roll3","v_mag_roll3","a_mag_est_roll3",
             "x_roll7","y_roll7","s_roll7","a_roll7","dir_roll7","o_roll7","v_mag_roll7","a_mag_est_roll7",
             "momentum","accel_ratio","dir_diff","angular_vel","kinetic_energy",
             "yard_norm","side_left"]
    # Keep only the features that could actually be built from this input.
    feats = [f for f in feats if f in df.columns]

    agg_exprs = [pl.count().alias("num_frames"), pl.col("frame_id").max().alias("max_frame_id")]
    for f in feats:
        agg_exprs += [pl.col(f).mean().alias(f"{f}_mean"),
                      pl.col(f).std().alias(f"{f}_std"),
                      pl.col(f).min().alias(f"{f}_min"),
                      pl.col(f).max().alias(f"{f}_max")]
    out = df.group_by(keys).agg(agg_exprs)

    # Rows were sorted by frame_id in add_dual_rolling, so the tail row of each
    # group is the player's last tracked frame.
    last_xy = (df.group_by(keys).tail(1)
                 .select(keys + ["x","y"])
                 .rename({"x":"x_last","y":"y_last"}))
    return out.join(last_xy, on=keys, how="left")

def qb_last_xy(df: pl.DataFrame) -> pl.DataFrame:
    """Return one row per play with the passer's final tracked position.

    A row counts as the passer when ``player_position == "QB"`` or
    ``player_role == "passer"`` (whichever of the two columns exist). For each
    matching player the last row of the play is taken, and the ``x`` / ``y`` of
    all matching players in a play are averaged into ``qb_x_last`` /
    ``qb_y_last``.

    Returns:
        Columns ``game_id``, ``play_id``, ``qb_x_last``, ``qb_y_last``. An
        empty, correctly typed frame is returned when neither identifying
        column exists or no row matches, so callers can always left-join it.

    NOTE(review): "last row" follows the incoming row order; this presumably
    expects rows ordered by frame within each player — confirm at the call site.
    """
    def _empty_result() -> pl.DataFrame:
        # Build typed empty columns with the name first and the dtype by
        # keyword: passing the values in the name slot together with a second
        # positional argument is rejected by polars ("Series name must be a string").
        return pl.DataFrame({
            "game_id":   pl.Series("game_id",   [], dtype=pl.Int64),
            "play_id":   pl.Series("play_id",   [], dtype=pl.Int64),
            "qb_x_last": pl.Series("qb_x_last", [], dtype=pl.Float64),
            "qb_y_last": pl.Series("qb_y_last", [], dtype=pl.Float64),
        })

    conditions = []
    if "player_position" in df.columns:
        conditions.append(pl.col("player_position") == "QB")
    if "player_role" in df.columns:
        conditions.append(pl.col("player_role") == "passer")
    if not conditions:
        return _empty_result()

    # OR together whichever identifying conditions are available.
    filt = conditions[0]
    for extra in conditions[1:]:
        filt = filt | extra

    qb = df.filter(filt)
    if qb.height == 0:
        return _empty_result()

    qb_last = (qb.group_by(["game_id","play_id","nfl_id"]).tail(1)
                 .select(["game_id","play_id","x","y"])
                 .group_by(["game_id","play_id"])
                 .agg([pl.col("x").mean().alias("qb_x_last"),
                       pl.col("y").mean().alias("qb_y_last")]))
    return qb_last

def const_first_per_player(df: pl.DataFrame) -> pl.DataFrame:
    """Return the first row's value of the static columns for each player in each play.

    Only the columns from the fixed list below that exist in ``df`` are kept;
    the three key columns come first.
    """
    player_keys = ["game_id", "play_id", "nfl_id"]
    static_fields = [
        "absolute_yardline_number", "player_weight",
        "player_position", "player_role", "player_side", "play_direction",
        "ball_land_x", "ball_land_y", "num_frames_output",
    ]
    available = [name for name in player_keys + static_fields if name in df.columns]
    first_rows = df.group_by(player_keys).first()
    return first_rows.select(available)

In [None]:
# ============================================================
# Load Train Data
# ============================================================
# Read the weekly input/output CSV pairs, keep only games present in both files
# of a week, and concatenate everything into df_in (polars) and df_out (pandas).
print("Loading training data with overlap filtering ...")
train_dir = os.path.join(Cfg.DATA_DIR, "train")
inputs, outputs = [], []

# Weeks 01..18; a week is skipped when either of its two files is missing.
for w in range(1,19):
    fi = f"{train_dir}/input_2023_w{w:02d}.csv"
    fo = f"{train_dir}/output_2023_w{w:02d}.csv"
    if not (os.path.exists(fi) and os.path.exists(fo)):
        print(f"Week {w:02d}: missing files ‚Äì skip")
        continue

    # Inputs are loaded with polars, outputs with pandas.
    df_i, df_o = pl.read_csv(fi), pd.read_csv(fo)
    # Normalise the player id column name to snake_case in both frames.
    if "nflId" in df_i.columns: df_i = df_i.rename({"nflId":"nfl_id"})
    if "nflId" in df_o.columns: df_o = df_o.rename(columns={"nflId":"nfl_id"})

    # Games that appear in both the input and the output file of this week.
    gi_in, gi_out = set(df_i["game_id"].unique()), set(df_o["game_id"].unique())
    common = gi_in & gi_out
    if not common:
        print(f"Week {w:02d}: no common game_id ‚Äì skip")
        continue

    df_i = df_i.filter(pl.col("game_id").is_in(list(common)))
    df_o = df_o[df_o["game_id"].isin(common)]
    inputs.append(df_i); outputs.append(df_o)
    print(f"Week {w:02d}: input {df_i.shape}, output {df_o.shape}, common={len(common)}")

# Fail loudly instead of concatenating an empty list.
if not inputs or not outputs:
    raise RuntimeError("No overlapping weeks found. Check dataset path/files.")
df_in, df_out = pl.concat(inputs), pd.concat(outputs, ignore_index=True)
print(f"‚úÖ Loaded input: {df_in.shape}, output: {df_out.shape}")


In [None]:
# ============================================================
# Build Training Table + Features
# ============================================================
# Three per-player / per-play tables derived from the tracking input.
const_part = const_first_per_player(df_in)
agg_feats  = aggregate_per_player(df_in)
qb_last_tr = qb_last_xy(df_in)

# One row per (player, output frame) carrying the regression targets.
tgt_x, tgt_y = detect_target_cols(df_out)
df_out_pl = pl.from_pandas(df_out)
frame_template = df_out_pl.select(["game_id","play_id","nfl_id","frame_id",tgt_x,tgt_y]) \
                          .rename({tgt_x:"target_x",tgt_y:"target_y"})
# The select above keeps no num_frames_output column, so to_seconds_frames
# falls back to its constant T = 5.0 here.
# NOTE(review): whether the test template takes the same branch depends on the
# columns of test.csv — confirm train/test consistency of T and frame_ratio.
frame_template = to_seconds_frames(frame_template)

# Force identical key dtypes so the joins below cannot fail on a type mismatch.
for c in ["game_id","play_id","nfl_id"]:
    agg_feats      = agg_feats.with_columns(pl.col(c).cast(pl.Int64))
    const_part     = const_part.with_columns(pl.col(c).cast(pl.Int64))
    frame_template = frame_template.with_columns(pl.col(c).cast(pl.Int64))
qb_last_tr = qb_last_tr.with_columns(pl.col("game_id").cast(pl.Int64),
                                     pl.col("play_id").cast(pl.Int64))

# Inner join on the aggregated features drops target rows without tracking
# input; constants and QB position are optional (left joins).
feat_train = (frame_template
              .join(agg_feats,  on=["game_id","play_id","nfl_id"], how="inner")
              .join(const_part, on=["game_id","play_id","nfl_id"], how="left")
              .join(qb_last_tr,  on=["game_id","play_id"],        how="left"))
print(f"‚úÖ Joined features shape: {feat_train.shape}")
if feat_train.height == 0:
    raise RuntimeError("Joined DataFrame empty after filtering.")

# Everything from here on is done in pandas.
pdf = feat_train.to_pandas()

# Targets: displacement from the player's last tracked position (ABS stream).
if not {"x_last","y_last"}.issubset(pdf.columns):
    raise RuntimeError("x_last/y_last missing after aggregation.")
pdf["dx"] = pdf["target_x"] - pdf["x_last"]
pdf["dy"] = pdf["target_y"] - pdf["y_last"]

# Direction sign: left -> -1, anything else -> +1.
# When the column is absent, .get returns the string "right", the comparison is
# a scalar False and the sign is a scalar +1.0 that broadcasts below.
play_dir_sign = np.where(pdf.get("play_direction", "right")=="left", -1.0, 1.0)
# DIR stream targets: the same displacements, sign-flipped for "left" plays.
pdf["dx_dir"] = pdf["dx"] * play_dir_sign
pdf["dy_dir"] = pdf["dy"] * play_dir_sign

# Role label kept as a raw string for the per-role bias calibration later on.
pdf["role_raw"] = pdf.get("player_role", pd.Series(["UNK"]*len(pdf))).fillna("UNK").astype(str)

# QB-relative offsets, distance and bearing. A play without a QB position falls
# back to the player's own last position, which makes the offsets zero.
pdf["qb_x_last"] = pdf["qb_x_last"].fillna(pdf["x_last"])
pdf["qb_y_last"] = pdf["qb_y_last"].fillna(pdf["y_last"])
pdf["rel_x_last_qb"] = pdf["x_last"] - pdf["qb_x_last"]
pdf["rel_y_last_qb"] = pdf["y_last"] - pdf["qb_y_last"]
pdf["dist_to_qb"] = np.sqrt(pdf["rel_x_last_qb"]**2 + pdf["rel_y_last_qb"]**2)
pdf["ang_to_qb"]  = np.arctan2(pdf["rel_y_last_qb"], pdf["rel_x_last_qb"])
pdf["bearing_qb"] = np.degrees(pdf["ang_to_qb"])

# Ball-landing-relative offsets, distance and bearing; all zeros when the ball
# landing coordinates are not available.
if {"ball_land_x","ball_land_y"}.issubset(set(pdf.columns)):
    pdf["rel_x_last_ball"] = pdf["x_last"] - pdf["ball_land_x"]
    pdf["rel_y_last_ball"] = pdf["y_last"] - pdf["ball_land_y"]
    pdf["dist_to_ball"]    = np.sqrt(pdf["rel_x_last_ball"]**2 + pdf["rel_y_last_ball"]**2)
    pdf["ang_to_ball"]     = np.arctan2(pdf["rel_y_last_ball"], pdf["rel_x_last_ball"])
    pdf["bearing_ball"]    = np.degrees(pdf["ang_to_ball"])
else:
    for c in ["rel_x_last_ball","rel_y_last_ball","dist_to_ball","ang_to_ball","bearing_ball"]:
        pdf[c] = 0.0

# Context interactions: field position x play direction, and frame progress x
# distance to the ball landing point.
pdf["play_dir_sign_num"] = play_dir_sign
pdf["yard_norm"] = pdf.get("yard_norm", pdf.get("absolute_yardline_number", 0.0)/100.0)
pdf["yard_dir_ctx"]  = pdf["yard_norm"] * pdf["play_dir_sign_num"]
pdf["fratio_ball_ctx"] = pdf.get("frame_ratio", 0.0) * pdf["dist_to_ball"]

# One-hot encode the categorical columns that are present.
# NOTE(review): get_dummies emits bool or uint8 columns depending on the pandas
# version, and neither dtype is in the whitelist used for feat_cols below, so
# these one-hot columns appear to be excluded from the model features. The
# inference code shown in this file does not recreate them either — confirm
# whether that is intended before changing one side only.
cat_cols = [c for c in ["player_position","player_role","play_direction"] if c in pdf.columns]
if cat_cols:
    print(f"Encoding categorical columns: {cat_cols}")
    pdf = pd.get_dummies(pdf, columns=cat_cols, drop_first=True)

# Feature list: every float/int column that is not an id, a raw categorical,
# a target or the role label.
drop_cols = {'game_id','play_id','nfl_id','frame_id',
             'player_position','player_role','player_side','play_direction',
             'target_x','target_y','dx','dy','dx_dir','dy_dir','role_raw'}
feat_cols = [c for c in pdf.columns if c not in drop_cols and str(pdf[c].dtype) in ['float64','float32','int64','int32']]

# Standardize numeric features. The scaler is fitted on all training rows
# (before the CV split) and saved together with the column order so inference
# can apply exactly the same transform.
scaler = StandardScaler()
pdf[feat_cols] = scaler.fit_transform(pdf[feat_cols])
with open(os.path.join(Cfg.MODEL_DIR, "feature_scaler.pkl"), "wb") as f:
    pickle.dump({"feat_cols": feat_cols, "scaler": scaler}, f)
print(f"Standardized {len(feat_cols)} features.")

# Train arrays: one shared design matrix, four target vectors (ABS and DIR
# streams, x and y axis) and the role label per row.
X        = pdf[feat_cols].values
y_abs_dx = pdf["dx"].values
y_abs_dy = pdf["dy"].values
y_dir_dx = pdf["dx_dir"].values
y_dir_dy = pdf["dy_dir"].values
role_raw_tr = pdf["role_raw"].values

print(f"‚úÖ Training rows: {len(pdf):,} | #Features: {len(feat_cols)}")

In [None]:
# ============================================================
# Base Model Params (GPU)
# ============================================================
# Base LightGBM regressor settings (GPU device 0 on platform 0).
LGB_PARAMS = {
    "objective": "regression",
    "metric": "rmse",
    "boosting_type": "gbdt",
    "device": "gpu",
    "gpu_platform_id": 0,
    "gpu_device_id": 0,
    "n_estimators": 1600,
    "learning_rate": 0.0115,
    "num_leaves": 64,
    "subsample": 0.9,
    "colsample_bytree": 0.9,
    "min_data_in_leaf": 20,
    "lambda_l1": 1.0,
    "lambda_l2": 1.0,
    "random_state": Cfg.SEED,
    "verbose": -1,
}

# Base XGBoost regressor settings (GPU histogram tree method).
XGB_PARAMS = {
    "objective": "reg:squarederror",
    "eval_metric": "rmse",
    "n_estimators": 1700,
    "learning_rate": 0.011,
    "max_depth": 7,
    "colsample_bytree": 0.9,
    "subsample": 0.9,
    "gamma": 0.05,
    "reg_alpha": 0.5,
    "reg_lambda": 1.0,
    "random_state": Cfg.SEED,
    "tree_method": "gpu_hist",
    "predictor": "gpu_predictor",
    "verbosity": 0,
}

# Base CatBoost regressor settings (GPU device "0").
CAT_PARAMS = {
    "loss_function": "RMSE",
    "eval_metric": "RMSE",
    "iterations": 1500,
    "learning_rate": 0.012,
    "depth": 6,
    "l2_leaf_reg": 3.0,
    "random_seed": Cfg.SEED,
    "task_type": "GPU",
    "devices": "0",
    "verbose": False,
}

# Meta LightGBM (the non-linear stacker): fewer, shallower trees, no
# subsampling and no L1/L2 penalty.
META_LGB_PARAMS = {
    "objective": "regression",
    "metric": "rmse",
    "boosting_type": "gbdt",
    "device": "gpu",
    "gpu_platform_id": 0,
    "gpu_device_id": 0,
    "n_estimators": 400,
    "learning_rate": 0.04,
    "num_leaves": 25,
    "max_depth": 4,
    "subsample": 1.0,
    "colsample_bytree": 1.0,
    "min_data_in_leaf": 20,
    "lambda_l1": 0.0,
    "lambda_l2": 0.0,
    "random_state": Cfg.SEED,
    "verbose": -1,
}


In [None]:
# ============================================================
# Cross-Validation with OOF capture (ABS stream)
# ============================================================
# Build the CV splits once; the same splits are reused for the DIR stream.
# Both branches use GroupKFold: grouped by play (game_id + play_id) when
# USE_GROUP_KF is set, otherwise grouped by game_id.
if Cfg.USE_GROUP_KF:
    groups = pdf["game_id"].astype(str) + "_" + pdf["play_id"].astype(str)
    cv_iter = list(GroupKFold(n_splits=Cfg.N_FOLDS).split(pdf, groups=groups))
else:
    cv_iter = list(GroupKFold(n_splits=Cfg.N_FOLDS).split(pdf, groups=pdf["game_id"]))

# Out-of-fold prediction buffers, one per (axis, model family).
oof_abs_dx_lgb = np.zeros_like(y_abs_dx)
oof_abs_dx_xgb = np.zeros_like(y_abs_dx)
oof_abs_dx_cat = np.zeros_like(y_abs_dx)
oof_abs_dy_lgb = np.zeros_like(y_abs_dy)
oof_abs_dy_xgb = np.zeros_like(y_abs_dy)
oof_abs_dy_cat = np.zeros_like(y_abs_dy)

print("Training ABS base models with GroupKFold ...")
for fold,(tr,va) in enumerate(cv_iter,1):
    X_tr,X_va = X[tr],X[va]
    ydx_tr,ydx_va = y_abs_dx[tr],y_abs_dx[va]
    ydy_tr,ydy_va = y_abs_dy[tr],y_abs_dy[va]

    # Six models per fold: {LGB, XGB, CAT} x {dx, dy}. All run for their full
    # configured number of rounds (no validation set is passed to fit).
    lgb_dx = lgb.LGBMRegressor(**LGB_PARAMS).fit(X_tr,ydx_tr)
    lgb_dy = lgb.LGBMRegressor(**LGB_PARAMS).fit(X_tr,ydy_tr)
    xgb_dx = xgb.XGBRegressor(**XGB_PARAMS).fit(X_tr,ydx_tr,verbose=False)
    xgb_dy = xgb.XGBRegressor(**XGB_PARAMS).fit(X_tr,ydy_tr,verbose=False)
    cat_dx = CatBoostRegressor(**CAT_PARAMS).fit(X_tr,ydx_tr,verbose=False)
    cat_dy = CatBoostRegressor(**CAT_PARAMS).fit(X_tr,ydy_tr,verbose=False)

    p_dx_lgb,p_dx_xgb,p_dx_cat = lgb_dx.predict(X_va),xgb_dx.predict(X_va),cat_dx.predict(X_va)
    p_dy_lgb,p_dy_xgb,p_dy_cat = lgb_dy.predict(X_va),xgb_dy.predict(X_va),cat_dy.predict(X_va)

    # Store the held-out predictions at the validation row positions.
    oof_abs_dx_lgb[va],oof_abs_dx_xgb[va],oof_abs_dx_cat[va] = p_dx_lgb,p_dx_xgb,p_dx_cat
    oof_abs_dy_lgb[va],oof_abs_dy_xgb[va],oof_abs_dy_cat[va] = p_dy_lgb,p_dy_xgb,p_dy_cat

    # Persist every fold model; inference reloads them by this file name pattern.
    for m,name in [(lgb_dx,"abs_lgb_dx"),(lgb_dy,"abs_lgb_dy"),
                   (xgb_dx,"abs_xgb_dx"),(xgb_dy,"abs_xgb_dy"),
                   (cat_dx,"abs_cat_dx"),(cat_dy,"abs_cat_dy")]:
        with open(os.path.join(Cfg.MODEL_DIR,f"fold{fold}_{name}.pkl"),"wb") as f:
            pickle.dump(m,f)

    print(f"[ABS] Fold {fold}: LGB dx={rmse(ydx_va,p_dx_lgb):.4f} dy={rmse(ydy_va,p_dy_lgb):.4f}")

# Fixed-weight reference blend (LGB 0.35 / XGB 0.45 / CAT 0.20); it is only
# reported here, the learned meta models below are what gets persisted.
print("\n[ABS] OOF Blend (0.35/0.45/0.20)")
abs_blend_dx = 0.35*oof_abs_dx_lgb + 0.45*oof_abs_dx_xgb + 0.20*oof_abs_dx_cat
abs_blend_dy = 0.35*oof_abs_dy_lgb + 0.45*oof_abs_dy_xgb + 0.20*oof_abs_dy_cat
print(f"ABS OOF RMSE dx: {rmse(y_abs_dx, abs_blend_dx):.4f}")
print(f"ABS OOF RMSE dy: {rmse(y_abs_dy, abs_blend_dy):.4f}")

# Meta inputs for ABS. Naming: metaX_* stacks the three dx predictions and
# metaY_* the three dy predictions (X/Y is the field axis, not design/target);
# each has one column per base model family.
metaX_abs = np.vstack([oof_abs_dx_lgb,oof_abs_dx_xgb,oof_abs_dx_cat]).T
metaY_abs = np.vstack([oof_abs_dy_lgb,oof_abs_dy_xgb,oof_abs_dy_cat]).T

def ridge_tuned(Xo, yo, alphas=(0.1, 0.5, 1, 2, 5), n_splits=5):
    """Fit a non-negative, intercept-free Ridge with a cross-validated alpha.

    Each candidate ``alpha`` is scored by the RMSE of held-out predictions from
    a contiguous (unshuffled) K-fold split of the rows. Scoring on the
    training rows themselves cannot discriminate between candidates: the
    in-sample error never decreases as ``alpha`` grows, so that rule always
    returns the smallest value in the grid.

    Args:
        Xo: 2-D array of meta features, one column per base model.
        yo: 1-D target array with one entry per row of ``Xo``.
        alphas: Candidate regularisation strengths, tried in order; ties keep
            the earlier candidate.
        n_splits: Number of folds used for scoring (capped at the row count).

    Returns:
        ``(model, best_alpha)`` where ``model`` is refitted on ALL rows with
        the selected alpha.

    NOTE(review): folds are plain contiguous blocks, so rows of one play can
    fall on both sides of a fold boundary — switch to a grouped splitter if
    that matters.
    """
    from sklearn.model_selection import KFold

    Xo = np.asarray(Xo)
    yo = np.asarray(yo)
    n_rows = len(yo)
    folds = min(int(n_splits), n_rows)

    def _make(alpha):
        return Ridge(alpha=alpha, fit_intercept=False, positive=True)

    best_a, best = None, np.inf
    for a in alphas:
        if folds >= 2:
            held_out = np.empty(n_rows, dtype=float)
            for tr, va in KFold(n_splits=folds).split(Xo):
                held_out[va] = _make(a).fit(Xo[tr], yo[tr]).predict(Xo[va])
            sc = rmse(yo, held_out)
        else:
            # Too few rows to split: fall back to the in-sample score.
            sc = rmse(yo, _make(a).fit(Xo, yo).predict(Xo))
        if sc < best:
            best, best_a = sc, a

    if best_a is None:
        # Every score was NaN/inf; keep the first candidate rather than
        # passing alpha=None to Ridge.
        best_a = alphas[0]
    return _make(best_a).fit(Xo, yo), best_a

# Linear meta learner per axis (non-negative Ridge on the three base OOF columns).
ridge_abs_x, ax = ridge_tuned(metaX_abs, y_abs_dx)
ridge_abs_y, ay = ridge_tuned(metaY_abs, y_abs_dy)
oof_abs_metaR_dx = ridge_abs_x.predict(metaX_abs)
oof_abs_metaR_dy = ridge_abs_y.predict(metaY_abs)

# Non-linear meta learner per axis.
# NOTE: both meta learners are fitted on the full OOF matrix and then predict
# those same rows, so the "OOF" numbers printed for them below are in-sample
# with respect to the meta learner itself.
lgb_abs_x = lgb.LGBMRegressor(**META_LGB_PARAMS).fit(metaX_abs, y_abs_dx)
lgb_abs_y = lgb.LGBMRegressor(**META_LGB_PARAMS).fit(metaY_abs, y_abs_dy)
oof_abs_metaL_dx = lgb_abs_x.predict(metaX_abs)
oof_abs_metaL_dy = lgb_abs_y.predict(metaY_abs)

if Cfg.META_COMPARE:
    print(f"[ABS Meta] Ridge(Œ±={ax}) dx: {rmse(y_abs_dx, oof_abs_metaR_dx):.4f}  | LGB dx: {rmse(y_abs_dx, oof_abs_metaL_dx):.4f}")
    print(f"[ABS Meta] Ridge(Œ±={ay}) dy: {rmse(y_abs_dy, oof_abs_metaR_dy):.4f}  | LGB dy: {rmse(y_abs_dy, oof_abs_metaL_dy):.4f}")

# Blend the two ABS metas with non-negative weights (near-unregularised Ridge,
# no intercept). Columns: [Ridge meta, LGB meta].
blend_abs_Xdx = np.vstack([oof_abs_metaR_dx, oof_abs_metaL_dx]).T
blend_abs_Xdy = np.vstack([oof_abs_metaR_dy, oof_abs_metaL_dy]).T
blend_abs_rdx = Ridge(alpha=1e-3, fit_intercept=False, positive=True).fit(blend_abs_Xdx, y_abs_dx)
blend_abs_rdy = Ridge(alpha=1e-3, fit_intercept=False, positive=True).fit(blend_abs_Xdy, y_abs_dy)
oof_abs_meta_dx = blend_abs_rdx.predict(blend_abs_Xdx)
oof_abs_meta_dy = blend_abs_rdy.predict(blend_abs_Xdy)
print(f"[ABS Meta Blend] OOF RMSE dx: {rmse(y_abs_dx, oof_abs_meta_dx):.4f} | dy: {rmse(y_abs_dy, oof_abs_meta_dy):.4f}")

# ============================================================
# Save OOF meta predictions for Temporal Clamp refinement
# ============================================================
# (pickle and os are already imported at the top of the file; this re-import
# is redundant but harmless.)
import pickle, os

OOF_SAVE_PATH = os.path.join(Cfg.MODEL_DIR, "oof_meta_predictions.pkl")

# The payload holds the ABS-stream meta predictions, the ABS targets and the
# whole training frame `pdf` (already standardized), so the file can be large.
oof_data = {
    "oof_dx_meta_r": oof_abs_metaR_dx,
    "oof_dx_meta_l": oof_abs_metaL_dx,
    "oof_dy_meta_r": oof_abs_metaR_dy,
    "oof_dy_meta_l": oof_abs_metaL_dy,
    "y_dx": y_abs_dx,
    "y_dy": y_abs_dy,
    "pdf": pdf
}

with open(OOF_SAVE_PATH, "wb") as f:
    pickle.dump(oof_data, f)

print(f"‚úÖ Saved OOF meta predictions for Temporal Clamp ‚Üí {OOF_SAVE_PATH}")


In [None]:
# ============================================================
# Cross-Validation with OOF capture (DIR stream)
# ============================================================
# Out-of-fold prediction buffers for the DIR stream (direction-signed targets).
oof_dir_dx_lgb = np.zeros_like(y_dir_dx)
oof_dir_dx_xgb = np.zeros_like(y_dir_dx)
oof_dir_dx_cat = np.zeros_like(y_dir_dx)
oof_dir_dy_lgb = np.zeros_like(y_dir_dy)
oof_dir_dy_xgb = np.zeros_like(y_dir_dy)
oof_dir_dy_cat = np.zeros_like(y_dir_dy)

# Same features, same fold splits and same hyper-parameters as the ABS stream;
# only the targets differ.
print("\nTraining DIR base models with GroupKFold ...")
for fold,(tr,va) in enumerate(cv_iter,1):
    X_tr,X_va = X[tr],X[va]
    ydx_tr,ydx_va = y_dir_dx[tr],y_dir_dx[va]
    ydy_tr,ydy_va = y_dir_dy[tr],y_dir_dy[va]

    lgb_dx = lgb.LGBMRegressor(**LGB_PARAMS).fit(X_tr,ydx_tr)
    lgb_dy = lgb.LGBMRegressor(**LGB_PARAMS).fit(X_tr,ydy_tr)
    xgb_dx = xgb.XGBRegressor(**XGB_PARAMS).fit(X_tr,ydx_tr,verbose=False)
    xgb_dy = xgb.XGBRegressor(**XGB_PARAMS).fit(X_tr,ydy_tr,verbose=False)
    cat_dx = CatBoostRegressor(**CAT_PARAMS).fit(X_tr,ydx_tr,verbose=False)
    cat_dy = CatBoostRegressor(**CAT_PARAMS).fit(X_tr,ydy_tr,verbose=False)

    p_dx_lgb,p_dx_xgb,p_dx_cat = lgb_dx.predict(X_va),xgb_dx.predict(X_va),cat_dx.predict(X_va)
    p_dy_lgb,p_dy_xgb,p_dy_cat = lgb_dy.predict(X_va),xgb_dy.predict(X_va),cat_dy.predict(X_va)

    # Store the held-out predictions at the validation row positions.
    oof_dir_dx_lgb[va],oof_dir_dx_xgb[va],oof_dir_dx_cat[va] = p_dx_lgb,p_dx_xgb,p_dx_cat
    oof_dir_dy_lgb[va],oof_dir_dy_xgb[va],oof_dir_dy_cat[va] = p_dy_lgb,p_dy_xgb,p_dy_cat

    # Persist every fold model under a "dir_" name so it cannot clash with the
    # ABS models of the same fold.
    for m,name in [(lgb_dx,"dir_lgb_dx"),(lgb_dy,"dir_lgb_dy"),
                   (xgb_dx,"dir_xgb_dx"),(xgb_dy,"dir_xgb_dy"),
                   (cat_dx,"dir_cat_dx"),(cat_dy,"dir_cat_dy")]:
        with open(os.path.join(Cfg.MODEL_DIR,f"fold{fold}_{name}.pkl"),"wb") as f:
            pickle.dump(m,f)

    print(f"[DIR] Fold {fold}: LGB dx={rmse(ydx_va,p_dx_lgb):.4f} dy={rmse(ydy_va,p_dy_lgb):.4f}")

# Fixed-weight reference blend, reported in DIR space only.
print("\n[DIR] OOF Blend (0.35/0.45/0.20)")
dir_blend_dx = 0.35*oof_dir_dx_lgb + 0.45*oof_dir_dx_xgb + 0.20*oof_dir_dx_cat
dir_blend_dy = 0.35*oof_dir_dy_lgb + 0.45*oof_dir_dy_xgb + 0.20*oof_dir_dy_cat
print(f"DIR OOF RMSE dx_dir: {rmse(y_dir_dx, dir_blend_dx):.4f}")
print(f"DIR OOF RMSE dy_dir: {rmse(y_dir_dy, dir_blend_dy):.4f}")


# ============================================================
# Meta for DIR
# ============================================================

# Meta inputs for DIR: metaX_* stacks the three dx_dir predictions, metaY_* the
# three dy_dir predictions (one column per base model family).
metaX_dir = np.vstack([oof_dir_dx_lgb,oof_dir_dx_xgb,oof_dir_dx_cat]).T
metaY_dir = np.vstack([oof_dir_dy_lgb,oof_dir_dy_xgb,oof_dir_dy_cat]).T

# Linear meta learner per axis.
ridge_dir_x, axd = ridge_tuned(metaX_dir, y_dir_dx)
ridge_dir_y, ayd = ridge_tuned(metaY_dir, y_dir_dy)
oof_dir_metaR_dx = ridge_dir_x.predict(metaX_dir)
oof_dir_metaR_dy = ridge_dir_y.predict(metaY_dir)

# Non-linear meta learner per axis. As in the ABS stream, the meta learners
# predict the rows they were fitted on, so their printed RMSE is in-sample.
lgb_dir_x = lgb.LGBMRegressor(**META_LGB_PARAMS).fit(metaX_dir, y_dir_dx)
lgb_dir_y = lgb.LGBMRegressor(**META_LGB_PARAMS).fit(metaY_dir, y_dir_dy)
oof_dir_metaL_dx = lgb_dir_x.predict(metaX_dir)
oof_dir_metaL_dy = lgb_dir_y.predict(metaY_dir)

if Cfg.META_COMPARE:
    print(f"[DIR Meta] Ridge(Œ±={axd}) dx: {rmse(y_dir_dx, oof_dir_metaR_dx):.4f}  | LGB dx: {rmse(y_dir_dx, oof_dir_metaL_dx):.4f}")
    print(f"[DIR Meta] Ridge(Œ±={ayd}) dy: {rmse(y_dir_dy, oof_dir_metaR_dy):.4f}  | LGB dy: {rmse(y_dir_dy, oof_dir_metaL_dy):.4f}")

# Blend the two DIR metas with non-negative weights. Columns: [Ridge meta, LGB meta].
blend_dir_Xdx = np.vstack([oof_dir_metaR_dx, oof_dir_metaL_dx]).T
blend_dir_Xdy = np.vstack([oof_dir_metaR_dy, oof_dir_metaL_dy]).T
blend_dir_rdx = Ridge(alpha=1e-3, fit_intercept=False, positive=True).fit(blend_dir_Xdx, y_dir_dx)
blend_dir_rdy = Ridge(alpha=1e-3, fit_intercept=False, positive=True).fit(blend_dir_Xdy, y_dir_dy)
oof_dir_meta_dx = blend_dir_rdx.predict(blend_dir_Xdx)
oof_dir_meta_dy = blend_dir_rdy.predict(blend_dir_Xdy)
print(f"[DIR Meta Blend] OOF RMSE dx_dir: {rmse(y_dir_dx, oof_dir_meta_dx):.4f} | dy_dir: {rmse(y_dir_dy, oof_dir_meta_dy):.4f}")


In [None]:
# ============================================================
# Final Hybrid Blend (ABS vs DIR‚ÜíABS), learn non-negative weights on OOF
# ============================================================
print("\nüîß Learning final ABS vs DIR blend (in ABS space)...")
# Convert DIR OOF metas to ABS space using play_dir_sign. The sign is +/-1, so
# multiplying by it a second time undoes the flip applied to the DIR targets.
dir2abs_dx = oof_dir_meta_dx * play_dir_sign
dir2abs_dy = oof_dir_meta_dy * play_dir_sign

# Two-feature OOF design matrix: [ABS_meta, DIR_meta_in_abs]
final_X_dx = np.vstack([oof_abs_meta_dx, dir2abs_dx]).T
final_X_dy = np.vstack([oof_abs_meta_dy, dir2abs_dy]).T

# Non-negative, intercept-free blend of the two streams, fitted against the
# ABS targets.
final_ridge_dx = Ridge(alpha=1e-3, fit_intercept=False, positive=True).fit(final_X_dx, y_abs_dx)
final_ridge_dy = Ridge(alpha=1e-3, fit_intercept=False, positive=True).fit(final_X_dy, y_abs_dy)

# The weights are normalised for display only; the small epsilon avoids a
# division by zero if both coefficients are zero. Predictions below use the
# raw (un-normalised) coefficients.
w_abs_x, w_dir_x = final_ridge_dx.coef_
w_abs_y, w_dir_y = final_ridge_dy.coef_
sx = w_abs_x + w_dir_x + 1e-12
sy = w_abs_y + w_dir_y + 1e-12
print(f"Final blend weights ‚Äî X: ABS={w_abs_x/sx:.3f} DIR={w_dir_x/sx:.3f}")
print(f"Final blend weights ‚Äî Y: ABS={w_abs_y/sy:.3f} DIR={w_dir_y/sy:.3f}")

oof_final_dx = final_ridge_dx.predict(final_X_dx)
oof_final_dy = final_ridge_dy.predict(final_X_dy)
print(f"Final OOF RMSE ‚Äî dx: {rmse(y_abs_dx, oof_final_dx):.4f} | dy: {rmse(y_abs_dy, oof_final_dy):.4f}")


In [None]:
# ============================================================
# Role-bias from final blended OOF (absolute space)
# ============================================================
print("\nüéØ Learning per-role residual biases (OOF, shrunken, ABS space) ...")
# Residuals are prediction minus truth, so a positive bias means the model
# over-shoots for that role.
res_dx = oof_final_dx - y_abs_dx
res_dy = oof_final_dy - y_abs_dy
# Defensive fallback: without a usable role vector every row gets role "UNK".
if role_raw_tr is None or len(role_raw_tr) != len(res_dx):
    role_raw_tr = np.array(["UNK"]*len(res_dx))

df_res = pd.DataFrame({"role": role_raw_tr, "res_dx": res_dx, "res_dy": res_dy})
global_dx = float(df_res["res_dx"].mean())
global_dy = float(df_res["res_dy"].mean())

role_bias_dx, role_bias_dy = {}, {}
counts = df_res.groupby("role").size()
means_dx = df_res.groupby("role")["res_dx"].mean()
means_dy = df_res.groupby("role")["res_dy"].mean()

# Shrink each role's mean residual toward the global mean: a role with n rows
# gets weight n/(n+shrink) on its own mean and shrink/(n+shrink) on the global
# mean, so sparsely populated roles stay close to the global value.
for role, n in counts.items():
    m_dx = float(means_dx.loc[role])
    m_dy = float(means_dy.loc[role])
    shrink = Cfg.ROLE_BIAS_SHRINK
    role_bias_dx[role] = (n/(n+shrink))*m_dx + (shrink/(n+shrink))*global_dx
    role_bias_dy[role] = (n/(n+shrink))*m_dy + (shrink/(n+shrink))*global_dy

print(f"Computed shrunken biases for {len(role_bias_dx)} roles (examples):",
      dict(list(role_bias_dx.items())[:5]))

# Persist meta objects needed at inference: both streams' meta learners and
# blends, the final ABS/DIR blend and the per-role bias tables.
with open(os.path.join(Cfg.MODEL_DIR, "meta_and_blends.pkl"), "wb") as f:
    pickle.dump({
        # ABS meta
        "ridge_abs_x": ridge_abs_x, "ridge_abs_y": ridge_abs_y,
        "lgb_abs_x": lgb_abs_x, "lgb_abs_y": lgb_abs_y,
        "blend_abs_rdx": blend_abs_rdx, "blend_abs_rdy": blend_abs_rdy,
        # DIR meta
        "ridge_dir_x": ridge_dir_x, "ridge_dir_y": ridge_dir_y,
        "lgb_dir_x": lgb_dir_x, "lgb_dir_y": lgb_dir_y,
        "blend_dir_rdx": blend_dir_rdx, "blend_dir_rdy": blend_dir_rdy,
        # Final blend
        "final_ridge_dx": final_ridge_dx, "final_ridge_dy": final_ridge_dy,
        # Role bias
        "role_bias_dx": role_bias_dx, "role_bias_dy": role_bias_dy
    }, f)


In [None]:
# ============================================================
# Test Inference
# ============================================================
# Rebuild, for the test set, the same feature table that was built for training.
print("\nPreparing test inference ...")
test_input    = pl.read_csv(os.path.join(Cfg.DATA_DIR,"test_input.csv"))
test_template = pl.read_csv(os.path.join(Cfg.DATA_DIR,"test.csv"))
test_template = to_seconds_frames(test_template)

agg_test   = aggregate_per_player(test_input)
qb_last_te = qb_last_xy(test_input)
const_test = const_first_per_player(test_input)

# Force identical key dtypes so the joins below cannot fail on a type mismatch.
for c in ["game_id","play_id","nfl_id"]:
    agg_test      = agg_test.with_columns(pl.col(c).cast(pl.Int64))
    test_template = test_template.with_columns(pl.col(c).cast(pl.Int64))
    const_test    = const_test.with_columns(pl.col(c).cast(pl.Int64))
qb_last_te = qb_last_te.with_columns(pl.col("game_id").cast(pl.Int64),
                                     pl.col("play_id").cast(pl.Int64))

# Unlike training (inner join), every template row is kept here: rows without
# tracking input get null features instead of being dropped.
test_feat = (test_template
             .join(agg_test,   on=["game_id","play_id","nfl_id"], how="left")
             .join(const_test, on=["game_id","play_id","nfl_id"], how="left")
             .join(qb_last_te, on=["game_id","play_id"],          how="left"))

# Ensure last positions exist as columns.
# NOTE(review): 60.0 / 26.65 are presumably the field centre — confirm. These
# defaults only apply when the column is missing entirely, not to null values.
if "x_last" not in test_feat.columns:
    test_feat = test_feat.with_columns(pl.lit(60.0).alias("x_last"))
if "y_last" not in test_feat.columns:
    test_feat = test_feat.with_columns(pl.lit(26.65).alias("y_last"))

# QB-relative quick fill & rel coords. Expressions inside one with_columns all
# read the ORIGINAL columns, so the rel_* values are computed from the unfilled
# qb_*_last and are null where the QB position is missing.
# NOTE(review): the training code fills first and then subtracts, which yields
# 0 in that case — confirm whether this difference is intended.
test_feat = test_feat.with_columns([
    pl.col("qb_x_last").fill_null(pl.col("x_last")).alias("qb_x_last"),
    pl.col("qb_y_last").fill_null(pl.col("y_last")).alias("qb_y_last"),
    (pl.col("x_last") - pl.col("qb_x_last")).alias("rel_x_last_qb"),
    (pl.col("y_last") - pl.col("qb_y_last")).alias("rel_y_last_qb"),
])

test_pd = test_feat.to_pandas()
test_pd["dist_to_qb"] = np.sqrt(test_pd["rel_x_last_qb"]**2 + test_pd["rel_y_last_qb"]**2)
test_pd["ang_to_qb"]  = np.arctan2(test_pd["rel_y_last_qb"], test_pd["rel_x_last_qb"])
test_pd["bearing_qb"] = np.degrees(test_pd["ang_to_qb"])

# Ball-landing-relative features; all zeros when the coordinates are unavailable.
if {"ball_land_x","ball_land_y"}.issubset(set(test_pd.columns)):
    test_pd["rel_x_last_ball"] = test_pd["x_last"] - test_pd["ball_land_x"]
    test_pd["rel_y_last_ball"] = test_pd["y_last"] - test_pd["ball_land_y"]
    test_pd["dist_to_ball"]    = np.sqrt(test_pd["rel_x_last_ball"]**2 + test_pd["rel_y_last_ball"]**2)
    test_pd["ang_to_ball"]     = np.arctan2(test_pd["rel_y_last_ball"], test_pd["rel_x_last_ball"])
    test_pd["bearing_ball"]    = np.degrees(test_pd["ang_to_ball"])
else:
    for c in ["rel_x_last_ball","rel_y_last_ball","dist_to_ball","ang_to_ball","bearing_ball"]:
        test_pd[c] = 0.0

# Direction sign for test: left -> -1, anything else (or a missing column) -> +1.
play_dir_sign_test = np.where(test_pd.get("play_direction","right")=="left", -1.0, 1.0)
test_pd["play_dir_sign_num"] = play_dir_sign_test

# Context interactions, mirroring the training-side definitions.
if "absolute_yardline_number" in test_pd.columns:
    test_pd["yard_norm"] = test_pd["absolute_yardline_number"]/100.0
else:
    test_pd["yard_norm"] = 0.0
if "frame_ratio" not in test_pd.columns:
    test_pd["frame_ratio"] = 0.0
test_pd["yard_dir_ctx"]   = test_pd["yard_norm"] * test_pd["play_dir_sign_num"]
test_pd["fratio_ball_ctx"] = test_pd["frame_ratio"] * test_pd["dist_to_ball"]

# Align & scale features: reload the training column order and scaler, add any
# missing training feature as a 0.0 column, then keep ONLY the feature columns
# in the saved order.
with open(os.path.join(Cfg.MODEL_DIR, "feature_scaler.pkl"), "rb") as f:
    obj = pickle.load(f)
feat_cols_saved = obj["feat_cols"]; scaler = obj["scaler"]
for c in feat_cols_saved:
    if c not in test_pd.columns:
        test_pd[c] = 0.0
# From here on test_pd no longer holds id columns, and x_last / y_last are
# standardized; use test_feat / test_template for identifiers and raw positions.
test_pd = test_pd[feat_cols_saved]
test_pd[feat_cols_saved] = scaler.transform(test_pd[feat_cols_saved])
X_test = test_pd.values

# Load metas & role bias
with open(os.path.join(Cfg.MODEL_DIR, "meta_and_blends.pkl"), "rb") as f:
    M = pickle.load(f)

# ========== ABS predictions ==========
# Per fold: load the six pickled ABS-stream base models (3 algorithms x 2 axes),
# then collect each model's prediction on X_test.
_abs_algos = ("lgb", "xgb", "cat")
_abs_axes = ("dx", "dy")
pred_abs = {algo: {axis: [] for axis in _abs_axes} for algo in _abs_algos}
for fold in range(1, Cfg.N_FOLDS + 1):
    fold_models = {}
    for algo in _abs_algos:
        for axis in _abs_axes:
            model_path = os.path.join(Cfg.MODEL_DIR, f"fold{fold}_abs_{algo}_{axis}.pkl")
            with open(model_path, "rb") as f:
                fold_models[(algo, axis)] = pickle.load(f)

    for (algo, axis), model in fold_models.items():
        pred_abs[algo][axis].append(model.predict(X_test))

# Collapse the per-fold lists into one fold-averaged vector per algorithm/axis.
for per_axis in pred_abs.values():
    for axis in _abs_axes:
        per_axis[axis] = np.mean(per_axis[axis], axis=0)

# Meta inputs: one column per base algorithm, in lgb / xgb / cat order.
meta_in_abs_dx = np.vstack([pred_abs[algo]["dx"] for algo in _abs_algos]).T
meta_in_abs_dy = np.vstack([pred_abs[algo]["dy"] for algo in _abs_algos]).T

# Two meta learners per axis, then the stored blender over their outputs.
dx_abs_r = M["ridge_abs_x"].predict(meta_in_abs_dx)
dy_abs_r = M["ridge_abs_y"].predict(meta_in_abs_dy)
dx_abs_l = M["lgb_abs_x"].predict(meta_in_abs_dx)
dy_abs_l = M["lgb_abs_y"].predict(meta_in_abs_dy)

abs_blend_in_dx = np.vstack([dx_abs_r, dx_abs_l]).T
abs_blend_in_dy = np.vstack([dy_abs_r, dy_abs_l]).T
dx_abs_meta = M["blend_abs_rdx"].predict(abs_blend_in_dx)
dy_abs_meta = M["blend_abs_rdy"].predict(abs_blend_in_dy)

# ========== DIR predictions (still directional) ==========
# Per fold: load the six pickled DIR-stream base models (3 algorithms x 2 axes),
# then collect each model's prediction on X_test.
_dir_algos = ("lgb", "xgb", "cat")
_dir_axes = ("dx", "dy")
pred_dir = {algo: {axis: [] for axis in _dir_axes} for algo in _dir_algos}
for fold in range(1, Cfg.N_FOLDS + 1):
    fold_models = {}
    for algo in _dir_algos:
        for axis in _dir_axes:
            model_path = os.path.join(Cfg.MODEL_DIR, f"fold{fold}_dir_{algo}_{axis}.pkl")
            with open(model_path, "rb") as f:
                fold_models[(algo, axis)] = pickle.load(f)

    for (algo, axis), model in fold_models.items():
        pred_dir[algo][axis].append(model.predict(X_test))

# Collapse the per-fold lists into one fold-averaged vector per algorithm/axis.
for per_axis in pred_dir.values():
    for axis in _dir_axes:
        per_axis[axis] = np.mean(per_axis[axis], axis=0)

# Meta inputs: one column per base algorithm, in lgb / xgb / cat order.
meta_in_dir_dx = np.vstack([pred_dir[algo]["dx"] for algo in _dir_algos]).T
meta_in_dir_dy = np.vstack([pred_dir[algo]["dy"] for algo in _dir_algos]).T

# Two meta learners per axis, then the stored blender over their outputs.
dx_dir_r = M["ridge_dir_x"].predict(meta_in_dir_dx)
dy_dir_r = M["ridge_dir_y"].predict(meta_in_dir_dy)
dx_dir_l = M["lgb_dir_x"].predict(meta_in_dir_dx)
dy_dir_l = M["lgb_dir_y"].predict(meta_in_dir_dy)

dir_blend_in_dx = np.vstack([dx_dir_r, dx_dir_l]).T
dir_blend_in_dy = np.vstack([dy_dir_r, dy_dir_l]).T
dx_dir_meta = M["blend_dir_rdx"].predict(dir_blend_in_dx)
dy_dir_meta = M["blend_dir_rdy"].predict(dir_blend_in_dy)

# Convert DIR meta predictions to ABS using test play_dir_sign
# (the sign is +/-1, so multiplying by it again inverts the directional flip).
dx_dir_as_abs = dx_dir_meta * play_dir_sign_test
dy_dir_as_abs = dy_dir_meta * play_dir_sign_test

# Final learned ABS vs DIR blend on TEST: two columns per axis, ABS then DIR.
final_in_dx = np.vstack([dx_abs_meta, dx_dir_as_abs]).T
final_in_dy = np.vstack([dy_abs_meta, dy_dir_as_abs]).T
dx_hat = M["final_ridge_dx"].predict(final_in_dx)
dy_hat = M["final_ridge_dy"].predict(final_in_dy)

# Per-role bias correction on TEST (ABS space)
# Role label per test row; rows without a role (or a missing column) become "UNK".
if "player_role" in test_feat.columns:
    role_test = test_feat.get_column("player_role").to_pandas()
else:
    role_test = pd.Series(["UNK"] * test_feat.height)
role_test = role_test.fillna("UNK").astype(str).values

role_bias_dx = M["role_bias_dx"]
role_bias_dy = M["role_bias_dy"]

# Fallback for roles absent from the lookup: plain mean of the stored role biases.
global_bias_dx = 0.0
if len(role_bias_dx) > 0:
    global_bias_dx = float(np.mean(list(role_bias_dx.values())))
global_bias_dy = 0.0
if len(role_bias_dy) > 0:
    global_bias_dy = float(np.mean(list(role_bias_dy.values())))

bias_dx_vec = np.array(
    [role_bias_dx.get(role, global_bias_dx) for role in role_test],
    dtype=float,
)
bias_dy_vec = np.array(
    [role_bias_dy.get(role, global_bias_dy) for role in role_test],
    dtype=float,
)

# Subtract the per-row bias from the blended offsets.
dx_hat = dx_hat - bias_dx_vec
dy_hat = dy_hat - bias_dy_vec

# Recompose to absolute coordinates: last observed position plus predicted offset.
last_x = test_feat["x_last"].to_numpy()
last_y = test_feat["y_last"].to_numpy()
x_abs = last_x + dx_hat
y_abs = last_y + dy_hat

# Build submission: id is game_id, play_id, nfl_id and frame_id joined by "_".
id_fields = ["game_id", "play_id", "nfl_id", "frame_id"]
test_df_id = test_feat.select(id_fields).to_pandas().astype(str)
row_id = test_df_id[id_fields[0]]
for field in id_fields[1:]:
    row_id = row_id + "_" + test_df_id[field]

submission = pd.DataFrame({"id": row_id, "x": x_abs, "y": y_abs})
submission.to_csv("/kaggle/working/submission.csv", index=False)
print("‚úÖ submission.csv saved:", submission.shape)
print(submission.head())

gc.collect()

# ============================================================
# Reload OOF meta predictions if training not rerun
# ============================================================
import pickle
import os
import numpy as np
import pandas as pd

OOF_SAVE_PATH = os.path.join(Cfg.MODEL_DIR, "oof_meta_predictions.pkl")

# The refinement below cannot run without the cached OOF arrays, so fail fast.
if not os.path.exists(OOF_SAVE_PATH):
    raise RuntimeError("Missing OOF cache ‚Äî run full training before Temporal Clamp.")

print("üîÅ Loading cached OOF meta predictions ...")
with open(OOF_SAVE_PATH, "rb") as f:
    oof_data = pickle.load(f)

# Unpack the cached entries into the module-level names used further down.
(
    oof_dx_meta_r,
    oof_dx_meta_l,
    oof_dy_meta_r,
    oof_dy_meta_l,
    y_dx,
    y_dy,
    pdf,
) = (
    oof_data[key]
    for key in (
        "oof_dx_meta_r",
        "oof_dx_meta_l",
        "oof_dy_meta_r",
        "oof_dy_meta_l",
        "y_dx",
        "y_dy",
        "pdf",
    )
)
print(f"‚úÖ Reloaded OOF data ‚Äî dx:{len(y_dx):,} dy:{len(y_dy):,}")

# ============================================================
# NFL Big Data Bowl 2026 - Daniel GPU V10.3.1 (Temporal Clamp, Standalone)
# Meant to run after the trained V10.3 notebook (no retrain needed)
# ============================================================

import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet

print("\nüß≠ Running V10.3.1 Temporal Clamp refinement ...")

# ============================================================
# Step 1 - Temporal-weighted outlier clamp (on existing OOF)
# ============================================================

# Meta-of-meta design matrices: column 0 = "_r" OOF vector, column 1 = "_l" OOF vector.
blend_X_dx = np.vstack((oof_dx_meta_r, oof_dx_meta_l)).T
blend_X_dy = np.vstack((oof_dy_meta_r, oof_dy_meta_l)).T

# Sample weights: frame_ratio scaled by (1 + distance-to-ball relative to its mean),
# then normalised to mean ~1. Missing columns fall back to the scalar 0.0.
dist = np.asarray(pdf.get("dist_to_ball", 0.0))
fr = np.asarray(pdf.get("frame_ratio", 0.0))
dist_relative = dist / (dist.mean() + 1e-6)
w_base = fr * (1.0 + dist_relative)
w = w_base / (w_base.mean() + 1e-12)

# Probe residuals taken from the "_l" (LGB) meta predictions.
res_dx_probe = oof_dx_meta_l - y_dx
res_dy_probe = oof_dy_meta_l - y_dy
abs_res_dx = np.abs(res_dx_probe)
abs_res_dy = np.abs(res_dy_probe)

# Keep rows whose absolute residual is at or below the 99.8th percentile.
q_dx = np.quantile(abs_res_dx, 0.998)
q_dy = np.quantile(abs_res_dy, 0.998)
mask_dx = abs_res_dx <= q_dx
mask_dy = abs_res_dy <= q_dy
print(f"[Clamp] Kept samples: dx={mask_dx.mean():.4f}, dy={mask_dy.mean():.4f}")

# ============================================================
# Clamp Stability Guard - Clean NaNs / Infs / Scaling
# ============================================================
def safe_clean(X):
    """Return ``X`` with non-finite values zeroed and oversized values rescaled.

    NaN, +inf and -inf are replaced by 0.0. If the largest absolute value
    that remains exceeds 1e3, the whole array is divided by that maximum
    (plus a tiny epsilon), which brings every entry into [-1, 1]; otherwise
    the values are left untouched. Empty input is returned as an empty array
    instead of raising from the max-reduction.

    Args:
        X: array-like (or scalar) of numeric values.

    Returns:
        The cleaned NumPy array (or NumPy scalar for scalar input).
    """
    X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)
    if X.size == 0:
        # max() over a zero-size array raises ValueError; nothing to rescale.
        return X
    peak = np.abs(X).max()  # computed once and reused for the test and the divisor
    if peak > 1e3:
        X = X / (peak + 1e-12)
    return X

# Sanitise every array that feeds the clamp fit (non-finite -> 0, huge -> rescaled).
blend_X_dx, blend_X_dy = safe_clean(blend_X_dx), safe_clean(blend_X_dy)
y_dx, y_dy = safe_clean(y_dx), safe_clean(y_dy)
w = safe_clean(w)

# Scale target and features to the same magnitude
from sklearn.preprocessing import StandardScaler

# One fitted scaler per axis for the two-column blend inputs.
sx = StandardScaler()
sy = StandardScaler()
blend_X_dx_s = sx.fit_transform(blend_X_dx)
blend_X_dy_s = sy.fit_transform(blend_X_dy)

# Targets are z-scored by hand (mean / std, with a small epsilon on the std).
y_dx_center, y_dx_spread = np.mean(y_dx), np.std(y_dx) + 1e-9
y_dy_center, y_dy_spread = np.mean(y_dy), np.std(y_dy) + 1e-9
y_dx_s = (y_dx - y_dx_center) / y_dx_spread
y_dy_s = (y_dy - y_dy_center) / y_dy_spread

# Clip weights to avoid zero or infinity: floor at 1e-6, cap at the 99.9th percentile.
w_upper = np.percentile(w, 99.9)
w = np.clip(w, 1e-6, w_upper)

# ============================================================
# Step 2 - ElasticNet (L1+L2) meta blend with temporal weights
# ============================================================
# Both axes share one hyper-parameter set; coefficients are constrained non-negative.
_en_kwargs = dict(
    alpha=0.01,
    l1_ratio=0.25,
    fit_intercept=True,
    positive=True,
    max_iter=20000,
    random_state=Cfg.SEED,
)
blend_en_x = ElasticNet(**_en_kwargs)
blend_en_y = ElasticNet(**_en_kwargs)

print(f"Training ElasticNet with {mask_dx.sum():,} valid dx and {mask_dy.sum():,} valid dy samples...")

# Fit only on rows that survived the residual clamp, using the temporal weights.
blend_en_x.fit(blend_X_dx_s[mask_dx], y_dx_s[mask_dx], sample_weight=w[mask_dx])
blend_en_y.fit(blend_X_dy_s[mask_dy], y_dy_s[mask_dy], sample_weight=w[mask_dy])

# Express the two coefficients per axis as shares of their sum (epsilon guards /0).
wx_r, wx_l = blend_en_x.coef_
wy_r, wy_l = blend_en_y.coef_
nx = wx_r + wx_l + 1e-12
ny = wy_r + wy_l + 1e-12

print(f"[Clamp] Final weights X: Ridge={wx_r/nx:.3f}, LGB={wx_l/nx:.3f}")
print(f"[Clamp] Final weights Y: Ridge={wy_r/ny:.3f}, LGB={wy_l/ny:.3f}")

# OOF predictions over all rows; the RMSE below is measured against the
# z-scored targets, i.e. in standardized units.
oof_dx_meta_clamp = blend_en_x.predict(blend_X_dx_s)
oof_dy_meta_clamp = blend_en_y.predict(blend_X_dy_s)
print(f"[Clamp] OOF RMSE dx: {rmse(y_dx_s, oof_dx_meta_clamp):.4f}")
print(f"[Clamp] OOF RMSE dy: {rmse(y_dy_s, oof_dy_meta_clamp):.4f}")

# ============================================================
# Prepare Ridge & LGB meta-level predictions for clamp
# ============================================================
# TEST-time ABS-stream meta predictions; these are the two inputs of the clamp blend.
# NOTE(review): assumes the cached OOF "_r"/"_l" vectors the blend was fit on come
# from the same (ABS) Ridge/LGB meta models - confirm against the training cell.
dx_hat_r = M["ridge_abs_x"].predict(meta_in_abs_dx)
dy_hat_r = M["ridge_abs_y"].predict(meta_in_abs_dy)
dx_hat_l = M["lgb_abs_x"].predict(meta_in_abs_dx)
dy_hat_l = M["lgb_abs_y"].predict(meta_in_abs_dy)

# ============================================================
# Step 3 - Replace test-time meta blend with clamp blend
# ============================================================
# blend_en_x / blend_en_y were fit on inputs standardized by sx / sy and on
# z-scored targets. Test inputs therefore have to pass through the same fitted
# scalers, and the predictions have to be mapped back from z-scores to target
# units before they can be added to x_last / y_last.
# NOTE(review): if safe_clean() rescaled the OOF inputs or targets (abs max > 1e3),
# that extra factor is not recoverable here - confirm it never triggers.
test_blend_dx_s = sx.transform(np.vstack([dx_hat_r, dx_hat_l]).T)
test_blend_dy_s = sy.transform(np.vstack([dy_hat_r, dy_hat_l]).T)

# Same centre / spread that produced y_dx_s and y_dy_s.
y_dx_mu, y_dx_sd = np.mean(y_dx), np.std(y_dx) + 1e-9
y_dy_mu, y_dy_sd = np.mean(y_dy), np.std(y_dy) + 1e-9

dx_hat = blend_en_x.predict(test_blend_dx_s) * y_dx_sd + y_dx_mu
dy_hat = blend_en_y.predict(test_blend_dy_s) * y_dy_sd + y_dy_mu

# Apply the existing per-role bias correction & recomposition
dx_hat = dx_hat - bias_dx_vec
dy_hat = dy_hat - bias_dy_vec

x_abs = test_feat["x_last"].to_numpy() + dx_hat
y_abs = test_feat["y_last"].to_numpy() + dy_hat

# Rebuild the submission; this overwrites the file written by the earlier blend.
test_df_id = test_feat.select(["game_id","play_id","nfl_id","frame_id"]).to_pandas().astype(str)
submission = pd.DataFrame({
    "id": test_df_id["game_id"] + "_" + test_df_id["play_id"] + "_" +
          test_df_id["nfl_id"] + "_" + test_df_id["frame_id"],
    "x": x_abs,
    "y": y_abs
})
submission.to_csv("/kaggle/working/submission.csv", index=False)
print("‚úÖ submission.csv saved:", submission.shape)
print(submission.head())

