# In [None]:
"""
NFL BIG DATA BOWL 2026 - INFERENCE ONLY
Loads pre-trained winning_models.pkl and generates predictions
FAST - No training, just inference!
"""

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

from multiprocessing import Pool as MultiprocessingPool, cpu_count
from tqdm.auto import tqdm
import pickle
import gc

# ============================================================================
# CONFIG
# ============================================================================

class Config:
    """Static settings shared by the inference pipeline."""

    # Input locations: competition data directory and the pickled model bundle.
    BASEDIR = '/kaggle/input/nfl-big-data-bowl-2026-prediction'
    MODEL_PATH = '/kaggle/input/weaponxnfl/winning_models.pkl'

    # Bounds that predicted x / y coordinates are clipped to.
    FIELD_X_MIN = 0.0
    FIELD_X_MAX = 120.0
    FIELD_Y_MIN = 0.0
    FIELD_Y_MAX = 53.3

    # Defaults for compute_neighbor_embeddings: neighbour cap, search radius
    # and the scale of the exp(-dist / TAU) neighbour weighting.
    K_NEIGH = 6
    RADIUS = 30.0
    TAU = 8.0

# ============================================================================
# UTILITIES
# ============================================================================

def get_velocity(speed, direction_deg):
    """Split a speed into x/y components from a heading given in degrees.

    The x component uses the sine of the heading and the y component the
    cosine, so a heading of 0 degrees points along +y and 90 degrees along +x.

    Returns:
        Tuple ``(velocity_x, velocity_y)``; scalars or arrays, following the
        inputs.
    """
    heading_rad = np.deg2rad(direction_deg)
    vel_x = speed * np.sin(heading_rad)
    vel_y = speed * np.cos(heading_rad)
    return vel_x, vel_y

def physics_baseline(x, y, velocity_x, velocity_y, delta_t):
    """Extrapolate positions at constant velocity and clip them to the field.

    Args:
        x, y: Starting coordinates.
        velocity_x, velocity_y: Velocity components.
        delta_t: Elapsed time to project forward by.

    Returns:
        Tuple ``(pred_x, pred_y)`` limited to the ``Config.FIELD_*`` bounds.
    """
    raw_x = x + velocity_x * delta_t
    raw_y = y + velocity_y * delta_t
    return (
        np.clip(raw_x, Config.FIELD_X_MIN, Config.FIELD_X_MAX),
        np.clip(raw_y, Config.FIELD_Y_MIN, Config.FIELD_Y_MAX),
    )

# ============================================================================
# FEATURE ENGINEERING (SAME AS TRAINING)
# ============================================================================

def get_opponent_features(input_df):
    """Build per-player opponent-proximity and "mirror receiver" features.

    For each (game_id, play_id) the frames are sorted by ``frame_id`` and
    ``GroupBy.last()`` is taken per ``nfl_id`` (note: pandas' ``last()`` picks
    the last non-null entry of each column). For every player in the play the
    following are computed:

    * ``nearest_opp_dist``: distance to the closest player whose
      ``player_side`` differs; stays at 50.0 when the play has no such player.
    * ``closing_speed``: negated projection of the relative velocity
      (player minus opponent) onto the opponent-to-player direction, so it is
      positive when the two are approaching each other.
    * ``num_nearby_opp_3`` / ``num_nearby_opp_5``: number of opponents closer
      than 3.0 / 5.0.
    * ``mirror_wr_vx`` / ``mirror_wr_vy`` / ``mirror_offset_x`` /
      ``mirror_offset_y``: only for ``'Defensive Coverage'`` players, the
      velocity of the closest ``'Targeted Receiver'`` / ``'Other Route Runner'``
      and the player's x/y offset from that receiver; 0.0 otherwise.

    Plays with fewer than two players contribute no rows.

    Args:
        input_df: Tracking frame with columns game_id, play_id, nfl_id,
            frame_id, x, y, s, dir, player_side and player_role.

    Returns:
        DataFrame with game_id, play_id, nfl_id and the feature columns above.
        The columns exist even when no row was produced, so the result can
        always be merged on the key columns.
    """
    key_cols = ['game_id', 'play_id', 'nfl_id']
    default_feats = {
        'nearest_opp_dist': 50.0, 'closing_speed': 0.0,
        'num_nearby_opp_3': 0, 'num_nearby_opp_5': 0,
        'mirror_wr_vx': 0.0, 'mirror_wr_vy': 0.0,
        'mirror_offset_x': 0.0, 'mirror_offset_y': 0.0,
    }
    features = []

    for (gid, pid), group in tqdm(input_df.groupby(['game_id', 'play_id']),
                                   desc="üèà Opponent features", leave=False):
        last = group.sort_values('frame_id').groupby('nfl_id').last()

        if len(last) < 2:
            continue

        positions = last[['x', 'y']].values
        sides = last['player_side'].values
        speeds = last['s'].values
        directions = last['dir'].values
        roles = last['player_role'].values

        # Receiver lookups do not depend on the player being processed, so
        # they are resolved once per play.
        receiver_mask = np.isin(roles, ['Targeted Receiver', 'Other Route Runner'])
        has_receivers = receiver_mask.any()
        rec_indices = np.where(receiver_mask)[0]
        rec_positions = positions[receiver_mask]

        for i, (nid, side, role) in enumerate(zip(last.index, sides, roles)):
            opp_mask = sides != side

            feat = {'game_id': gid, 'play_id': pid, 'nfl_id': nid, **default_feats}

            if not opp_mask.any():
                # Nobody on the other side: keep the defaults.
                features.append(feat)
                continue

            # opp_mask has at least one True entry here, so `distances` is
            # guaranteed to be non-empty.
            opp_positions = positions[opp_mask]
            distances = np.sqrt(((positions[i] - opp_positions)**2).sum(axis=1))

            nearest_idx = distances.argmin()
            feat['nearest_opp_dist'] = distances[nearest_idx]
            feat['num_nearby_opp_3'] = (distances < 3.0).sum()
            feat['num_nearby_opp_5'] = (distances < 5.0).sum()

            my_vx, my_vy = get_velocity(speeds[i], directions[i])
            opp_speeds = speeds[opp_mask]
            opp_dirs = directions[opp_mask]
            opp_vx, opp_vy = get_velocity(opp_speeds[nearest_idx], opp_dirs[nearest_idx])

            rel_vx = my_vx - opp_vx
            rel_vy = my_vy - opp_vy
            to_me = positions[i] - opp_positions[nearest_idx]
            # The +0.1 keeps the normalisation finite for coincident players.
            to_me_norm = to_me / (np.linalg.norm(to_me) + 0.1)
            feat['closing_speed'] = -(rel_vx * to_me_norm[0] + rel_vy * to_me_norm[1])

            if role == 'Defensive Coverage' and has_receivers:
                rec_distances = np.sqrt(((positions[i] - rec_positions)**2).sum(axis=1))
                closest_rec_idx = rec_distances.argmin()
                # Map the index within the receiver subset back to the play.
                actual_rec_idx = rec_indices[closest_rec_idx]

                rec_vx, rec_vy = get_velocity(speeds[actual_rec_idx], directions[actual_rec_idx])

                feat['mirror_wr_vx'] = rec_vx
                feat['mirror_wr_vy'] = rec_vy
                feat['mirror_offset_x'] = positions[i][0] - rec_positions[closest_rec_idx][0]
                feat['mirror_offset_y'] = positions[i][1] - rec_positions[closest_rec_idx][1]

            features.append(feat)

    # Explicit columns keep the schema (and the merge keys) intact when
    # `features` is empty; for non-empty input the column order is unchanged.
    return pd.DataFrame(features, columns=key_cols + list(default_feats))

def extract_route_patterns(input_df, kmeans, scaler):
    """Summarise each player's recent trajectory and assign a route cluster.

    For every (game_id, play_id, nfl_id) the last five frames (by
    ``frame_id``) are summarised into shape/speed statistics, which are then
    passed through the pre-fitted ``scaler`` and ``kmeans``. Players with
    fewer than three frames are skipped and get no row.

    Args:
        input_df: Tracking frame with game_id, play_id, nfl_id, frame_id,
            x, y and s columns.
        kmeans: Fitted clustering model exposing ``predict``.
        scaler: Fitted scaler exposing ``transform``.

    Returns:
        DataFrame with the key columns, the seven ``traj_*`` / ``speed_*``
        statistics and ``route_pattern``. When no player qualifies an empty
        frame with those columns is returned instead of raising.
    """
    key_cols = ['game_id', 'play_id', 'nfl_id']
    feat_cols = ['traj_straightness', 'traj_max_turn', 'traj_mean_turn',
                 'traj_depth', 'traj_width', 'speed_mean', 'speed_change']
    route_features = []

    for (gid, pid, nid), group in tqdm(input_df.groupby(['game_id', 'play_id', 'nfl_id']),
                                        desc="üõ£Ô∏è  Route patterns", leave=False):
        traj = group.sort_values('frame_id').tail(5)

        # At least three frames are required, which guarantees two or more
        # step headings and therefore at least one heading change below.
        if len(traj) < 3:
            continue

        positions = traj[['x', 'y']].values
        speeds = traj['s'].values

        step_dx = np.diff(positions[:, 0])
        step_dy = np.diff(positions[:, 1])

        total_dist = np.sum(np.sqrt(step_dx**2 + step_dy**2))
        dx = positions[-1, 0] - positions[0, 0]
        dy = positions[-1, 1] - positions[0, 1]
        displacement = np.sqrt(dx**2 + dy**2)
        # The +0.1 avoids division by zero for a stationary player.
        straightness = displacement / (total_dist + 0.1)

        # Heading changes are raw absolute differences of arctan2 angles
        # (no wrap-around correction), as in the original feature definition.
        angles = np.arctan2(step_dy, step_dx)
        angle_changes = np.abs(np.diff(angles))

        route_features.append({
            'game_id': gid, 'play_id': pid, 'nfl_id': nid,
            'traj_straightness': straightness,
            'traj_max_turn': np.max(angle_changes),
            'traj_mean_turn': np.mean(angle_changes),
            'traj_depth': abs(dx),
            'traj_width': abs(dy),
            'speed_mean': speeds.mean(),
            'speed_change': speeds[-1] - speeds[0],
        })

    if not route_features:
        # Nothing to cluster: return the expected schema so that callers can
        # still merge on the key columns.
        return pd.DataFrame(columns=key_cols + feat_cols + ['route_pattern'])

    route_df = pd.DataFrame(route_features)
    X = route_df[feat_cols].fillna(0)

    X_scaled = scaler.transform(X)
    route_df['route_pattern'] = kmeans.predict(X_scaled)

    return route_df

def compute_neighbor_embeddings(input_df, k_neigh=Config.K_NEIGH, 
                                radius=Config.RADIUS, tau=Config.TAU):
    """Compute distance-weighted neighbour statistics per player ("GNN-lite").

    Each player's last frame is paired with every other player recorded in
    the same play at that same ``frame_id``. Pairs are optionally limited to
    ``radius`` and to the ``k_neigh`` closest neighbours, weighted by
    ``exp(-dist / tau)`` (normalised per player) and aggregated separately
    for allies (same ``player_side``) and opponents.

    Args:
        input_df: Frame with game_id, play_id, nfl_id, frame_id, x, y,
            velocity_x, velocity_y and player_side columns.
        k_neigh: Maximum number of nearest neighbours kept per player, or
            ``None`` for no cap.
        radius: Maximum neighbour distance, or ``None`` for no limit. Also
            used as the fill value for missing distance features.
        tau: Scale of the exponential distance weighting.

    Returns:
        DataFrame keyed by game_id, play_id, nfl_id with the ``gnn_*``
        columns. Players left without any neighbour after filtering have no
        row in the result.
    """
    print("üï∏Ô∏è  Computing GNN-lite neighbor embeddings...")
    
    cols_needed = ["game_id", "play_id", "nfl_id", "frame_id", "x", "y", 
                   "velocity_x", "velocity_y", "player_side"]
    src = input_df[cols_needed].copy()
    
    # One row per player: the row with the highest frame_id.
    last = (src.sort_values(["game_id", "play_id", "nfl_id", "frame_id"])
               .groupby(["game_id", "play_id", "nfl_id"], as_index=False)
               .tail(1)
               .rename(columns={"frame_id": "last_frame_id"})
               .reset_index(drop=True))
    
    # Pair each player with everyone present in the same play at that frame.
    tmp = last.merge(
        src.rename(columns={
            "frame_id": "nb_frame_id", "nfl_id": "nfl_id_nb",
            "x": "x_nb", "y": "y_nb", 
            "velocity_x": "vx_nb", "velocity_y": "vy_nb", 
            "player_side": "player_side_nb"
        }),
        left_on=["game_id", "play_id", "last_frame_id"],
        right_on=["game_id", "play_id", "nb_frame_id"],
        how="left"
    )
    
    # Drop self-pairs, then derive relative position and velocity.
    tmp = tmp[tmp["nfl_id_nb"] != tmp["nfl_id"]]
    tmp["dx"] = tmp["x_nb"] - tmp["x"]
    tmp["dy"] = tmp["y_nb"] - tmp["y"]
    tmp["dvx"] = tmp["vx_nb"] - tmp["velocity_x"]
    tmp["dvy"] = tmp["vy_nb"] - tmp["velocity_y"]
    tmp["dist"] = np.sqrt(tmp["dx"]**2 + tmp["dy"]**2)
    
    tmp = tmp[np.isfinite(tmp["dist"])]
    tmp = tmp[tmp["dist"] > 1e-6]
    if radius is not None:
        tmp = tmp[tmp["dist"] <= radius]
    
    tmp["is_ally"] = (tmp["player_side_nb"].fillna("") == tmp["player_side"].fillna("")).astype(np.float32)
    
    keys = ["game_id", "play_id", "nfl_id"]
    # method="first" breaks distance ties so ranks are 1, 2, 3, ... per player.
    tmp["rnk"] = tmp.groupby(keys)["dist"].rank(method="first")
    if k_neigh is not None:
        tmp = tmp[tmp["rnk"] <= float(k_neigh)]
    
    # Exponential distance weights, normalised to sum to 1 per player.
    tmp["w"] = np.exp(-tmp["dist"] / float(tau))
    sum_w = tmp.groupby(keys)["w"].transform("sum")
    tmp["wn"] = np.where(sum_w > 0, tmp["w"] / sum_w, 0.0)
    
    tmp["wn_ally"] = tmp["wn"] * tmp["is_ally"]
    tmp["wn_opp"] = tmp["wn"] * (1.0 - tmp["is_ally"])
    
    for col in ["dx", "dy", "dvx", "dvy"]:
        tmp[f"{col}_ally_w"] = tmp[col] * tmp["wn_ally"]
        tmp[f"{col}_opp_w"] = tmp[col] * tmp["wn_opp"]
    
    # NaN outside the relevant group so min/mean ignore the other side.
    tmp["dist_ally"] = np.where(tmp["is_ally"] > 0.5, tmp["dist"], np.nan)
    tmp["dist_opp"] = np.where(tmp["is_ally"] < 0.5, tmp["dist"], np.nan)
    
    ag = tmp.groupby(keys).agg(
        gnn_ally_dx_mean=("dx_ally_w", "sum"),
        gnn_ally_dy_mean=("dy_ally_w", "sum"),
        gnn_ally_dvx_mean=("dvx_ally_w", "sum"),
        gnn_ally_dvy_mean=("dvy_ally_w", "sum"),
        gnn_opp_dx_mean=("dx_opp_w", "sum"),
        gnn_opp_dy_mean=("dy_opp_w", "sum"),
        gnn_opp_dvx_mean=("dvx_opp_w", "sum"),
        gnn_opp_dvy_mean=("dvy_opp_w", "sum"),
        gnn_ally_cnt=("is_ally", "sum"),
        gnn_opp_cnt=("is_ally", lambda s: float(len(s) - s.sum())),
        gnn_ally_dmin=("dist_ally", "min"),
        gnn_ally_dmean=("dist_ally", "mean"),
        gnn_opp_dmin=("dist_opp", "min"),
        gnn_opp_dmean=("dist_opp", "mean"),
    ).reset_index()
    
    # Distances to the three nearest neighbours, one column per rank.
    near = tmp.loc[tmp["rnk"] <= 3, keys + ["rnk", "dist"]].copy()
    near["rnk"] = near["rnk"].astype(int)
    dwide = near.pivot_table(index=keys, columns="rnk", values="dist", aggfunc="first")
    dwide = dwide.rename(columns={1: "gnn_d1", 2: "gnn_d2", 3: "gnn_d3"}).reset_index()
    ag = ag.merge(dwide, on=keys, how="left")
    
    # The pivot only creates a column for ranks that actually occur; when no
    # player has a 2nd/3rd neighbour the column would be absent and the fill
    # loop below would raise KeyError.
    for c in ["gnn_d1", "gnn_d2", "gnn_d3"]:
        if c not in ag.columns:
            ag[c] = np.nan
    
    for c in ["gnn_ally_dx_mean", "gnn_ally_dy_mean", "gnn_ally_dvx_mean", "gnn_ally_dvy_mean",
              "gnn_opp_dx_mean", "gnn_opp_dy_mean", "gnn_opp_dvx_mean", "gnn_opp_dvy_mean"]:
        ag[c] = ag[c].fillna(0.0)
    for c in ["gnn_ally_cnt", "gnn_opp_cnt"]:
        ag[c] = ag[c].fillna(0.0)
    # Missing distances are filled with the search radius (30.0 if unset).
    for c in ["gnn_ally_dmin", "gnn_opp_dmin", "gnn_ally_dmean", "gnn_opp_dmean", 
              "gnn_d1", "gnn_d2", "gnn_d3"]:
        ag[c] = ag[c].fillna(radius if radius is not None else 30.0)
    
    return ag

def engineer_base_features(df):
    """Return a copy of ``df`` extended with per-row kinematic features.

    Adds velocity components, ball-relative distance/angle features, one-hot
    role/side flags, height/BMI, acceleration components, momentum and
    kinetic energy. The input frame is left untouched.

    NOTE(review): the acceleration components use cos for x and sin for y
    while the velocity components use sin for x and cos for y; this is kept
    exactly as-is because the section header says the features must match
    training - confirm before changing.
    """
    out = df.copy()

    heading_rad = np.radians(out['dir'])
    out['velocity_x'] = out['s'] * np.sin(heading_rad)
    out['velocity_y'] = out['s'] * np.cos(heading_rad)

    # Vector from the player to the ball landing spot.
    to_ball_x = out['ball_land_x'] - out['x']
    to_ball_y = out['ball_land_y'] - out['y']
    out['dist_to_ball'] = np.sqrt(to_ball_x**2 + to_ball_y**2)
    out['angle_to_ball'] = np.arctan2(to_ball_y, to_ball_x)
    ball_angle = out['angle_to_ball']
    out['velocity_toward_ball'] = (
        out['velocity_x'] * np.cos(ball_angle)
        + out['velocity_y'] * np.sin(ball_angle)
    )

    # Smallest absolute gap between orientation and direction of motion.
    facing_gap = np.abs(out['o'] - out['dir'])
    out['orientation_diff'] = np.minimum(facing_gap, 360 - facing_gap)

    role_flags = (
        ('role_targeted_receiver', 'Targeted Receiver'),
        ('role_defensive_coverage', 'Defensive Coverage'),
        ('role_passer', 'Passer'),
    )
    for flag_name, role_value in role_flags:
        out[flag_name] = (out['player_role'] == role_value).astype(int)
    out['side_offense'] = (out['player_side'] == 'Offense').astype(int)

    # player_height is split on '-' into two numeric parts: first * 12 + second.
    feet_and_inches = out['player_height'].str.split('-', expand=True)
    feet = feet_and_inches[0].astype(float)
    inches = feet_and_inches[1].astype(float)
    out['height_inches'] = feet * 12 + inches
    out['bmi'] = (out['player_weight'] / (out['height_inches']**2)) * 703

    out['acceleration_x'] = out['a'] * np.cos(heading_rad)
    out['acceleration_y'] = out['a'] * np.sin(heading_rad)
    out['speed_squared'] = out['s'] ** 2
    out['accel_magnitude'] = np.sqrt(
        out['acceleration_x']**2 + out['acceleration_y']**2
    )
    out['velocity_alignment'] = np.cos(ball_angle - heading_rad)

    weight = out['player_weight']
    out['momentum_x'] = weight * out['velocity_x']
    out['momentum_y'] = weight * out['velocity_y']
    out['kinetic_energy'] = 0.5 * weight * out['speed_squared']

    orientation_gap = np.abs(out['o'] - np.degrees(ball_angle))
    out['angle_diff'] = np.minimum(orientation_gap, 360 - orientation_gap)

    out['dist_squared'] = out['dist_to_ball'] ** 2

    return out

def add_time_features(df):
    """Return a copy of ``df`` with frame-timing features added.

    Frame counts are divided by 10.0 to obtain times. Requires the columns
    num_frames_output, frame_id, x, y, velocity_x, velocity_y, ball_land_x,
    ball_land_y, dist_to_ball and s.
    """
    out = df.copy()

    total_frames = out['num_frames_output']
    frame = out['frame_id']
    frames_left = total_frames - frame

    out['max_play_duration'] = total_frames / 10.0
    out['frame_time'] = frame / 10.0
    out['progress_ratio'] = frame / total_frames
    out['time_remaining'] = frames_left / 10.0
    out['frames_remaining'] = frames_left

    # Constant-velocity projection to this frame's time, compared with the
    # ball landing spot.
    elapsed = out['frame_time']
    for axis in ('x', 'y'):
        out[f'expected_{axis}_at_ball'] = out[axis] + out[f'velocity_{axis}'] * elapsed
    for axis in ('x', 'y'):
        out[f'error_from_ball_{axis}'] = (
            out[f'expected_{axis}_at_ball'] - out[f'ball_land_{axis}']
        )
    out['error_from_ball'] = np.sqrt(
        out['error_from_ball_x']**2 + out['error_from_ball_y']**2
    )

    out['time_squared'] = elapsed ** 2
    # The +0.1 keeps the ratio finite at frame_time == 0.
    out['weighted_dist_by_time'] = out['dist_to_ball'] / (elapsed + 0.1)

    progress = out['progress_ratio']
    out['velocity_x_progress'] = out['velocity_x'] * progress
    out['velocity_y_progress'] = out['velocity_y'] * progress
    out['dist_scaled_by_progress'] = out['dist_to_ball'] * (1 - progress)
    out['speed_scaled_by_time_left'] = out['s'] * out['time_remaining']

    out['actual_play_length'] = total_frames
    out['length_ratio'] = total_frames / 30.0

    return out

def add_sequence_features(df):
    """Add per-player lag, rolling and delta features along ``frame_id``.

    The frame is sorted by game, play, player and frame (a new sorted frame
    is returned; the argument is not modified). Within each
    (game_id, play_id, nfl_id) group it adds:

    * ``{col}_lag{1..5}`` for x, y, velocity_x, velocity_y, s, a
    * ``{col}_rolling_mean_{3,5}`` / ``{col}_rolling_std_{3,5}`` for
      x, y, velocity_x, velocity_y, s (``min_periods=1``)
    * ``{col}_delta`` for velocity_x, velocity_y

    Base columns that are absent from ``df`` are silently skipped.
    """
    keys = ['game_id', 'play_id', 'nfl_id']
    ordered = df.sort_values(keys + ['frame_id'])

    def present(candidates):
        # Keep only the base columns that exist in the frame.
        return [name for name in candidates if name in ordered.columns]

    lag_cols = present(('x', 'y', 'velocity_x', 'velocity_y', 's', 'a'))
    roll_cols = present(('x', 'y', 'velocity_x', 'velocity_y', 's'))
    delta_cols = present(('velocity_x', 'velocity_y'))

    for lag in range(1, 6):
        for name in lag_cols:
            ordered[f'{name}_lag{lag}'] = ordered.groupby(keys)[name].shift(lag)

    for window in (3, 5):
        for name in roll_cols:
            # Dropping the three group levels leaves the original row index,
            # so the assignment aligns back onto `ordered`.
            mean_vals = (
                ordered.groupby(keys)[name]
                .rolling(window, min_periods=1).mean()
                .reset_index(level=[0, 1, 2], drop=True)
            )
            std_vals = (
                ordered.groupby(keys)[name]
                .rolling(window, min_periods=1).std()
                .reset_index(level=[0, 1, 2], drop=True)
            )
            ordered[f'{name}_rolling_mean_{window}'] = mean_vals
            ordered[f'{name}_rolling_std_{window}'] = std_vals

    for name in delta_cols:
        ordered[f'{name}_delta'] = ordered.groupby(keys)[name].diff()

    return ordered

def add_pressure_features(df):
    """Add opponent-pressure and receiver-mirroring features to ``df``.

    The frame is modified in place and also returned. Pressure columns are
    only added when ``nearest_opp_dist`` is present; mirror columns only when
    ``mirror_wr_vx`` is present.
    """
    has_opponent_info = 'nearest_opp_dist' in df.columns
    has_mirror_info = 'mirror_wr_vx' in df.columns

    if has_opponent_info:
        opp_dist = df['nearest_opp_dist']
        # Inverse distance, with the distance floored at 0.5.
        df['pressure'] = 1 / np.maximum(opp_dist, 0.5)
        df['under_pressure'] = (opp_dist < 3).astype(int)
        df['pressure_x_speed'] = df['pressure'] * df['s']

    if has_mirror_info:
        # Dot product of own and receiver velocity, divided by own speed
        # (floored at 0.1 to avoid dividing by zero).
        velocity_dot = (
            df['velocity_x'] * df['mirror_wr_vx'] +
            df['velocity_y'] * df['mirror_wr_vy']
        )
        df['mirror_similarity'] = velocity_dot / np.maximum(df['s'], 0.1)
        df['mirror_offset_dist'] = np.sqrt(
            df['mirror_offset_x']**2 + df['mirror_offset_y']**2
        )
        # Zeroed for every row whose role_defensive_coverage flag is 0.
        df['mirror_alignment'] = df['mirror_similarity'] * df['role_defensive_coverage']

    return df

def predict_patterns(df, forward_features, auxiliary_models, pattern_targets):
    """Append averaged auxiliary-model predictions to ``df``.

    For each name in ``pattern_targets`` every model in
    ``auxiliary_models[name]`` predicts on ``df[forward_features]``; the
    element-wise mean is stored in a column named after the target with
    ``'gt_'`` replaced by ``'pred_'``. ``df`` is modified in place and
    returned.
    """
    feature_matrix = df[forward_features].values

    for target_name in pattern_targets:
        per_model = []
        for aux_model in auxiliary_models[target_name]:
            per_model.append(aux_model.predict(feature_matrix))

        output_col = target_name.replace('gt_', 'pred_')
        df[output_col] = np.mean(per_model, axis=0)

    return df

# ============================================================================
# MAIN INFERENCE
# ============================================================================

def main():
    """Run the inference pipeline end to end and write ``submission.csv``.

    Steps: load the pickled model bundle, read the test input/template CSVs,
    rebuild the engineered features, take each player's last input row,
    join it onto the template's output frames, add time features, predict
    the auxiliary pattern targets, then predict x/y residuals on top of a
    constant-velocity baseline and clip the result to the field.

    Returns:
        The submission DataFrame with columns ``id``, ``x`` and ``y``.
    """
    print("üèà" + "="*58 + "üèà")
    print("   NFL BIG DATA BOWL 2026 - INFERENCE ONLY")
    print("   üöÄ Loading pre-trained models and generating predictions")
    print("üèà" + "="*58 + "üèà\n")
    
    # Load pre-trained models
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only point MODEL_PATH at a model bundle from a trusted source.
    print("üì¶ Loading models from:", Config.MODEL_PATH)
    with open(Config.MODEL_PATH, 'rb') as f:
        model_data = pickle.load(f)
    
    models_x = model_data['models_x']
    models_y = model_data['models_y']
    auxiliary_models = model_data['auxiliary_models']
    forward_features = model_data['forward_features']
    pattern_targets = model_data['pattern_targets']
    predicted_pattern_features = model_data['predicted_pattern_features']
    route_kmeans = model_data['route_kmeans']
    route_scaler = model_data['route_scaler']
    
    print(f"‚úÖ Loaded {len(models_x)} main models")
    print(f"‚úÖ Loaded {len(auxiliary_models)} auxiliary models")
    print(f"‚úÖ Features: {len(forward_features)} forward + {len(predicted_pattern_features)} patterns\n")
    
    # Load test data
    print("üìä Loading test data...")
    test_input = pd.read_csv(f'{Config.BASEDIR}/test_input.csv')
    test_template = pd.read_csv(f'{Config.BASEDIR}/test.csv')
    
    print(f"‚úÖ Test input: {test_input.shape}")
    print(f"‚úÖ Test template: {test_template.shape}\n")
    
    # Feature engineering pipeline
    print("‚öôÔ∏è  Feature Engineering...")
    print("1Ô∏è‚É£  Base features...")
    test_features = engineer_base_features(test_input)
    
    print("2Ô∏è‚É£  Temporal sequence features...")
    test_features = add_sequence_features(test_features)
    
    print("3Ô∏è‚É£  Opponent + Mirror WR features...")
    test_opp = get_opponent_features(test_input)
    
    print("4Ô∏è‚É£  Route pattern clustering...")
    test_route = extract_route_patterns(test_input, route_kmeans, route_scaler)
    
    print("5Ô∏è‚É£  GNN-lite neighbor embeddings...")
    test_gnn = compute_neighbor_embeddings(test_features)
    
    print("6Ô∏è‚É£  Merging features...")
    test_features = test_features.merge(test_opp, on=['game_id', 'play_id', 'nfl_id'], how='left')
    test_features = test_features.merge(test_route, on=['game_id', 'play_id', 'nfl_id'], how='left')
    test_features = test_features.merge(test_gnn, on=['game_id', 'play_id', 'nfl_id'], how='left')
    
    print("7Ô∏è‚É£  Pressure metrics...")
    test_features = add_pressure_features(test_features)
    
    # Keep one row per player (the last input values) and drop the input
    # frame_id so that the template's output frame_id is the only one left
    # after the merge below.
    print("\n8Ô∏è‚É£  Preparing test features (CORRECTED LOGIC)...")
    test_base = test_features.groupby(['game_id', 'play_id', 'nfl_id']).last().reset_index()
    if 'frame_id' in test_base.columns:
        test_base = test_base.drop('frame_id', axis=1)
    
    print(f"   Test base shape: {test_base.shape}")
    
    # Merge with test template (which has OUTPUT frame_ids)
    test_merged = test_template.merge(test_base, on=['game_id', 'play_id', 'nfl_id'], how='left')
    
    # Add time features using OUTPUT frame_ids
    print("9Ô∏è‚É£  Adding time features...")
    test_merged = add_time_features(test_merged)
    
    print(f"   Test merged shape: {test_merged.shape}")
    
    # Create any forward feature the pipeline did not produce BEFORE the
    # auxiliary models select df[forward_features]; otherwise a missing
    # column raises KeyError there and the zero-fill below is never reached.
    for col in forward_features:
        if col not in test_merged.columns:
            test_merged[col] = 0
    
    # Predict auxiliary patterns
    print("\nüîÆ Predicting football patterns...")
    test_merged = predict_patterns(test_merged, forward_features, auxiliary_models, pattern_targets)
    
    # Prepare features
    all_features_list = forward_features + predicted_pattern_features
    
    # Fill missing features (covers pattern features no auxiliary model produced)
    for col in all_features_list:
        if col not in test_merged.columns:
            test_merged[col] = 0
    
    X_test = test_merged[all_features_list].fillna(0).values
    
    print(f"‚úÖ Feature matrix ready: {X_test.shape}")
    
    # Generate predictions
    print("\nüéØ Generating predictions...")
    
    # Physics baseline: constant-velocity projection to each output frame time
    baseline_x, baseline_y = physics_baseline(
        test_merged['x'].values,
        test_merged['y'].values,
        test_merged['velocity_x'].values,
        test_merged['velocity_y'].values,
        test_merged['frame_time'].values
    )
    
    # Predict residuals (mean over all loaded models)
    print("   Ensemble predictions from 5 folds...")
    pred_x_res = np.mean([model.predict(X_test) for model in models_x], axis=0)
    pred_y_res = np.mean([model.predict(X_test) for model in models_y], axis=0)
    
    # Add to physics baseline and clip to field
    pred_x = np.clip(baseline_x + pred_x_res, Config.FIELD_X_MIN, Config.FIELD_X_MAX)
    pred_y = np.clip(baseline_y + pred_y_res, Config.FIELD_Y_MIN, Config.FIELD_Y_MAX)
    
    # Create submission: id is "<game_id>_<play_id>_<nfl_id>_<frame_id>"
    print("\nüìù Creating submission...")
    test_merged['id'] = (test_merged['game_id'].astype(str) + '_' +
                         test_merged['play_id'].astype(str) + '_' +
                         test_merged['nfl_id'].astype(str) + '_' +
                         test_merged['frame_id'].astype(str))
    
    submission = pd.DataFrame({
        'id': test_merged['id'],
        'x': pred_x,
        'y': pred_y
    })
    
    submission.to_csv("submission.csv", index=False)
    
    print("\n" + "="*60)
    print("üèÜ SUBMISSION COMPLETE")
    print("="*60)
    print(f"‚úì Saved submission.csv ({len(submission)} rows)")
    print(f"‚úì X range: [{submission['x'].min():.2f}, {submission['x'].max():.2f}]")
    print(f"‚úì Y range: [{submission['y'].min():.2f}, {submission['y'].max():.2f}]")
    print(f"‚úì No NaN: {submission.isnull().sum().sum() == 0}")
    print(f"\nüî• Ready to submit and WIN!")
    print("="*60 + "\n")
    
    # Preview
    print("Preview (first 10 rows):")
    print(submission.head(10))
    
    return submission

# Run the inference pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()