# In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, name) for name in filenames):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
from sklearn.model_selection import GroupKFold
from pathlib import Path
import warnings
import gc
import time

warnings.filterwarnings('ignore')

# ============ GPU Setup ============
def setup_gpus():
    """Pick the compute device, report visible GPUs, and configure cuDNN.

    When CUDA is available, prints the name and total memory of each GPU,
    empties the CUDA cache, and enables cuDNN benchmark mode with
    deterministic mode turned off.

    Returns:
        tuple: ``(device, gpu_count)`` where ``device`` is
        ``torch.device('cuda')`` with the CUDA device count, or
        ``torch.device('cpu')`` with ``0`` when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        print("No GPU available, using CPU")
        return torch.device('cpu'), 0

    n_devices = torch.cuda.device_count()
    print(f"GPUs available: {n_devices}")
    for idx in range(n_devices):
        name = torch.cuda.get_device_name(idx)
        total_gb = torch.cuda.get_device_properties(idx).total_memory / 1e9
        print(f"GPU {idx}: {name} - {total_gb:.2f} GB")

    cuda_device = torch.device('cuda')
    torch.cuda.empty_cache()
    # Let cuDNN auto-tune kernels; determinism is explicitly switched off.
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = False
    return cuda_device, n_devices


device, gpu_count = setup_gpus()

# ============ Robust Dataset ============
class RobustNFLDataset(Dataset):
    """Row-wise feature dataset built from a merged player/frame DataFrame.

    The constructor copies ``data``, fills in defaults for missing columns,
    derives the engineered features listed in ``FEATURE_COLS`` and stores
    them as a float32 matrix in ``self.features``. Non-binary features are
    standardized with the statistics in ``stats_dict``.

    Args:
        data: pandas DataFrame. Must contain ``game_id``, ``play_id`` and
            ``nfl_id``; every other column read here has a fallback.
        is_training: When True, targets and sample weights are built, and
            normalization statistics are computed if ``stats_dict`` has
            none yet.
        stats_dict: Optional dict holding ``'mean'`` / ``'std'`` arrays.
            The dict passed in is stored by reference and is filled in
            place when statistics are computed here.

    Attributes:
        features: ``(n_rows, len(FEATURE_COLS))`` float32 array.
        metadata: ``(n_rows, 6)`` array of game_id, play_id, nfl_id,
            frame_id, x_last, y_last.
        targets: ``(n_rows, 2)`` float32 displacements (training only).
        weights: ``(n_rows,)`` float32 sample weights (training only).
    """

    # Column order of ``self.features``; the model's input_dim follows it.
    FEATURE_COLS = (
        'x_last', 'y_last', 'x_norm', 'y_norm',
        's', 'a', 'o', 'dir',
        'speed_x', 'speed_y', 'v_parallel', 'v_perpendicular',
        'frame_offset', 'time_offset', 't_rel', 't_rel_squared', 't_rel_cubed',
        'dist_to_ball', 'sin_angle_ball', 'cos_angle_ball', 'log_dist_ball',
        'dist_to_target', 'log_dist_target',
        'sin_orientation', 'cos_orientation', 'orientation_ball_alignment',
        'ball_x_norm', 'ball_y_norm', 'ball_land_x', 'ball_land_y',
        'is_target', 'is_passer', 'is_coverage',
        'is_offense', 'is_defense', 'play_left',
        'player_height', 'player_weight',
        'absolute_yardline_number', 'num_frames_output', 'T',
    )

    # 0/1 indicator features that are excluded from standardization.
    BINARY_FEATURES = (
        'is_target', 'is_passer', 'is_coverage',
        'is_offense', 'is_defense', 'play_left',
    )

    # Values used for whole columns that are absent from the input frame.
    COLUMN_DEFAULTS = {
        'frame_id': 1,
        'num_frames_output': 10,
        'x_last': 60.0,
        'y_last': 26.65,
        'ball_land_x': 60.0,
        'ball_land_y': 26.65,
        'absolute_yardline_number': 50,
    }

    def __init__(self, data, is_training=True, stats_dict=None):
        self.is_training = is_training
        self.stats_dict = stats_dict if stats_dict is not None else {}

        print("Pre-computing features...")
        data = data.copy()

        # Ensure critical columns exist with proper defaults
        for column, default in self.COLUMN_DEFAULTS.items():
            if column not in data.columns:
                data[column] = default

        # Height and weight handling
        if 'player_height' in data.columns:
            data['player_height'] = data['player_height'].apply(self._height_to_inches)
        else:
            data['player_height'] = 72

        if 'player_weight' not in data.columns:
            data['player_weight'] = 220
        else:
            data['player_weight'] = data['player_weight'].fillna(220)

        # Time features
        data['frame_offset'] = data['frame_id'].astype(float)
        data['time_offset'] = data['frame_offset'] / 10.0
        # Clamp to >= 1 so t_rel never divides by zero.
        data['T'] = np.maximum(data['num_frames_output'].astype(float), 1.0)
        data['t_rel'] = data['frame_offset'] / data['T']
        data['t_rel_squared'] = data['t_rel'] ** 2
        data['t_rel_cubed'] = data['t_rel'] ** 3

        # Ball distance and angle
        dx_ball = data['ball_land_x'] - data['x_last']
        dy_ball = data['ball_land_y'] - data['y_last']
        data['dist_to_ball'] = np.sqrt(dx_ball**2 + dy_ball**2 + 1e-6)
        angle_to_ball = np.arctan2(dy_ball, dx_ball)
        data['sin_angle_ball'] = np.sin(angle_to_ball)
        data['cos_angle_ball'] = np.cos(angle_to_ball)
        data['log_dist_ball'] = np.log1p(data['dist_to_ball'])

        # Velocity features
        if 's' in data.columns and 'dir' in data.columns:
            # NOTE(review): x uses sin and y uses cos of `dir` (degrees) —
            # presumably `dir` is measured from the +y axis; confirm against
            # the data dictionary.
            dir_rad = np.deg2rad(data['dir'].fillna(0))
            data['speed_x'] = data['s'].fillna(0) * np.sin(dir_rad)
            data['speed_y'] = data['s'].fillna(0) * np.cos(dir_rad)

            # Velocity components along / across the unit vector to the ball
            den = data['dist_to_ball'] + 1e-6
            ux = dx_ball / den
            uy = dy_ball / den
            data['v_parallel'] = data['speed_x'] * ux + data['speed_y'] * uy
            data['v_perpendicular'] = data['speed_x'] * uy - data['speed_y'] * ux
        else:
            # Either column missing: zero out every velocity-derived feature.
            data['s'] = 0
            data['dir'] = 0
            data['speed_x'] = 0
            data['speed_y'] = 0
            data['v_parallel'] = 0
            data['v_perpendicular'] = 0

        # Acceleration features
        if 'a' not in data.columns:
            data['a'] = 0

        # Orientation features
        if 'o' in data.columns:
            o_rad = np.deg2rad(data['o'].fillna(0))
            data['sin_orientation'] = np.sin(o_rad)
            data['cos_orientation'] = np.cos(o_rad)
            data['orientation_ball_alignment'] = np.cos(o_rad - angle_to_ball)
        else:
            data['o'] = 0
            data['sin_orientation'] = 0
            data['cos_orientation'] = 1
            data['orientation_ball_alignment'] = 0

        # Player role indicators
        has_role = 'player_role' in data.columns
        if has_role:
            data['is_target'] = (data['player_role'] == 'Targeted Receiver').astype(float)
            data['is_passer'] = (data['player_role'] == 'Passer').astype(float)
            data['is_coverage'] = (data['player_role'] == 'Defensive Coverage').astype(float)
        else:
            data['is_target'] = 0
            data['is_passer'] = 0
            data['is_coverage'] = 0

        # Target receiver position; falls back to the ball landing point
        data['target_x'] = data['ball_land_x']
        data['target_y'] = data['ball_land_y']

        if has_role:
            target_data = data[data['is_target'] == 1]
            if len(target_data) > 0:
                # First targeted-receiver row per play supplies the position
                target_positions = target_data.groupby(['game_id', 'play_id'])[['x_last', 'y_last']].first()
                target_positions = target_positions.rename(
                    columns={'x_last': 'target_x_temp', 'y_last': 'target_y_temp'}
                ).reset_index()

                if len(target_positions) > 0:
                    # The left merge keeps every row of `data` in order;
                    # plays without a target keep the ball fallback.
                    data = data.merge(target_positions, on=['game_id', 'play_id'], how='left')
                    data['target_x'] = data['target_x_temp'].fillna(data['target_x'])
                    data['target_y'] = data['target_y_temp'].fillna(data['target_y'])
                    data = data.drop(columns=['target_x_temp', 'target_y_temp'])

        # Target distance
        dx_target = data['target_x'] - data['x_last']
        dy_target = data['target_y'] - data['y_last']
        data['dist_to_target'] = np.sqrt(dx_target**2 + dy_target**2 + 1e-6)
        data['log_dist_target'] = np.log1p(data['dist_to_target'])

        # Side encoding
        if 'player_side' in data.columns:
            data['is_offense'] = (data['player_side'] == 'Offense').astype(float)
            data['is_defense'] = (data['player_side'] == 'Defense').astype(float)
        else:
            data['is_offense'] = 0
            data['is_defense'] = 0

        # Direction encoding
        if 'play_direction' in data.columns:
            data['play_left'] = (data['play_direction'] == 'left').astype(float)
        else:
            data['play_left'] = 0

        # Normalized positions
        data['x_norm'] = data['x_last'] / 120.0
        data['y_norm'] = data['y_last'] / 53.3
        data['ball_x_norm'] = data['ball_land_x'] / 120.0
        data['ball_y_norm'] = data['ball_land_y'] / 53.3

        # Build the feature matrix; any column still missing becomes zeros
        columns = [
            data[col].fillna(0).values if col in data.columns else np.zeros(len(data))
            for col in self.FEATURE_COLS
        ]
        self.features = np.stack(columns, axis=1).astype(np.float32)

        # Normalize features
        self.normalize_features()

        # Store metadata
        self.metadata = data[['game_id', 'play_id', 'nfl_id', 'frame_id',
                              'x_last', 'y_last']].fillna(0).values

        # Training targets
        if self.is_training:
            if 'displacement_x' in data.columns and 'displacement_y' in data.columns:
                self.targets = data[['displacement_x', 'displacement_y']].fillna(0).values.astype(np.float32)
            elif 'x' in data.columns and 'y' in data.columns:
                self.targets = np.stack([
                    data['x'].fillna(60) - data['x_last'].fillna(60),
                    data['y'].fillna(26.65) - data['y_last'].fillna(26.65)
                ], axis=1).astype(np.float32)
                self.targets = np.clip(self.targets, -10, 10)
            else:
                self.targets = np.zeros((len(data), 2), dtype=np.float32)

            # Sample weights: rescale t_rel into [1.0, 1.6] and double the
            # weight of targeted-receiver rows.
            w_time = data['t_rel'].values
            w_time = 1.0 + 0.6 * (w_time - w_time.min()) / (w_time.max() - w_time.min() + 1e-9)
            w_role = np.where(data['is_target'].values == 1, 2.0, 1.0)
            self.weights = (w_time * w_role).astype(np.float32)

        print(f"Dataset ready: {len(self)} samples, {self.features.shape[1]} features")

    @staticmethod
    def _height_to_inches(ht):
        """Convert a ``'feet-inches'`` string to inches; fall back to 72."""
        if isinstance(ht, str) and '-' in ht:
            try:
                feet, inches = ht.split('-')
                return int(feet) * 12 + int(inches)
            except ValueError:
                # Wrong number of parts or non-integer parts.
                return 72
        return 72

    def normalize_features(self):
        """Standardize non-binary feature columns in place.

        If this is a training dataset and ``stats_dict`` has no ``'mean'``
        yet, per-column mean and std (+1e-6) are computed from
        ``self.features`` and stored in ``stats_dict``. If ``stats_dict``
        already has statistics they are applied as-is. A non-training
        dataset without statistics is left un-normalized.
        """
        # Resolve indicator columns by name so reordering FEATURE_COLS
        # cannot silently normalize the wrong columns.
        skip_indices = {self.FEATURE_COLS.index(name) for name in self.BINARY_FEATURES}
        n_features = self.features.shape[1]

        fit = self.is_training and 'mean' not in self.stats_dict
        if fit:
            self.stats_dict['mean'] = np.zeros(n_features)
            self.stats_dict['std'] = np.ones(n_features)
        elif 'mean' not in self.stats_dict:
            return

        mean = self.stats_dict['mean']
        std = self.stats_dict['std']
        for i in range(n_features):
            if i in skip_indices:
                continue
            if fit:
                mean[i] = np.mean(self.features[:, i])
                std[i] = np.std(self.features[:, i]) + 1e-6
            self.features[:, i] = (self.features[:, i] - mean[i]) / std[i]

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        """Return one row as a dict of features, ids, and (training) labels."""
        sample = {
            'features': torch.from_numpy(self.features[idx]),
            'game_id': self.metadata[idx, 0],
            'play_id': self.metadata[idx, 1],
            'nfl_id': self.metadata[idx, 2],
            'frame_id': self.metadata[idx, 3],
            'x_last': self.metadata[idx, 4],
            'y_last': self.metadata[idx, 5]
        }

        if self.is_training:
            sample['target'] = torch.from_numpy(self.targets[idx])
            sample['weight'] = self.weights[idx]

        return sample

# ============ Robust Models ============
class AttentionBlock(nn.Module):
    """Residual self-attention block followed by a residual feed-forward MLP.

    A single ``LayerNorm`` instance is applied before both sub-layers.
    Inputs with two dimensions are given a length-1 sequence axis for the
    computation, and any output whose sequence axis has length 1 is returned
    with that axis squeezed away.

    Args:
        dim: Embedding width of the input and output.
        num_heads: Number of attention heads.
        dropout: Dropout probability for attention and the MLP.
    """

    def __init__(self, dim, num_heads=8, dropout=0.1):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.attn = nn.MultiheadAttention(
            embed_dim=dim,
            num_heads=num_heads,
            dropout=dropout,
            batch_first=True,
        )
        expanded = dim * 4
        feed_forward = [
            nn.Linear(dim, expanded),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(expanded, dim),
            nn.Dropout(dropout),
        ]
        self.mlp = nn.Sequential(*feed_forward)

    def forward(self, x):
        # Promote (batch, dim) to (batch, 1, dim) for batch-first attention.
        if x.dim() == 2:
            x = x.unsqueeze(1)

        attn_input = self.norm(x)
        attn_output, _ = self.attn(attn_input, attn_input, attn_input)
        x = x + attn_output
        x = x + self.mlp(self.norm(x))

        if x.size(1) == 1:
            return x.squeeze(1)
        return x

class RobustTrajectoryModel(nn.Module):
    """Feature vector -> 2-value regression network built from AttentionBlocks.

    The input is projected to ``hidden_dim``, passed through ``num_layers``
    ``AttentionBlock`` modules, and mapped to two outputs by an MLP head.
    The activation after the middle block is saved and added, scaled by
    0.3, to the output of the last block.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden representation.
        num_layers: Number of ``AttentionBlock`` modules.
        dropout: Dropout probability (the head uses half of it).
    """

    def __init__(self, input_dim=41, hidden_dim=512, num_layers=6, dropout=0.2):
        super().__init__()

        # Input projection
        self.input_proj = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
        )

        # Main blocks
        stack = []
        for _ in range(num_layers):
            stack.append(AttentionBlock(hidden_dim, num_heads=8, dropout=dropout))
        self.blocks = nn.ModuleList(stack)

        # Output head
        head_layers = [
            nn.Linear(hidden_dim, 256),
            nn.LayerNorm(256),
            nn.GELU(),
            nn.Dropout(dropout/2),
            nn.Linear(256, 128),
            nn.GELU(),
            nn.Linear(128, 2),
        ]
        self.output_head = nn.Sequential(*head_layers)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Xavier-init Linear weights (zero bias); reset LayerNorm to identity."""
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
            return
        if isinstance(m, nn.LayerNorm):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.input_proj(x)

        middle = len(self.blocks) // 2
        final = len(self.blocks) - 1
        for depth, layer in enumerate(self.blocks):
            x = layer(x)
            if depth == middle:
                # Remember the mid-stack activation for the long skip.
                saved = x
            elif depth == final:
                # Long skip: mix the mid-stack activation back in.
                x = x + saved * 0.3

        return self.output_head(x)

# ============ Training Pipeline ============
class RobustPipeline:
    """Load data, train a cross-validated model ensemble, and predict.

    Attributes:
        data_dir: Root directory of the competition files.
        device / gpu_count: Taken from the module-level GPU setup.
        models: Trained models, one per (ensemble member, fold).
        model_stats: Normalization statistics each model was trained with,
            parallel to ``models``.
        stats_dict: Statistics of the most recently trained fold (used as a
            fallback at prediction time).
        scaler: ``GradScaler`` for mixed-precision training on CUDA, else None.
    """

    def __init__(self, data_dir="/kaggle/input/nfl-big-data-bowl-2026-prediction/"):
        self.data_dir = Path(data_dir)
        self.device = device
        self.gpu_count = gpu_count
        self.models = []
        self.model_stats = []
        self.stats_dict = {}
        self.scaler = GradScaler() if device.type == 'cuda' else None

    def load_and_prepare_data(self):
        """Read the weekly training CSVs and the test CSVs and merge them.

        Weeks whose input or output file is missing are skipped; a week that
        fails to load is reported and skipped.

        Returns:
            tuple: ``(train_data, test_data, test_template)`` DataFrames.

        Raises:
            ValueError: If no training week could be loaded.
        """
        print("Loading data...")

        train_inputs = []
        train_outputs = []

        for week in range(1, 19):
            try:
                inp_path = self.data_dir / f"train/input_2023_w{week:02d}.csv"
                out_path = self.data_dir / f"train/output_2023_w{week:02d}.csv"

                if inp_path.exists() and out_path.exists():
                    # Read both before appending so the lists stay paired.
                    inp = pd.read_csv(inp_path)
                    out = pd.read_csv(out_path)
                    train_inputs.append(inp)
                    train_outputs.append(out)
                    print(f"Loaded week {week}")
            except Exception as e:
                # Deliberate best-effort: one unreadable week must not abort.
                print(f"Could not load week {week}: {e}")

        if not train_inputs:
            raise ValueError("No training data loaded!")

        train_input = pd.concat(train_inputs, ignore_index=True)
        train_output = pd.concat(train_outputs, ignore_index=True)

        del train_inputs, train_outputs
        gc.collect()

        print("Preparing features...")
        train_data = self._prepare_features(train_input, train_output, is_training=True)

        # Test data
        test_input = pd.read_csv(self.data_dir / "test_input.csv")
        test_template = pd.read_csv(self.data_dir / "test.csv")
        test_data = self._prepare_features(test_input, test_template, is_training=False)

        return train_data, test_data, test_template

    def _prepare_features(self, input_df, output_df, is_training):
        """Attach each player's last input-frame values to the output rows.

        Args:
            input_df: Tracking rows with game_id, play_id, nfl_id, frame_id,
                x and y.
            output_df: Rows to predict, keyed by game_id, play_id, nfl_id.
            is_training: When True and ``output_df`` has x/y, add
                ``displacement_x`` / ``displacement_y`` clipped to [-10, 10].

        Returns:
            DataFrame: ``output_df`` left-joined with the per-player summary
            (x/y renamed to x_last/y_last), numeric NaNs filled with 0.
        """
        # NOTE(review): GroupBy.last() takes the last non-null value per
        # column, which can differ from the final row when that row has
        # NaNs — confirm this is intended.
        last = input_df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id'])
        last = last.groupby(['game_id', 'play_id', 'nfl_id'], as_index=False).last()
        last = last.rename(columns={'x': 'x_last', 'y': 'y_last'})

        data = output_df.copy()

        # Drop frame_id from last so it does not clash with the output's own
        if 'frame_id' in last.columns:
            last = last.drop(columns=['frame_id'])

        data = data.merge(last, on=['game_id', 'play_id', 'nfl_id'], how='left')

        # Displacements; a missing x_last/y_last yields zero displacement
        if is_training and 'x' in data.columns and 'y' in data.columns:
            data['displacement_x'] = (data['x'] - data['x_last'].fillna(data['x'])).clip(-10, 10)
            data['displacement_y'] = (data['y'] - data['y_last'].fillna(data['y'])).clip(-10, 10)

        numeric_cols = data.select_dtypes(include=[np.number]).columns
        data[numeric_cols] = data[numeric_cols].fillna(0)

        return data

    @staticmethod
    def _snapshot_state(model):
        """Return a copy of ``model.state_dict()`` that owns its tensors.

        ``state_dict()`` values share storage with the live parameters, so a
        plain ``dict.copy()`` would keep changing as training continues.
        """
        return {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    @staticmethod
    def _constrain_positions(displacements, frame_ids, x_last, y_last,
                             max_speed=9, decay_base=0.98):
        """Turn predicted displacements into clipped field positions.

        Each displacement is multiplied by ``decay_base ** (frame_id / 5)``,
        rescaled so its magnitude does not exceed ``max_speed``, added to
        the last known position, and clipped to x in [0, 120] and
        y in [0, 53.3].

        Args:
            displacements: ``(n, 2)`` array of (dx, dy).
            frame_ids, x_last, y_last: ``(n,)`` arrays.

        Returns:
            tuple: ``(x_pred, y_pred)`` float64 arrays of length ``n``.
        """
        disp = np.asarray(displacements, dtype=np.float64)
        frames = np.asarray(frame_ids, dtype=np.float64)

        decay = decay_base ** (frames / 5.0)
        dx = disp[:, 0] * decay
        dy = disp[:, 1] * decay

        magnitude = np.sqrt(dx**2 + dy**2)
        scale = np.ones_like(magnitude)
        too_far = magnitude > max_speed
        scale[too_far] = max_speed / magnitude[too_far]
        dx = dx * scale
        dy = dy * scale

        x_pred = np.clip(np.asarray(x_last, dtype=np.float64) + dx, 0, 120)
        y_pred = np.clip(np.asarray(y_last, dtype=np.float64) + dy, 0, 53.3)
        return x_pred, y_pred

    def _make_loader(self, dataset, batch_size, shuffle):
        """Build a DataLoader with the worker/pinning settings for this device."""
        on_cuda = self.device.type == 'cuda'
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=4 if on_cuda else 0,
            pin_memory=on_cuda
        )

    def _train_one_epoch(self, model, loader, optimizer, criterion):
        """Run one optimization pass over ``loader`` (AMP on CUDA)."""
        model.train()
        use_amp = self.device.type == 'cuda' and self.scaler is not None

        for batch in loader:
            features = batch['features'].to(self.device)
            targets = batch['target'].to(self.device)
            weights = batch['weight'].to(self.device)

            optimizer.zero_grad()
            if use_amp:
                with autocast():
                    predictions = model(features)
                    loss = criterion(predictions, targets)
                    weighted_loss = (loss.mean(dim=1) * weights).mean()

                self.scaler.scale(weighted_loss).backward()
                # Unscale first so clipping sees true gradient norms.
                self.scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                self.scaler.step(optimizer)
                self.scaler.update()
            else:
                predictions = model(features)
                loss = criterion(predictions, targets)
                weighted_loss = (loss.mean(dim=1) * weights).mean()

                weighted_loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()

    def _validation_loss(self, model, loader, criterion):
        """Return the mean per-batch unweighted loss over ``loader``."""
        model.eval()
        total = 0

        with torch.no_grad():
            for batch in loader:
                features = batch['features'].to(self.device)
                targets = batch['target'].to(self.device)
                total += criterion(model(features), targets).mean().item()

        return total / len(loader)

    def _predict_loader(self, model, loader, use_amp=False):
        """Run ``model`` over ``loader`` and return stacked float32 outputs."""
        model.eval()
        chunks = []

        with torch.no_grad():
            for batch in loader:
                features = batch['features'].to(self.device)
                if use_amp:
                    with autocast():
                        predictions = model(features)
                else:
                    predictions = model(features)
                # Upcast so half-precision autocast outputs are averaged
                # in float32 downstream.
                chunks.append(predictions.float().cpu().numpy())

        return np.vstack(chunks)

    def train_ensemble(self, train_data, n_models=8, n_folds=5, epochs=25):
        """Train ``n_models`` x ``n_folds`` models with game-grouped CV.

        Each ensemble member uses its own seed and a slightly different
        architecture. Every fold model is appended to ``self.models``
        together with its normalization statistics in ``self.model_stats``.
        Prints the ensemble out-of-fold RMSE when ``x``/``y`` are present.

        Args:
            train_data: Prepared training DataFrame (needs ``game_id``,
                ``x_last``, ``y_last``).
            n_models: Number of ensemble members.
            n_folds: Number of GroupKFold splits (grouped by game_id).
            epochs: Maximum epochs per fold; validation runs every 3rd epoch
                and training stops after 3 non-improving validations.
        """
        print(f"\nTraining ensemble: {n_models} models, {n_folds} folds")

        groups = train_data['game_id'].values
        gkf = GroupKFold(n_splits=n_folds)

        all_oof_predictions = []

        for model_idx in range(n_models):
            print(f"\n{'='*50}")
            print(f"Model {model_idx + 1}/{n_models}")
            print(f"{'='*50}")

            # Set seed for reproducibility
            seed = model_idx * 42
            torch.manual_seed(seed)
            np.random.seed(seed)

            model_oof = np.zeros((len(train_data), 2))
            fold_models = []
            fold_stats = []

            for fold, (train_idx, val_idx) in enumerate(gkf.split(train_data, groups=groups)):
                print(f"\nFold {fold + 1}/{n_folds}")

                train_fold = train_data.iloc[train_idx]
                val_fold = train_data.iloc[val_idx]

                # Statistics are fitted on the training fold only and
                # reused for the validation fold.
                train_dataset = RobustNFLDataset(train_fold, is_training=True)
                self.stats_dict = train_dataset.stats_dict

                val_dataset = RobustNFLDataset(val_fold, is_training=True,
                                               stats_dict=self.stats_dict)

                train_loader = self._make_loader(
                    train_dataset, 512 * max(1, self.gpu_count), shuffle=True
                )
                val_loader = self._make_loader(val_dataset, 1024, shuffle=False)

                model = RobustTrajectoryModel(
                    input_dim=train_dataset.features.shape[1],
                    hidden_dim=384 + (model_idx % 3) * 128,  # Vary architecture
                    num_layers=5 + (model_idx % 3),
                    dropout=0.2 + (model_idx % 4) * 0.05
                ).to(self.device)

                if self.gpu_count > 1:
                    model = nn.DataParallel(model)

                optimizer = torch.optim.AdamW(
                    model.parameters(),
                    lr=2e-3,
                    weight_decay=1e-4
                )

                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer, T_max=epochs
                )

                criterion = nn.SmoothL1Loss(reduction='none')

                best_val_loss = float('inf')
                best_model_state = None
                patience = 0

                for epoch in range(epochs):
                    self._train_one_epoch(model, train_loader, optimizer, criterion)

                    # T_max is expressed in epochs, so step once per epoch;
                    # stepping per batch would make the LR oscillate.
                    scheduler.step()

                    # Validate every 3 epochs
                    if epoch % 3 != 0:
                        continue

                    val_loss = self._validation_loss(model, val_loader, criterion)

                    if val_loss < best_val_loss:
                        best_val_loss = val_loss
                        best_model_state = self._snapshot_state(model)
                        patience = 0
                    else:
                        patience += 1
                        if patience >= 3:
                            break

                    print(f"  Epoch {epoch+1}: Val Loss={val_loss:.4f}")

                # Restore the best validated weights
                if best_model_state is not None:
                    model.load_state_dict(best_model_state)

                fold_models.append(model)
                fold_stats.append(train_dataset.stats_dict)

                # OOF predictions
                model_oof[val_idx] = self._predict_loader(model, val_loader)

            all_oof_predictions.append(model_oof)
            self.models.extend(fold_models)
            self.model_stats.extend(fold_stats)

        # Calculate OOF score
        final_oof = np.mean(all_oof_predictions, axis=0)
        x_pred = train_data['x_last'].values + final_oof[:, 0]
        y_pred = train_data['y_last'].values + final_oof[:, 1]

        if 'x' in train_data.columns and 'y' in train_data.columns:
            rmse = np.sqrt(
                ((x_pred - train_data['x'].values)**2 +
                 (y_pred - train_data['y'].values)**2).mean() / 2
            )
            print(f"\n{'='*50}")
            print(f"Final OOF RMSE: {rmse:.6f}")
            print(f"{'='*50}")

    def predict(self, test_data, test_template):
        """Average all trained models' predictions and build the submission.

        Test features are normalized separately for each model with the
        statistics that model was trained on.

        Args:
            test_data: Prepared test DataFrame.
            test_template: DataFrame with game_id, play_id, nfl_id, frame_id
                defining the rows of the submission.

        Returns:
            DataFrame with columns ``id``, ``x``, ``y``; rows without a
            prediction get x=60.0, y=26.65.

        Raises:
            RuntimeError: If no model has been trained.
        """
        print("\nGenerating predictions...")

        if not self.models:
            raise RuntimeError("No trained models available; call train_ensemble first.")

        # Build un-normalized features once (empty stats + not training
        # means no normalization); they are re-normalized per model below.
        test_dataset = RobustNFLDataset(test_data, is_training=False, stats_dict={})
        raw_features = test_dataset.features.copy()

        if len(self.model_stats) == len(self.models):
            per_model_stats = self.model_stats
        else:
            # Models registered without their stats: use the latest stats.
            per_model_stats = [self.stats_dict] * len(self.models)

        use_amp = self.device.type == 'cuda'
        all_predictions = []

        for i, (model, stats) in enumerate(zip(self.models, per_model_stats)):
            print(f"Predicting with model {i+1}/{len(self.models)}")

            test_dataset.features = raw_features.copy()
            test_dataset.stats_dict = stats
            test_dataset.normalize_features()

            test_loader = self._make_loader(test_dataset, 1024, shuffle=False)
            all_predictions.append(self._predict_loader(model, test_loader, use_amp=use_amp))

        # Average predictions
        final_predictions = np.mean(all_predictions, axis=0)

        # Apply physics constraints
        meta = test_dataset.metadata
        x_pred, y_pred = self._constrain_positions(
            final_predictions,
            frame_ids=meta[:, 3],
            x_last=meta[:, 4],
            y_last=meta[:, 5],
        )

        pred_df = pd.DataFrame({
            'game_id': meta[:, 0].astype(np.int64),
            'play_id': meta[:, 1].astype(np.int64),
            'nfl_id': meta[:, 2].astype(np.int64),
            'frame_id': meta[:, 3].astype(np.int64),
            'x': x_pred,
            'y': y_pred,
        })

        # Create submission
        submission = test_template.merge(
            pred_df,
            on=['game_id', 'play_id', 'nfl_id', 'frame_id'],
            how='left'
        )

        submission['x'] = submission['x'].fillna(60.0)
        submission['y'] = submission['y'].fillna(26.65)

        submission['id'] = (
            submission['game_id'].astype(str) + '_' +
            submission['play_id'].astype(str) + '_' +
            submission['nfl_id'].astype(str) + '_' +
            submission['frame_id'].astype(str)
        )

        return submission[['id', 'x', 'y']]

# ============ Main Execution ============
if __name__ == "__main__":
    # End-to-end run: load data, train the ensemble, predict, write the CSV.
    start_time = time.time()

    print("="*60)
    print("NFL Big Data Bowl 2026 - Robust Pipeline")
    print("="*60)

    pipeline = RobustPipeline()

    # Load data
    train_data, test_data, test_template = pipeline.load_and_prepare_data()
    print("\nData loaded:")
    print(f"Training samples: {len(train_data):,}")
    print(f"Test samples: {len(test_data):,}")

    # 10 diverse models, 5-fold CV, up to 30 epochs with early stopping
    ensemble_config = {'n_models': 10, 'n_folds': 5, 'epochs': 30}
    pipeline.train_ensemble(train_data, **ensemble_config)

    # Predict and persist the submission file
    submission = pipeline.predict(test_data, test_template)
    submission.to_csv('submission.csv', index=False)

    # Wall-clock duration in hours
    elapsed_seconds = time.time() - start_time
    elapsed = elapsed_seconds / 3600

    print(f"\n{'='*60}")
    print(f"Pipeline completed in {elapsed:.2f} hours")
    print(f"Submission shape: {submission.shape}")
    print("\nFirst 10 predictions:")
    print(submission.head(10))

    # Release cached GPU memory
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print("="*60)