In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the '/kaggle/input' directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# ================================================================================
# NFL BIG DATA BOWL 2026 - ENHANCED WITH GRAPH NEURAL FEATURES
# Incorporates lightweight graph convolutions and attention mechanisms
# ================================================================================

import numpy as np
import pandas as pd
import warnings
import gc
from pathlib import Path
from tqdm.auto import tqdm
from scipy.ndimage import gaussian_filter1d
from scipy.spatial.distance import cdist
from scipy.spatial import Voronoi
from scipy.stats import entropy
from sklearn.cluster import DBSCAN

# Machine Learning
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

# Deep Learning
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')

# ================================================================================
# CONFIGURATION
# ================================================================================

class Config:
    """Central settings: data location, seeds, field extent and model/graph hyper-parameters."""

    # --- Data ---
    DATA_DIR = Path("/kaggle/input/nfl-big-data-bowl-2026-prediction/")

    # --- Reproducibility / validation ---
    SEEDS = [42, 123]  # Reduced to 2 seeds as requested
    N_FOLDS = 3

    # --- Field extent along x and y ---
    FIELD_X_MIN = 0.0
    FIELD_X_MAX = 120.0
    FIELD_Y_MIN = 0.0
    FIELD_Y_MAX = 53.3

    # --- Speed cap ---
    MAX_SPEED = 12.0

    # --- Neural network training ---
    NN_BATCH_SIZE = 4096
    NN_EPOCHS = 20
    NN_LEARNING_RATE = 0.002

    # --- Graph construction ---
    GRAPH_DISTANCE_THRESHOLD = 15.0
    GRAPH_K_NEAREST = 5

# ================================================================================
# DATA LOADING
# ================================================================================

def load_data():
    """Read the weekly training CSVs and the test files found under ``Config.DATA_DIR``.

    Weekly input/output files for weeks 1-18 are looked up individually and
    any file that does not exist on disk is skipped.

    Returns:
        tuple: ``(train_input, train_output, test_input, test_template)`` DataFrames.
    """
    print("Loading data...")

    weeks = range(1, 19)
    input_candidates = (Config.DATA_DIR / f"train/input_2023_w{w:02d}.csv" for w in weeks)
    output_candidates = (Config.DATA_DIR / f"train/output_2023_w{w:02d}.csv" for w in weeks)

    # Keep only the weekly files that are actually present
    train_input_files = [path for path in input_candidates if path.exists()]
    train_output_files = [path for path in output_candidates if path.exists()]

    print(f"Found {len(train_input_files)} weeks of data")

    input_frames = [pd.read_csv(path) for path in tqdm(train_input_files, desc="Input")]
    train_input = pd.concat(input_frames, ignore_index=True)

    output_frames = [pd.read_csv(path) for path in tqdm(train_output_files, desc="Output")]
    train_output = pd.concat(output_frames, ignore_index=True)

    test_input = pd.read_csv(Config.DATA_DIR / "test_input.csv")
    test_template = pd.read_csv(Config.DATA_DIR / "test.csv")

    print(f"Loaded {len(train_input):,} input records, {len(train_output):,} output records")

    return train_input, train_output, test_input, test_template

# ================================================================================
# GRAPH NEURAL NETWORK COMPONENTS
# ================================================================================

class SimpleGraphConvLayer(nn.Module):
    """Lightweight graph convolution layer.

    Every edge ``(row, col)`` produces a message by passing the concatenated
    features of its two endpoints through a single linear layer. The output
    of a node is the mean of the messages on the edges where it appears in
    ``row``, followed by a ReLU. Nodes without such an edge output zeros.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # The linear layer sees [source features, neighbour features] side by side
        self.linear = nn.Linear(in_features * 2, out_features)
        self.activation = nn.ReLU()

    def forward(self, node_features, edge_index):
        """Aggregate neighbour messages for every node.

        Args:
            node_features: ``(num_nodes, in_features)`` float tensor.
            edge_index: integer tensor with two rows; row 0 holds the node
                receiving the message, row 1 the neighbour.

        Returns:
            ``(num_nodes, out_features)`` tensor of activated mean messages.
        """
        row, col = edge_index
        num_nodes = node_features.shape[0]

        # One transformed message per edge
        messages = self.linear(torch.cat([node_features[row], node_features[col]], dim=1))

        # Size the buffer from the layer's output width, not from a slice of the
        # input, so the layer also works when out_features > in_features.
        summed = messages.new_zeros((num_nodes, self.linear.out_features))
        summed.index_add_(0, row, messages)

        # Number of messages received per node; clamp so isolated nodes stay at zero
        counts = messages.new_zeros(num_nodes)
        counts.index_add_(0, row, messages.new_ones(row.shape[0]))
        aggregated = summed / counts.clamp(min=1).unsqueeze(1)

        return self.activation(aggregated)

class LightweightGraphNet(nn.Module):
    """Small two-layer GNN producing per-node embeddings and a pooled graph embedding."""

    def __init__(self, input_dim=6, hidden_dim=32, output_dim=16):
        super().__init__()
        self.node_encoder = nn.Linear(input_dim, hidden_dim)
        self.conv1 = SimpleGraphConvLayer(hidden_dim, hidden_dim)
        self.conv2 = SimpleGraphConvLayer(hidden_dim, output_dim)
        # Registered as a parameterised layer but not applied in forward()
        self.global_pool = nn.Linear(output_dim, output_dim)

    def forward(self, node_features, edge_index):
        """Return ``(graph_embedding, node_embeddings)`` for one graph.

        The graph embedding is the mean of the node embeddings over the node axis.
        """
        # Project raw node features into the hidden space
        hidden = self.node_encoder(node_features).relu()

        # Two rounds of message passing over the same edges
        for conv in (self.conv1, self.conv2):
            hidden = conv(hidden, edge_index)

        # Mean-pool nodes into a single graph-level vector
        pooled = hidden.mean(dim=0)
        return pooled, hidden

def build_play_graph(frame_data):
    """Turn one frame of player rows into ``(node_features, edge_index)`` tensors.

    Each player is linked (in both directions) to its nearest neighbours, up to
    ``Config.GRAPH_K_NEAREST`` of them, provided they are closer than
    ``Config.GRAPH_DISTANCE_THRESHOLD``. Returns ``(None, None)`` when the frame
    has fewer than two players or no edge qualifies.
    """
    coords = frame_data[['x', 'y']].values
    num_nodes = len(coords)
    if num_nodes < 2:
        return None, None

    # Full pairwise distance matrix between players
    pairwise = cdist(coords, coords)

    k_nearest = Config.GRAPH_K_NEAREST
    max_dist = Config.GRAPH_DISTANCE_THRESHOLD

    edge_list = []
    for src, dist_row in enumerate(pairwise):
        # Skip the first sorted entry (expected to be the player itself)
        neighbours = np.argsort(dist_row)[1:k_nearest + 1]
        for dst in neighbours:
            if dist_row[dst] < max_dist:
                # Store both directions of the link
                edge_list.extend(([src, dst], [dst, src]))

    if not edge_list:
        return None, None

    edge_index = torch.tensor(edge_list, dtype=torch.long).t()

    # Per-player inputs fed to the GNN
    feature_columns = ['x', 'y', 's', 'a', 'o', 'dir']
    node_features = torch.tensor(frame_data[feature_columns].values, dtype=torch.float32)

    return node_features, edge_index

def extract_graph_features(input_df):
    """Extract graph-based features for all plays.

    For every ``(game_id, play_id)`` the last row of each player is turned
    into a graph and passed through an untrained ``LightweightGraphNet``.
    The first 8 components of each node embedding and of the pooled graph
    embedding are returned per player.

    Args:
        input_df: tracking rows with ``game_id``, ``play_id``, ``nfl_id``,
            ``frame_id`` and the columns read by ``build_play_graph``.

    Returns:
        DataFrame with one row per player of every play for which a graph
        could be built (plays without a graph are skipped).
    """
    print("  Computing graph neural features...")

    # The GNN is never trained here, so it acts as a fixed random projection.
    # Seed its initialisation so every call (and every run) uses the same
    # weights; otherwise features from separate calls are not comparable.
    # fork_rng restores the caller's CPU RNG state afterwards.
    with torch.random.fork_rng(devices=[]):
        torch.default_generator.manual_seed(Config.SEEDS[0])
        gnn_model = LightweightGraphNet()
    gnn_model.eval()

    graph_features = []

    plays = input_df.groupby(['game_id', 'play_id'])
    for (game_id, play_id), play_group in tqdm(plays, desc="Graph features", leave=False):
        # Get last frame
        last_frame = play_group.sort_values('frame_id').groupby('nfl_id').last().reset_index()

        # Build graph
        node_features, edge_index = build_play_graph(last_frame)
        if node_features is None:
            continue

        # Extract features using GNN
        with torch.no_grad():
            graph_embedding, node_embeddings = gnn_model(node_features, edge_index)

        # Graph-level features are identical for all players in the play,
        # so convert them once instead of once per player.
        global_feats = {
            f'graph_global_feat_{j}': val
            for j, val in enumerate(graph_embedding.numpy()[:8])
        }
        node_matrix = node_embeddings.numpy()[:, :8]  # Use first 8 features

        # Read ids straight from the column so their dtype is preserved
        # (row-wise iteration can upcast them).
        for nfl_id, node_row in zip(last_frame['nfl_id'].to_numpy(), node_matrix):
            player_features = {
                'game_id': game_id,
                'play_id': play_id,
                'nfl_id': nfl_id
            }
            for j, val in enumerate(node_row):
                player_features[f'graph_node_feat_{j}'] = val
            player_features.update(global_feats)

            graph_features.append(player_features)

    return pd.DataFrame(graph_features)

# ================================================================================
# ATTENTION-BASED PLAYER PAIR FEATURES
# ================================================================================

def compute_attention_features(input_df):
    """Softmax-style attention of defenders towards the targeted receiver, plus receiver spacing.

    Uses the last row of each player per play. Plays without a targeted
    receiver are skipped. Defender rows and receiver rows carry different
    feature columns in the returned DataFrame.
    """
    print("  Computing attention-based interaction features...")
    rows = []

    for (game_id, play_id), play_group in input_df.groupby(['game_id', 'play_id']):
        snapshot = play_group.sort_values('frame_id').groupby('nfl_id').last().reset_index()

        # Split the snapshot into the groups of interest
        role = snapshot['player_role']
        target_receiver = snapshot[role == 'Targeted Receiver']
        defenders = snapshot[snapshot['player_side'] == 'Defense']
        other_receivers = snapshot[role == 'Other Route Runner']

        if len(target_receiver) == 0:
            continue

        target = target_receiver.iloc[0]
        target_pos = np.array([target['x'], target['y']])
        target_heading = np.deg2rad(target['dir'])
        target_vel = np.array([target['s'] * np.sin(target_heading),
                               target['s'] * np.cos(target_heading)])

        # --- Defenders: how strongly each one "attends" to the target ---
        num_defenders = len(defenders)
        if num_defenders > 0:
            def_positions = defenders[['x', 'y']].values
            def_speed = defenders['s'].values
            def_heading = np.deg2rad(defenders['dir'].values)
            def_velocities = np.array([
                def_speed * np.sin(def_heading),
                def_speed * np.cos(def_heading)
            ]).T

            # Closer defenders receive more weight
            offsets = def_positions - target_pos
            distances = np.sqrt(np.sum(offsets**2, axis=1))
            distance_attention = torch.softmax(-torch.tensor(distances), dim=0).numpy()

            # Defenders moving in the same direction as the target receive more weight
            target_speed_norm = np.linalg.norm(target_vel)
            vel_alignments = [
                np.dot(def_vel, target_vel) / (np.linalg.norm(def_vel) * target_speed_norm + 1e-6)
                for def_vel in def_velocities
            ]
            velocity_attention = torch.softmax(torch.tensor(vel_alignments), dim=0).numpy()

            combined_attention = (distance_attention + velocity_attention) / 2

            # Position of every defender in ascending order of combined attention
            ascending = np.argsort(combined_attention).tolist()
            position_of = {idx: pos for pos, idx in enumerate(ascending)}
            top_score = combined_attention.max()

            for i, defender in defenders.reset_index(drop=True).iterrows():
                rows.append({
                    'game_id': game_id,
                    'play_id': play_id,
                    'nfl_id': defender['nfl_id'],
                    'attention_to_target_distance': distance_attention[i],
                    'attention_to_target_velocity': velocity_attention[i],
                    'attention_to_target_combined': combined_attention[i],
                    'is_primary_defender': int(combined_attention[i] == top_score),
                    'attention_rank': num_defenders - position_of[i]
                })

        # --- Receivers: spacing between route runners ---
        if len(other_receivers) > 0:
            all_receivers = pd.concat([target_receiver, other_receivers])
            rec_positions = all_receivers[['x', 'y']].values

            rec_distances = cdist(rec_positions, rec_positions)
            np.fill_diagonal(rec_distances, np.inf)  # ignore distance to self
            nearest_gap = rec_distances.min(axis=1)

            for i, receiver in all_receivers.reset_index(drop=True).iterrows():
                gap = nearest_gap[i]
                rows.append({
                    'game_id': game_id,
                    'play_id': play_id,
                    'nfl_id': receiver['nfl_id'],
                    'min_receiver_spacing': gap,
                    'receiver_isolation': int(gap > 10),
                    'route_proximity_score': 1 / (gap + 1)
                })

    return pd.DataFrame(rows)

# ================================================================================
# ENHANCED PLAYER INTERACTION FEATURES
# ================================================================================

def compute_player_interactions(input_df):
    """Compute player-to-player interaction features with graph context.

    For every play the last row of each player is used. Each player gets
    distance statistics to teammates and opponents, simple adjacency-based
    graph measures (players closer than ``Config.GRAPH_DISTANCE_THRESHOLD``
    count as connected), the spread of their teammates, and the offset to
    the nearest opponent. Targeted receivers additionally get coverage
    features. Plays with fewer than two players produce no rows.

    Args:
        input_df: tracking rows with ``game_id``, ``play_id``, ``nfl_id``,
            ``frame_id``, ``x``, ``y``, ``s``, ``player_side`` and
            ``player_role``.

    Returns:
        DataFrame with one row per player per play.
    """
    print("  Computing player interaction features...")

    interaction_features = []

    for (game_id, play_id), play_group in input_df.groupby(['game_id', 'play_id']):
        last_frame = play_group.sort_values('frame_id').groupby('nfl_id').last().reset_index()

        positions = last_frame[['x', 'y']].values
        n_players = len(positions)
        if n_players <= 1:
            # Pairwise features need at least two players
            continue

        player_ids = last_frame['nfl_id'].values
        player_sides = last_frame['player_side'].values
        player_roles = last_frame['player_role'].values
        speeds = last_frame['s'].values

        distances = cdist(positions, positions)

        # Compute graph centrality (simplified)
        adjacency = (distances < Config.GRAPH_DISTANCE_THRESHOLD).astype(float)
        np.fill_diagonal(adjacency, 0)
        degree_centrality = adjacency.sum(axis=1) / (n_players - 1)

        player_index = np.arange(n_players)

        for i, player_id in enumerate(player_ids):
            player_features = {
                'game_id': game_id,
                'play_id': play_id,
                'nfl_id': player_id,
                'graph_degree_centrality': degree_centrality[i]
            }

            same_side_mask = (player_sides == player_sides[i]) & (player_index != i)
            opp_side_mask = (player_sides != player_sides[i])
            has_teammates = np.any(same_side_mask)
            has_opponents = np.any(opp_side_mask)

            # --- Teammate distances ---
            if has_teammates:
                teammate_distances = distances[i][same_side_mask]
                player_features['nearest_teammate_dist'] = np.min(teammate_distances)
                player_features['avg_teammate_dist'] = np.mean(teammate_distances)
                player_features['teammates_within_5'] = np.sum(teammate_distances < 5)
                player_features['teammates_within_10'] = np.sum(teammate_distances < 10)
                player_features['teammates_within_3'] = np.sum(teammate_distances < 3)

                # Graph-based team cohesion
                teammate_adjacency = adjacency[i][same_side_mask]
                player_features['team_graph_connectivity'] = np.mean(teammate_adjacency)
            else:
                player_features['nearest_teammate_dist'] = 0
                player_features['avg_teammate_dist'] = 0
                player_features['teammates_within_5'] = 0
                player_features['teammates_within_10'] = 0
                player_features['teammates_within_3'] = 0
                player_features['team_graph_connectivity'] = 0

            # --- Opponent distances and pressure ---
            if has_opponents:
                opponent_distances = distances[i][opp_side_mask]
                player_features['nearest_opponent_dist'] = np.min(opponent_distances)
                player_features['avg_opponent_dist'] = np.mean(opponent_distances)
                player_features['opponents_within_5'] = np.sum(opponent_distances < 5)
                player_features['opponents_within_10'] = np.sum(opponent_distances < 10)
                player_features['opponents_within_3'] = np.sum(opponent_distances < 3)
                player_features['pressure_index'] = 1 / (player_features['nearest_opponent_dist'] + 1)

                # Faster opponents count as effectively closer
                opp_speeds = speeds[opp_side_mask]
                speed_weighted_distances = opponent_distances / (opp_speeds + 1)
                player_features['speed_weighted_pressure'] = 1 / (np.min(speed_weighted_distances) + 1)

                # Graph-based opponent pressure
                opponent_adjacency = adjacency[i][opp_side_mask]
                player_features['opponent_graph_pressure'] = np.sum(opponent_adjacency)
            else:
                player_features['nearest_opponent_dist'] = 100
                player_features['avg_opponent_dist'] = 100
                player_features['opponents_within_5'] = 0
                player_features['opponents_within_10'] = 0
                player_features['opponents_within_3'] = 0
                player_features['pressure_index'] = 0
                player_features['speed_weighted_pressure'] = 0
                player_features['opponent_graph_pressure'] = 0

            # Everyone within 10 of this player, excluding the player itself
            player_features['local_density'] = np.sum(distances[i] < 10) - 1

            # --- Shape of the player's own team (teammates only) ---
            if has_teammates:
                teammate_positions = positions[same_side_mask]
                player_features['team_spread_x'] = np.std(teammate_positions[:, 0])
                player_features['team_spread_y'] = np.std(teammate_positions[:, 1])
                player_features['team_centroid_x'] = np.mean(teammate_positions[:, 0])
                player_features['team_centroid_y'] = np.mean(teammate_positions[:, 1])
                player_features['team_compactness'] = np.mean(np.sqrt(np.sum((teammate_positions - teammate_positions.mean(axis=0))**2, axis=1)))
            else:
                # These defaults used to sit in an unreachable inner branch, so
                # players without teammates never received them.
                player_features['team_spread_x'] = 0
                player_features['team_spread_y'] = 0
                player_features['team_centroid_x'] = positions[i, 0]
                player_features['team_centroid_y'] = positions[i, 1]
                player_features['team_compactness'] = 0

            # --- Coverage on the targeted receiver ---
            if player_roles[i] == 'Targeted Receiver':
                coverage_mask = (player_roles == 'Defensive Coverage') & opp_side_mask
                if np.any(coverage_mask):
                    coverage_distances = distances[i][coverage_mask]
                    player_features['nearest_coverage_dist'] = np.min(coverage_distances)
                    player_features['coverage_players_nearby'] = np.sum(coverage_distances < 10)
                    player_features['coverage_density'] = np.sum(coverage_distances < 5)

                    nearest_cov_idx = np.argmin(coverage_distances)
                    cov_pos = positions[coverage_mask][nearest_cov_idx]

                    # Leverage compares each player's distance to the closer sideline
                    receiver_to_sideline = min(positions[i, 1], Config.FIELD_Y_MAX - positions[i, 1])
                    defender_to_sideline = min(cov_pos[1], Config.FIELD_Y_MAX - cov_pos[1])
                    player_features['inside_leverage'] = int(defender_to_sideline < receiver_to_sideline)
                    player_features['outside_leverage'] = int(defender_to_sideline > receiver_to_sideline)

            # --- Offset to the nearest opponent ---
            if has_opponents:
                nearest_opp_idx = np.argmin(distances[i][opp_side_mask])
                opp_positions = positions[opp_side_mask]
                dx = opp_positions[nearest_opp_idx, 0] - positions[i, 0]
                dy = opp_positions[nearest_opp_idx, 1] - positions[i, 1]
                player_features['nearest_opp_dx'] = dx
                player_features['nearest_opp_dy'] = dy
                player_features['nearest_opp_angle'] = np.arctan2(dy, dx)

            interaction_features.append(player_features)

    return pd.DataFrame(interaction_features)

# ================================================================================
# TRAJECTORY FEATURES
# ================================================================================

def add_trajectory_features(input_df):
    """Enhanced trajectory analysis.

    Computes, per ``(game_id, play_id, nfl_id)``, curvature, sinuosity, path
    length and acceleration statistics from the rows ordered by ``frame_id``.
    Players with fewer than three rows receive neutral defaults.

    Args:
        input_df: tracking rows with ``game_id``, ``play_id``, ``nfl_id``,
            ``frame_id``, ``x``, ``y``, ``a`` and ``dir`` (degrees).

    Returns:
        DataFrame with one row per player per play.
    """
    print("  Computing trajectory features...")
    features = []

    for (game_id, play_id, nfl_id), group in input_df.groupby(['game_id', 'play_id', 'nfl_id']):
        sorted_group = group.sort_values('frame_id')

        feature_dict = {
            'game_id': game_id,
            'play_id': play_id,
            'nfl_id': nfl_id
        }

        if len(sorted_group) >= 3:
            directions = np.deg2rad(sorted_group['dir'].values)
            # Wrap frame-to-frame heading changes into [-pi, pi) so that a
            # step across the 0/360 boundary (e.g. 350 -> 10 degrees) counts
            # as a 20 degree turn rather than a 340 degree one.
            direction_diffs = (np.diff(directions) + np.pi) % (2 * np.pi) - np.pi
            feature_dict['path_curvature'] = np.std(direction_diffs)
            feature_dict['max_direction_change'] = np.max(np.abs(direction_diffs))

            positions = sorted_group[['x', 'y']].values
            step_lengths = np.sqrt(np.sum(np.diff(positions, axis=0)**2, axis=1))
            path_length = np.sum(step_lengths)
            straight_distance = np.sqrt((positions[-1][0] - positions[0][0])**2 +
                                        (positions[-1][1] - positions[0][1])**2)
            # +0.1 keeps the ratio finite when start and end coincide
            feature_dict['path_sinuosity'] = path_length / (straight_distance + 0.1)
            feature_dict['total_path_length'] = path_length

            accelerations = sorted_group['a'].values
            feature_dict['jerk'] = np.std(np.diff(accelerations))
            feature_dict['max_acceleration'] = np.max(np.abs(accelerations))
        else:
            feature_dict['path_curvature'] = 0
            feature_dict['path_sinuosity'] = 1
            feature_dict['max_direction_change'] = 0
            feature_dict['total_path_length'] = 0
            feature_dict['jerk'] = 0
            feature_dict['max_acceleration'] = 0

        features.append(feature_dict)

    return pd.DataFrame(features)

# ================================================================================
# ROLLING STATISTICS
# ================================================================================

def _unwrapped_dir_variance(dir_values):
    """Return the sample variance (ddof=1) of headings in degrees after unwrapping the 0/360 jump."""
    values = np.asarray(dir_values, dtype=float)
    values = values[~np.isnan(values)]
    if len(values) < 2:
        # Sample variance is undefined for fewer than two observations
        return np.nan
    unwrapped = np.rad2deg(np.unwrap(np.deg2rad(values)))
    return np.var(unwrapped, ddof=1)


def compute_rolling_stats(input_df):
    """Compute rolling window statistics.

    For each ``(game_id, play_id, nfl_id)`` and each window size (3 and 5),
    statistics are taken over the last ``window`` rows ordered by
    ``frame_id``. Players with fewer rows than the window get zeros.

    Args:
        input_df: tracking rows with ``game_id``, ``play_id``, ``nfl_id``,
            ``frame_id``, ``s``, ``a`` and ``dir`` (degrees).

    Returns:
        DataFrame with one row per player per play.
    """
    print("  Computing rolling statistics...")
    features = []

    window_sizes = [3, 5]

    for (game_id, play_id, nfl_id), group in input_df.groupby(['game_id', 'play_id', 'nfl_id']):
        sorted_group = group.sort_values('frame_id')

        feature_dict = {
            'game_id': game_id,
            'play_id': play_id,
            'nfl_id': nfl_id
        }

        for window in window_sizes:
            if len(sorted_group) >= window:
                recent = sorted_group.tail(window)

                feature_dict[f'rolling_speed_mean_{window}'] = recent['s'].mean()
                feature_dict[f'rolling_speed_std_{window}'] = recent['s'].std()
                feature_dict[f'rolling_accel_mean_{window}'] = recent['a'].mean()
                # Headings wrap at 360 degrees; a plain variance of e.g.
                # [350, 10, 30] would be huge although the player barely
                # turned, so the sequence is unwrapped first.
                feature_dict[f'rolling_dir_variance_{window}'] = _unwrapped_dir_variance(recent['dir'].values)
                # A window always holds at least 3 rows, so the diff is never empty
                feature_dict[f'speed_acceleration_{window}'] = np.mean(np.diff(recent['s'].values))
            else:
                feature_dict[f'rolling_speed_mean_{window}'] = 0
                feature_dict[f'rolling_speed_std_{window}'] = 0
                feature_dict[f'rolling_accel_mean_{window}'] = 0
                feature_dict[f'rolling_dir_variance_{window}'] = 0
                feature_dict[f'speed_acceleration_{window}'] = 0

        features.append(feature_dict)

    return pd.DataFrame(features)

# ================================================================================
# SEPARATION DYNAMICS
# ================================================================================

def compute_separation_dynamics(input_df):
    """Track how the gap between the targeted receiver and each defender evolves.

    For every play with a targeted receiver, each defender sharing at least
    three frames with the receiver gets the mean and spread of the
    frame-to-frame change in separation (both scaled by 10) together with
    the largest and smallest separation and their difference.
    """
    print("  Computing separation dynamics...")
    records = []

    for (game_id, play_id), play_group in input_df.groupby(['game_id', 'play_id']):
        target_rows = play_group[play_group['player_role'] == 'Targeted Receiver']
        if len(target_rows) == 0:
            continue

        target_frames = target_rows.sort_values('frame_id')
        target_frame_ids = target_frames['frame_id'].values

        defender_ids = play_group.loc[play_group['player_side'] == 'Defense', 'nfl_id'].unique()

        for defender_id in defender_ids:
            defender_frames = play_group[play_group['nfl_id'] == defender_id].sort_values('frame_id')

            # Frames in which both players were tracked
            shared = np.intersect1d(target_frame_ids, defender_frames['frame_id'].values)
            if len(shared) < 3:
                continue

            target_xy = target_frames.loc[target_frames['frame_id'].isin(shared), ['x', 'y']].values
            defender_xy = defender_frames.loc[defender_frames['frame_id'].isin(shared), ['x', 'y']].values

            gaps = np.sqrt(np.sum((target_xy - defender_xy)**2, axis=1))

            # With three or more shared frames there are always at least two steps
            gap_steps = np.diff(gaps)
            widest = np.max(gaps)
            tightest = np.min(gaps)

            records.append({
                'game_id': game_id,
                'play_id': play_id,
                'nfl_id': defender_id,
                'separation_rate': np.mean(gap_steps) * 10,
                'separation_acceleration': np.std(gap_steps) * 10,
                'max_separation_achieved': widest,
                'min_separation': tightest,
                'separation_range': widest - tightest
            })

    return pd.DataFrame(records)

# ================================================================================
# ROUTE CLASSIFICATION FEATURES
# ================================================================================

def classify_route_pattern(input_df):
    """Infer coarse route descriptors from each receiver's tracked movement.

    Only players whose role is 'Targeted Receiver' or 'Other Route Runner'
    and who have at least three rows are described. Break detection (a
    heading change of more than 45 degrees between consecutive rows) is only
    attempted with five or more rows.

    Args:
        input_df: tracking rows with ``game_id``, ``play_id``, ``nfl_id``,
            ``frame_id``, ``player_role``, ``x``, ``y`` and ``dir`` (degrees).

    Returns:
        DataFrame with one row per qualifying receiver per play.
    """
    print("  Computing route classification features...")
    features = []

    for (game_id, play_id, nfl_id), group in input_df.groupby(['game_id', 'play_id', 'nfl_id']):
        if group['player_role'].iloc[0] not in ['Targeted Receiver', 'Other Route Runner']:
            continue

        sorted_group = group.sort_values('frame_id')
        n_frames = len(sorted_group)
        if n_frames < 3:
            continue

        x_vals = sorted_group['x'].values
        y_vals = sorted_group['y'].values

        route_depth = abs(x_vals[-1] - x_vals[0])
        horizontal_displacement = abs(y_vals[-1] - y_vals[0])
        # +0.1 avoids a degenerate angle when there is no displacement along x
        route_angle = np.arctan2(horizontal_displacement, route_depth + 0.1)

        if n_frames >= 5:
            directions = np.deg2rad(sorted_group['dir'].values)
            # Wrap heading changes into [-pi, pi) before taking the magnitude,
            # otherwise crossing the 0/360 boundary looks like a sharp break.
            wrapped = (np.diff(directions) + np.pi) % (2 * np.pi) - np.pi
            direction_changes = np.abs(wrapped)
            is_break = direction_changes > np.pi/4
            has_break = bool(np.any(is_break))
            break_point_frame = int(np.argmax(direction_changes)) if has_break else 0
            num_breaks = int(np.sum(is_break))
        else:
            has_break = False
            break_point_frame = 0
            num_breaks = 0

        features.append({
            'game_id': game_id, 'play_id': play_id, 'nfl_id': nfl_id,
            'route_depth': route_depth,
            'route_angle': route_angle,
            'has_route_break': int(has_break),
            # n_frames >= 3 here, so the denominator is never zero
            'break_point_normalized': break_point_frame / (n_frames - 1),
            'is_vertical_route': int(abs(route_angle) < np.pi/6),
            'is_crossing_route': int(abs(route_angle) > np.pi/3),
            'num_route_breaks': num_breaks,
            'route_complexity': horizontal_displacement + route_depth
        })

    return pd.DataFrame(features)

# ================================================================================
# PURSUIT ANGLE FEATURES
# ================================================================================

def compute_pursuit_features(input_df):
    """Calculate pursuit angles and relative velocities.

    For every play with a targeted receiver and at least one defender, each
    defender's last row is compared with the receiver's last row. Velocities
    are built from speed ``s`` and heading ``dir`` (degrees) with the x
    component using ``sin`` and the y component using ``cos``.

    Args:
        input_df: tracking rows with ``game_id``, ``play_id``, ``nfl_id``,
            ``frame_id``, ``player_role``, ``player_side``, ``x``, ``y``,
            ``s`` and ``dir``.

    Returns:
        DataFrame with one row per defender per play.
    """
    print("  Computing pursuit angle features...")
    features = []

    for (game_id, play_id), play_group in input_df.groupby(['game_id', 'play_id']):
        last_frame = play_group.sort_values('frame_id').groupby('nfl_id').last().reset_index()

        target = last_frame[last_frame['player_role'] == 'Targeted Receiver']
        defenders = last_frame[last_frame['player_side'] == 'Defense']

        if len(target) == 0 or len(defenders) == 0:
            continue

        target = target.iloc[0]
        target_heading = np.deg2rad(target['dir'])
        target_vel = np.array([
            target['s'] * np.sin(target_heading),
            target['s'] * np.cos(target_heading)
        ])
        target_pos = np.array([target['x'], target['y']])

        for _, defender in defenders.iterrows():
            defender_pos = np.array([defender['x'], defender['y']])
            defender_heading = np.deg2rad(defender['dir'])
            defender_vel = np.array([
                defender['s'] * np.sin(defender_heading),
                defender['s'] * np.cos(defender_heading)
            ])

            # Direction from defender to target; +0.1 guards against zero length
            to_target = target_pos - defender_pos
            distance = np.linalg.norm(to_target)
            to_target_norm = to_target / (distance + 0.1)

            defender_vel_norm = defender_vel / (np.linalg.norm(defender_vel) + 0.1)
            pursuit_angle = np.arccos(np.clip(np.dot(defender_vel_norm, to_target_norm), -1, 1))

            # Positive closing speed means the gap is shrinking
            relative_vel = target_vel - defender_vel
            closing_speed = -np.dot(relative_vel, to_target_norm)

            time_to_intercept = distance / (closing_speed + 0.1) if closing_speed > 0 else 999

            # Scalar 2-D cross product written out explicitly: np.cross on
            # 2-element vectors is deprecated as of NumPy 2.0.
            cross_z = (to_target_norm[0] * defender_vel_norm[1]
                       - to_target_norm[1] * defender_vel_norm[0])

            features.append({
                'game_id': game_id,
                'play_id': play_id,
                'nfl_id': defender['nfl_id'],
                'pursuit_angle_to_target': pursuit_angle,
                'closing_speed_to_target': closing_speed,
                'time_to_intercept': min(time_to_intercept, 10),
                'pursuit_efficiency': np.cos(pursuit_angle) * defender['s'],
                'lateral_separation': abs(cross_z)
            })

    return pd.DataFrame(features)

# ================================================================================
# FORMATION AND COVERAGE FEATURES
# ================================================================================

def add_formation_features(input_df):
    """Describe the offensive/defensive formation of each play at the last tracked frame.

    For every (game_id, play_id) the last row per ``nfl_id`` (ordered by
    ``frame_id``) is used. Plays with fewer than two offensive or fewer than
    two defensive players are skipped. Play-level values are repeated on
    every player row of that play.

    Args:
        input_df: Tracking rows with columns ``game_id``, ``play_id``,
            ``nfl_id``, ``frame_id``, ``player_side``, ``player_role``,
            ``x`` and ``y``.

    Returns:
        DataFrame with one row per player of each retained play, keyed by
        ``game_id``/``play_id``/``nfl_id``. It is empty (no columns) when no
        play qualifies.
    """
    print("  Computing formation features...")
    features = []

    for (game_id, play_id), play_group in input_df.groupby(['game_id', 'play_id']):
        last_frame = play_group.sort_values('frame_id').groupby('nfl_id').last().reset_index()

        offense = last_frame[last_frame['player_side'] == 'Offense']
        defense = last_frame[last_frame['player_side'] == 'Defense']

        if len(offense) < 2 or len(defense) < 2:
            continue

        off_positions = offense[['x', 'y']].values
        off_x_spread = np.max(off_positions[:, 0]) - np.min(off_positions[:, 0])
        off_y_spread = np.max(off_positions[:, 1]) - np.min(off_positions[:, 1])
        off_x_center = np.mean(off_positions[:, 0])
        off_y_center = np.mean(off_positions[:, 1])

        def_positions = defense[['x', 'y']].values
        def_y_spread = np.max(def_positions[:, 1]) - np.min(def_positions[:, 1])

        # The mean offensive x is used as a stand-in for the line of scrimmage
        line_of_scrimmage = offense['x'].mean()
        defenders_in_box = np.sum((defense['x'] < line_of_scrimmage + 5) &
                                   (defense['y'] > 15) & (defense['y'] < 38))
        defenders_deep = np.sum(defense['x'] > line_of_scrimmage + 10)

        receivers = offense[offense['player_role'].isin(['Targeted Receiver', 'Other Route Runner'])]
        is_trips = False
        is_bunch = False
        if len(receivers) >= 3:
            rec_y = receivers['y'].values
            left_receivers = np.sum(rec_y < 26.65)
            right_receivers = np.sum(rec_y > 26.65)
            is_trips = max(left_receivers, right_receivers) >= 3

            # Pairwise receiver distances; the diagonal (distance of a player
            # to himself, always 0) must be ignored or every play with three
            # receivers would be flagged as a bunch.
            receiver_xy = receivers[['x', 'y']].values
            pairwise = cdist(receiver_xy, receiver_xy)
            np.fill_diagonal(pairwise, np.inf)
            is_bunch = bool(np.any(pairwise < 3))

        for player_id, player_x, player_y in zip(last_frame['nfl_id'],
                                                 last_frame['x'],
                                                 last_frame['y']):
            features.append({
                'game_id': game_id,
                'play_id': play_id,
                'nfl_id': player_id,
                'offensive_width': off_y_spread,
                'offensive_depth': off_x_spread,
                'defensive_width': def_y_spread,
                'defenders_in_box': defenders_in_box,
                'defenders_deep': defenders_deep,
                'formation_width_ratio': off_y_spread / (def_y_spread + 1),
                'dist_from_off_center_x': player_x - off_x_center,
                'dist_from_off_center_y': player_y - off_y_center,
                'is_trips_formation': int(is_trips),
                'is_bunch_formation': int(is_bunch)
            })

    return pd.DataFrame(features)

# ================================================================================
# SPACE CONTROL FEATURES
# ================================================================================

def _convex_polygon_area(vertices):
    """Return the area of a convex polygon given as an (n, 2) array of vertices.

    Vertices are sorted by angle around their centroid before applying the
    shoelace formula, so the result does not depend on the input ordering.
    The polygon is closed (last vertex connects back to the first).
    """
    centred = vertices - vertices.mean(axis=0)
    order = np.argsort(np.arctan2(centred[:, 1], centred[:, 0]))
    x = centred[order, 0]
    y = centred[order, 1]
    return 0.5 * abs(np.dot(x, np.roll(y, -1)) - np.dot(np.roll(x, -1), y))


def compute_space_control(input_df):
    """Calculate space control/influence for each player at the last tracked frame.

    For every (game_id, play_id) with at least four players, the last row per
    ``nfl_id`` (ordered by ``frame_id``) is used to derive:

    * ``control_area``: a 5-unit-radius disc area scaled by ``1 + s / 10``,
      capped at 200.
    * ``space_cushion``: distance to the nearest player of another
      ``player_side`` minus 3, floored at 0 (10 when no such player exists).
    * ``control_x_speed``: uncapped scaled disc area times speed.
    * ``voronoi_cell_area``: area of the player's bounded Voronoi cell capped
      at 500; the uncapped scaled disc area is used when the cell is
      unbounded or the Voronoi diagram cannot be built.

    Args:
        input_df: Tracking rows with columns ``game_id``, ``play_id``,
            ``nfl_id``, ``frame_id``, ``player_side``, ``x``, ``y`` and ``s``.

    Returns:
        DataFrame with one row per player of each retained play. It is empty
        (no columns) when no play qualifies.
    """
    print("  Computing space control features...")
    features = []

    for (game_id, play_id), play_group in input_df.groupby(['game_id', 'play_id']):
        last_frame = play_group.sort_values('frame_id').groupby('nfl_id').last().reset_index()

        positions = last_frame[['x', 'y']].values
        if len(positions) < 4:
            continue

        sides = last_frame['player_side'].values

        # Best effort: degenerate layouts (e.g. collinear points) make Qhull
        # fail, in which case every player falls back to the disc estimate.
        try:
            vor = Voronoi(positions)
        except Exception:
            vor = None

        for i, (player_id, player_side, speed) in enumerate(zip(last_frame['nfl_id'],
                                                                  sides,
                                                                  last_frame['s'])):
            distances = np.sqrt(np.sum((positions - positions[i])**2, axis=1))

            immediate_control = np.pi * 5**2
            speed_factor = 1 + (speed / 10)
            weighted_control = immediate_control * speed_factor

            opp_mask = sides != player_side
            if np.any(opp_mask):
                nearest_opp_dist = np.min(distances[opp_mask])
                space_cushion = max(0, nearest_opp_dist - 3)
            else:
                space_cushion = 10

            voronoi_cell_area = weighted_control
            if vor is not None:
                region_index = vor.point_region[i]
                # -1 marks a point without an associated region; indexing
                # with it would silently pick the last region.
                if region_index >= 0:
                    region = vor.regions[region_index]
                    # A -1 vertex index means the cell extends to infinity
                    if len(region) > 0 and -1 not in region:
                        polygon = vor.vertices[region]
                        voronoi_cell_area = min(_convex_polygon_area(polygon), 500)

            features.append({
                'game_id': game_id,
                'play_id': play_id,
                'nfl_id': player_id,
                'control_area': min(weighted_control, 200),
                'space_cushion': space_cushion,
                'control_x_speed': weighted_control * speed,
                'voronoi_cell_area': voronoi_cell_area
            })

    return pd.DataFrame(features)

# ================================================================================
# MAIN FEATURE PREPARATION
# ================================================================================

def height_to_inches(height_str):
    """Convert a height in 'feet-inches' form (e.g. ``'6-2'``) to total inches.

    Args:
        height_str: Height string such as ``'6-2'``.

    Returns:
        ``feet * 12 + inches`` as an int, or the default of 70 when the value
        is not a string, contains no ``'-'``, or does not split into exactly
        two integers.
    """
    default_inches = 70
    if not isinstance(height_str, str) or '-' not in height_str:
        return default_inches
    try:
        # ValueError covers both non-numeric parts and a wrong number of parts
        feet, inches = map(int, height_str.split('-'))
    except ValueError:
        return default_inches
    return feet * 12 + inches

def prepare_features(input_df, output_df, is_training=True):
    """Complete feature preparation pipeline with graph features.

    Builds one feature row per row of ``output_df`` by:

    1. computing the per-player feature sets (interactions, trajectory,
       rolling stats, separation, route, pursuit, formation, space control,
       graph and attention features) from ``input_df``;
    2. taking each player's last input row per play (``x``/``y`` renamed to
       ``x_last``/``y_last``) and left-merging every non-empty feature set,
       whole-sequence statistics and last-5-frame statistics onto it;
    3. left-merging that table onto ``output_df`` and deriving time, physics,
       ball, target, field-position and role features.

    ``dir`` and ``o`` are treated as degrees measured clockwise from the +y
    axis, which is the convention implied by
    ``velocity_x = s * sin(dir)`` / ``velocity_y = s * cos(dir)``.

    Args:
        input_df: Per-frame tracking rows keyed by ``game_id``, ``play_id``,
            ``nfl_id`` and ``frame_id``.
        output_df: Rows to predict, keyed by the same four columns. When
            ``is_training`` is True it must also carry the true ``x``/``y``.
        is_training: When True, adds ``displacement_x``/``displacement_y``
            targets and drops rows whose displacement is missing or exceeds
            ``Config.MAX_SPEED * time_seconds * 1.5``.

    Returns:
        DataFrame of features (and targets when training) with NaN in
        numeric columns replaced by 0.
    """

    def _heading_change_std(angles):
        """Std-dev of frame-to-frame angle change, with each step wrapped to [-180, 180)."""
        if len(angles) <= 1:
            return 0
        # Without wrapping, a 359 -> 1 degree step would count as -358
        steps = (np.diff(angles) + 180.0) % 360.0 - 180.0
        return np.std(steps)

    # Compute all feature sets
    interaction_features = compute_player_interactions(input_df)
    trajectory_features = add_trajectory_features(input_df)
    rolling_features = compute_rolling_stats(input_df)
    separation_features = compute_separation_dynamics(input_df)
    route_features = classify_route_pattern(input_df)
    pursuit_features = compute_pursuit_features(input_df)
    formation_features = add_formation_features(input_df)
    space_features = compute_space_control(input_df)
    
    # NEW: Graph and attention features
    graph_features = extract_graph_features(input_df)
    attention_features = compute_attention_features(input_df)
    
    # Get last frame
    last_frame = input_df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id']) \
                         .groupby(['game_id', 'play_id', 'nfl_id'], as_index=False).last()
    last_frame = last_frame.rename(columns={'x': 'x_last', 'y': 'y_last'})
    
    # Merge all features (empty feature sets have no key columns, so skip them)
    for feat_df in [interaction_features, trajectory_features, rolling_features, 
                     separation_features, route_features, pursuit_features, 
                     formation_features, space_features, graph_features, attention_features]:
        if len(feat_df) > 0:
            last_frame = last_frame.merge(feat_df, on=['game_id', 'play_id', 'nfl_id'], how='left')
    
    # Temporal statistics
    # NOTE: the dir/o aggregators must stay lambdas; the rename below relies
    # on the '<lambda>' column label they produce.
    temporal_stats = input_df.groupby(['game_id', 'play_id', 'nfl_id']).agg({
        'x': ['mean', 'std', 'min', 'max'],
        'y': ['mean', 'std', 'min', 'max'],
        's': ['mean', 'std', 'max', 'min'],
        'a': ['mean', 'std', 'max', 'min'],
        'dir': lambda x: _heading_change_std(x),
        'o': lambda x: _heading_change_std(x),
    }).reset_index()
    # Flatten the two-level column index: ('x', 'mean') -> 'x_mean', ('game_id', '') -> 'game_id'
    temporal_stats.columns = ['_'.join(col).strip() if col[1] else col[0] 
                              for col in temporal_stats.columns.values]
    temporal_stats = temporal_stats.rename(columns={
        'dir_<lambda>': 'dir_change_rate',
        'o_<lambda>': 'orientation_change_rate'
    })
    
    # Recent frames
    last_n_frames = 5
    recent_frames = input_df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id']) \
                            .groupby(['game_id', 'play_id', 'nfl_id']).tail(last_n_frames)
    
    trajectory_stats = recent_frames.groupby(['game_id', 'play_id', 'nfl_id']).agg({
        'x': lambda x: (x.iloc[-1] - x.iloc[0]) if len(x) > 1 else 0,
        'y': lambda x: (x.iloc[-1] - x.iloc[0]) if len(x) > 1 else 0,
        's': lambda x: x.diff().mean() if len(x) > 1 else 0,
    }).reset_index()
    trajectory_stats.columns = ['game_id', 'play_id', 'nfl_id', 
                                  'recent_displacement_x', 'recent_displacement_y', 'acceleration_trend']
    
    last_frame = last_frame.merge(temporal_stats, on=['game_id', 'play_id', 'nfl_id'], how='left')
    last_frame = last_frame.merge(trajectory_stats, on=['game_id', 'play_id', 'nfl_id'], how='left')
    
    if 'player_height' in last_frame.columns:
        last_frame['height_inches'] = last_frame['player_height'].apply(height_to_inches)
    
    # Target receiver position (one per play; duplicates are dropped)
    targets = last_frame[last_frame['player_role'] == 'Targeted Receiver'][
        ['game_id', 'play_id', 'x_last', 'y_last']
    ].rename(columns={'x_last': 'target_x', 'y_last': 'target_y'})
    targets = targets.drop_duplicates(['game_id', 'play_id'])
    
    last_frame = last_frame.merge(targets, on=['game_id', 'play_id'], how='left')
    
    # Merge with output; the input frame_id is dropped so the output frame_id is kept
    merge_cols = [col for col in last_frame.columns if col not in ['frame_id']]
    merged = output_df.merge(last_frame[merge_cols], 
                             on=['game_id', 'play_id', 'nfl_id'], 
                             how='left')
    
    df = merged.copy()
    
    # Core temporal features (frame_id / 10, i.e. the code assumes 10 frames per second)
    df['time_seconds'] = df['frame_id'] / 10.0
    df['time_normalized'] = df['frame_id'] / df.groupby(['game_id', 'play_id', 'nfl_id'])['frame_id'].transform('max')
    df['time_squared'] = df['time_seconds'] ** 2
    df['time_cubed'] = df['time_seconds'] ** 3
    df['sqrt_time'] = np.sqrt(df['time_seconds'])
    df['log_time'] = np.log1p(df['time_seconds'])
    df['time_sin'] = np.sin(2 * np.pi * df['time_normalized'])
    df['time_cos'] = np.cos(2 * np.pi * df['time_normalized'])
    df['time_sin_2'] = np.sin(4 * np.pi * df['time_normalized'])
    df['time_cos_2'] = np.cos(4 * np.pi * df['time_normalized'])
    
    df['is_early_play'] = (df['time_normalized'] < 0.33).astype(int)
    df['is_mid_play'] = ((df['time_normalized'] >= 0.33) & (df['time_normalized'] < 0.67)).astype(int)
    df['is_late_play'] = (df['time_normalized'] >= 0.67).astype(int)
    
    if 'pressure_index' in df.columns:
        df['pressure_x_time'] = df['pressure_index'] * df['time_seconds']
        df['pressure_x_late_play'] = df['pressure_index'] * df['is_late_play']
    
    if 'nearest_opponent_dist' in df.columns:
        df['opponent_closing_time'] = df['nearest_opponent_dist'] / (df['s'] + 1)
        df['space_urgency'] = df['time_seconds'] / (df['opponent_closing_time'] + 0.1)
    
    # Velocity & Physics
    if 'dir' in df.columns and 's' in df.columns:
        dir_rad = np.deg2rad(df['dir'].fillna(0))
        o_rad = np.deg2rad(df['o'].fillna(0))
        
        df['velocity_x'] = df['s'] * np.sin(dir_rad)
        df['velocity_y'] = df['s'] * np.cos(dir_rad)
        
        # Smallest absolute angle between orientation and movement direction
        df['angle_diff_orientation_movement'] = np.abs(df['o'] - df['dir'])
        df['angle_diff_orientation_movement'] = np.where(
            df['angle_diff_orientation_movement'] > 180,
            360 - df['angle_diff_orientation_movement'],
            df['angle_diff_orientation_movement']
        )
        
        if 'player_weight' in df.columns:
            df['momentum_magnitude'] = df['player_weight'] * df['s']
        
        # Dead-reckoning positions from the last observed state
        df['expected_x_constant_v'] = df['x_last'] + df['velocity_x'] * df['time_seconds']
        df['expected_y_constant_v'] = df['y_last'] + df['velocity_y'] * df['time_seconds']
        
        if 'a' in df.columns:
            df['expected_x_with_accel'] = df['x_last'] + df['velocity_x'] * df['time_seconds'] + 0.5 * df['a'] * np.sin(dir_rad) * df['time_squared']
            df['expected_y_with_accel'] = df['y_last'] + df['velocity_y'] * df['time_seconds'] + 0.5 * df['a'] * np.cos(dir_rad) * df['time_squared']
    
    if 's_mean' in df.columns:
        df['speed_consistency'] = df['s'] / (df['s_mean'] + 0.1)
        df['speed_deviation'] = np.abs(df['s'] - df['s_mean'])
        
    if 'a_mean' in df.columns:
        df['acceleration_consistency'] = df['a'] / (df['a_mean'] + 0.1)
        df['acceleration_deviation'] = np.abs(df['a'] - df['a_mean'])
    
    df['time_x_speed'] = df['time_seconds'] * df['s']
    df['time_x_acceleration'] = df['time_seconds'] * df['a']
    df['time_squared_x_speed'] = df['time_squared'] * df['s']
    
    # Ball features
    if all(col in df.columns for col in ['ball_land_x', 'ball_land_y', 'x_last', 'y_last']):
        ball_dx = df['ball_land_x'] - df['x_last']
        ball_dy = df['ball_land_y'] - df['y_last']
        df['distance_to_ball'] = np.sqrt(ball_dx**2 + ball_dy**2)
        df['angle_to_ball'] = np.arctan2(ball_dy, ball_dx)
        df['ball_direction_x'] = ball_dx / (df['distance_to_ball'] + 1e-6)
        df['ball_direction_y'] = ball_dy / (df['distance_to_ball'] + 1e-6)
        
        if 'o' in df.columns:
            # Bearing to the ball in the same convention as `o` (degrees
            # clockwise from +y, in [0, 360)): note the swapped arctan2
            # arguments. Comparing `o` against the counter-clockwise-from-+x
            # `angle_to_ball` would mix two angle conventions.
            ball_bearing = np.rad2deg(np.arctan2(ball_dx, ball_dy)) % 360
            angle_to_ball_diff = np.abs(df['o'] - ball_bearing) % 360
            angle_to_ball_diff = np.where(angle_to_ball_diff > 180, 360 - angle_to_ball_diff, angle_to_ball_diff)
            df['facing_ball'] = (angle_to_ball_diff < 45).astype(int)
        
        # Fixed nominal ball speed used to turn distance into a time scale
        estimated_ball_speed = 20.0
        df['estimated_time_to_ball'] = df['distance_to_ball'] / estimated_ball_speed
        df['time_ratio_to_ball'] = df['time_seconds'] / (df['estimated_time_to_ball'] + 0.1)
        df['ball_hang_time'] = df['distance_to_ball'] / estimated_ball_speed
        df['ball_peak_time'] = df['ball_hang_time'] / 2
        df['time_after_peak'] = np.maximum(0, df['time_seconds'] - df['ball_peak_time'])
        df['ball_descent_phase'] = (df['time_seconds'] > df['ball_peak_time']).astype(int)
        
        progress = np.clip(df['time_seconds'] / df['ball_hang_time'], 0, 1)
        df['ball_progress'] = progress
        df['expected_ball_x'] = df['x_last'] + ball_dx * progress
        df['expected_ball_y'] = df['y_last'] + ball_dy * progress
        df['distance_to_ball_trajectory'] = np.sqrt(
            (df['x_last'] - df['expected_ball_x'])**2 + 
            (df['y_last'] - df['expected_ball_y'])**2
        )
        
        if 'velocity_x' in df.columns:
            ball_unit_x = ball_dx / (df['distance_to_ball'] + 1e-6)
            ball_unit_y = ball_dy / (df['distance_to_ball'] + 1e-6)
            # Velocity component along the player-to-ball direction
            df['closing_speed'] = df['velocity_x'] * ball_unit_x + df['velocity_y'] * ball_unit_y
            df['projected_time_to_ball'] = df['distance_to_ball'] / (np.abs(df['closing_speed']) + 0.1)
            df['time_urgency'] = df['time_seconds'] / (df['projected_time_to_ball'] + 0.1)
        
        df['distance_to_ball_x_time'] = df['distance_to_ball'] * df['time_seconds']
        df['distance_to_ball_x_time_squared'] = df['distance_to_ball'] * df['time_squared']
    
    # Target features
    if 'target_x' in df.columns:
        target_dx = df['target_x'] - df['x_last']
        target_dy = df['target_y'] - df['y_last']
        df['distance_to_target'] = np.sqrt(target_dx**2 + target_dy**2)
        df['is_target'] = (df['player_role'] == 'Targeted Receiver').astype(int)
        df['angle_to_target'] = np.arctan2(target_dy, target_dx)
        df['distance_to_target_x_time'] = df['distance_to_target'] * df['time_seconds']
        df['is_target_x_time_squared'] = df['is_target'] * df['time_squared']
    
    # Field position & zones
    df['x_normalized'] = df['x_last'] / Config.FIELD_X_MAX
    df['y_normalized'] = df['y_last'] / Config.FIELD_Y_MAX
    df['distance_from_sideline'] = np.minimum(df['y_last'], Config.FIELD_Y_MAX - df['y_last'])
    df['distance_from_endzone'] = np.minimum(df['x_last'], Config.FIELD_X_MAX - df['x_last'])
    
    df['in_hash_marks'] = ((df['y_last'] > 18.5) & (df['y_last'] < 34.8)).astype(int)
    df['field_zone_left'] = (df['y_last'] < 17.77).astype(int)
    df['field_zone_middle'] = ((df['y_last'] >= 17.77) & (df['y_last'] <= 35.53)).astype(int)
    df['field_zone_right'] = (df['y_last'] > 35.53).astype(int)
    df['depth_zone_short'] = (df['absolute_yardline_number'] < 10).astype(int)
    df['depth_zone_medium'] = ((df['absolute_yardline_number'] >= 10) & 
                               (df['absolute_yardline_number'] < 20)).astype(int)
    df['depth_zone_deep'] = (df['absolute_yardline_number'] >= 20).astype(int)
    
    # Role features
    df['is_offense'] = (df['player_side'] == 'Offense').astype(int)
    df['is_passer'] = (df['player_role'] == 'Passer').astype(int)
    df['is_coverage'] = (df['player_role'] == 'Defensive Coverage').astype(int)
    df['is_redzone'] = (df['absolute_yardline_number'] <= 20).astype(int)
    
    df['speed_percentile_player'] = df.groupby('nfl_id')['s'].rank(pct=True)
    df['speed_vs_role_avg'] = df['s'] - df.groupby('player_role')['s'].transform('mean')
    df['acceleration_ratio'] = df['a'] / (df['s'] + 1)
    if 'player_weight' in df.columns:
        df['speed_momentum'] = df['s'] * df['player_weight']
    
    # Interaction features
    if 'is_target' in df.columns:
        df['is_target_x_time'] = df['is_target'] * df['time_seconds']
    if 'distance_to_ball' in df.columns:
        df['distance_ball_x_speed'] = df['distance_to_ball'] * df['s']
    
    df['is_offense_x_early_play'] = df['is_offense'] * df['is_early_play']
    df['is_offense_x_late_play'] = df['is_offense'] * df['is_late_play']
    if 'is_target' in df.columns:
        df['is_target_x_late_play'] = df['is_target'] * df['is_late_play']
    
    # Training targets: displacement from the last observed position
    if is_training:
        df['displacement_x'] = df['x'] - df['x_last']
        df['displacement_y'] = df['y'] - df['y_last']
        
        # Drop rows without a target or with a displacement beyond 1.5x the
        # distance reachable at Config.MAX_SPEED in the elapsed time
        valid_mask = (
            df['displacement_x'].notna() & 
            df['displacement_y'].notna() &
            (np.sqrt(df['displacement_x']**2 + df['displacement_y']**2) <= Config.MAX_SPEED * df['time_seconds'] * 1.5)
        )
        df = df[valid_mask].reset_index(drop=True)
    
    # Fill NaN
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    df[numeric_cols] = df[numeric_cols].fillna(0)
    
    return df

# ================================================================================
# NEURAL NETWORK
# ================================================================================

class EnhancedNN(nn.Module):
    """Feed-forward regressor mapping ``input_dim`` features to one output.

    Three hidden stages (512, 256, 128 units), each Linear -> BatchNorm1d ->
    ReLU -> Dropout (p = 0.3, 0.25, 0.2), followed by Linear(128, 64) ->
    ReLU -> Linear(64, 1). All layers live in ``self.layers`` in that order.
    """

    def __init__(self, input_dim):
        super().__init__()

        # (output width, dropout probability) of each normalised hidden stage
        hidden_stages = ((512, 0.3), (256, 0.25), (128, 0.2))

        stack = []
        width_in = input_dim
        for width_out, drop_p in hidden_stages:
            stack.append(nn.Linear(width_in, width_out))
            stack.append(nn.BatchNorm1d(width_out))
            stack.append(nn.ReLU())
            stack.append(nn.Dropout(drop_p))
            width_in = width_out

        # Un-normalised head
        stack.append(nn.Linear(width_in, 64))
        stack.append(nn.ReLU())
        stack.append(nn.Linear(64, 1))

        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)

def train_neural_network(X_train, y_train, X_val, y_val, seed=42):
    """Train an ``EnhancedNN`` regressor with early stopping on validation MSE.

    Uses Adam with ``Config.NN_LEARNING_RATE``, batches of
    ``Config.NN_BATCH_SIZE``, at most ``Config.NN_EPOCHS`` epochs, a
    ``ReduceLROnPlateau`` scheduler (patience 5, factor 0.5) and stops after
    10 consecutive epochs without validation improvement.

    Args:
        X_train: 2-D array of training features.
        y_train: 1-D array of training targets.
        X_val: 2-D array of validation features.
        y_val: 1-D array of validation targets.
        seed: Seed applied to both torch and numpy before training.

    Returns:
        The model with the weights of the epoch that achieved the lowest
        mean validation batch loss.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train.reshape(-1, 1)))
    val_dataset = TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val.reshape(-1, 1)))
    
    train_loader = DataLoader(train_dataset, batch_size=Config.NN_BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=Config.NN_BATCH_SIZE)
    
    model = EnhancedNN(X_train.shape[1]).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=Config.NN_LEARNING_RATE)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)

    def _snapshot_weights():
        # state_dict() returns references to the live tensors; without
        # cloning, further training would overwrite the saved "best" state
        # and the final load_state_dict would be a no-op.
        return {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    best_val_loss = float('inf')
    best_model_state = _snapshot_weights()
    patience_counter = 0
    
    for epoch in range(Config.NN_EPOCHS):
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
        
        model.eval()
        val_losses = []
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                val_losses.append(loss.item())
        
        avg_val_loss = np.mean(val_losses)
        scheduler.step(avg_val_loss)
        
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_state = _snapshot_weights()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= 10:
                break
    
    # Restore the best epoch rather than returning the last one
    model.load_state_dict(best_model_state)
    return model

# ================================================================================
# ENSEMBLE TRAINING
# ================================================================================

def train_ensemble(train_data, features, seed=42):
    """Fit per-fold XGBoost, LightGBM, CatBoost and neural-network regressors.

    Features are standardised once on the full training table, then split
    with ``GroupKFold`` on ``game_id``. In every fold each model family is
    fitted on the training part for ``displacement_x`` first and
    ``displacement_y`` second; the y-model seeds are offset by 100. Only the
    neural network sees the validation part (for early stopping).

    Args:
        train_data: Training table holding ``features``, ``displacement_x``,
            ``displacement_y`` and ``game_id``.
        features: Names of the feature columns.
        seed: Base random seed; fold ``k`` uses ``seed + k``.

    Returns:
        ``(models_dx, models_dy, scaler)`` where each models dict maps
        ``'xgb'``/``'lgb'``/``'cat'``/``'nn'`` to a list with one fitted
        model per fold.
    """
    print(f"\nTraining ensemble with seed {seed}...")

    feature_matrix = train_data[features].values
    target_dx = train_data['displacement_x'].values
    target_dy = train_data['displacement_y'].values

    scaler = StandardScaler()
    scaled_matrix = scaler.fit_transform(feature_matrix)

    game_groups = train_data['game_id'].values
    splitter = GroupKFold(n_splits=Config.N_FOLDS)

    families = ('xgb', 'lgb', 'cat', 'nn')
    models_dx = {family: [] for family in families}
    models_dy = {family: [] for family in families}

    # (model store, target vector, seed offset); dx is always fitted before dy
    components = (
        (models_dx, target_dx, 0),
        (models_dy, target_dy, 100),
    )

    folds = splitter.split(feature_matrix, groups=game_groups)
    for fold, (train_idx, val_idx) in enumerate(folds):
        print(f"  Fold {fold + 1}/{Config.N_FOLDS}")

        X_train = scaled_matrix[train_idx]
        X_val = scaled_matrix[val_idx]

        # XGBoost
        for store, target, offset in components:
            booster = XGBRegressor(n_estimators=1200, learning_rate=0.05, max_depth=9,
                                   subsample=0.8, colsample_bytree=0.8,
                                   random_state=seed + fold + offset,
                                   tree_method='hist', verbosity=0)
            booster.fit(X_train, target[train_idx])
            store['xgb'].append(booster)

        # LightGBM
        for store, target, offset in components:
            booster = LGBMRegressor(n_estimators=1200, learning_rate=0.05, max_depth=9,
                                    num_leaves=120, subsample=0.8, colsample_bytree=0.8,
                                    random_state=seed + fold + offset, verbosity=-1)
            booster.fit(X_train, target[train_idx])
            store['lgb'].append(booster)

        # CatBoost
        for store, target, offset in components:
            booster = CatBoostRegressor(iterations=1200, learning_rate=0.05, depth=9,
                                        l2_leaf_reg=3, random_seed=seed + fold + offset,
                                        verbose=False)
            booster.fit(X_train, target[train_idx])
            store['cat'].append(booster)

        # Neural Network
        for store, target, offset in components:
            network = train_neural_network(X_train, target[train_idx],
                                           X_val, target[val_idx],
                                           seed + fold + offset)
            store['nn'].append(network)

    return models_dx, models_dy, scaler

def _average_family_predictions(models, X_scaled, X_tensor):
    """Average one displacement component over the four model families.

    Each family is first averaged over its per-fold models, then the four
    family means are averaged with equal weight.
    """
    family_means = []
    for model_type in ['xgb', 'lgb', 'cat']:
        preds = [model.predict(X_scaled) for model in models[model_type]]
        family_means.append(np.mean(preds, axis=0))

    nn_preds = []
    for model in models['nn']:
        model.eval()
        with torch.no_grad():
            # reshape(-1) rather than squeeze(): squeeze() turns a single-row
            # prediction into a 0-d array, which cannot be averaged with the
            # (1,)-shaped tree predictions.
            pred = model(X_tensor).cpu().numpy().reshape(-1)
        nn_preds.append(pred)
    family_means.append(np.mean(nn_preds, axis=0))

    return np.mean(family_means, axis=0)


def predict_ensemble(models_dx, models_dy, scaler, X_test):
    """Predict x/y displacements with the trained ensemble.

    Args:
        models_dx: Dict mapping ``'xgb'``/``'lgb'``/``'cat'``/``'nn'`` to the
            list of per-fold models for the x displacement.
        models_dy: Same structure for the y displacement.
        scaler: Fitted scaler exposing ``transform``.
        X_test: 2-D array of unscaled features.

    Returns:
        ``(pred_dx, pred_dy)``: two 1-D arrays with one value per row of
        ``X_test``.
    """
    X_scaled = scaler.transform(X_test)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    X_tensor = torch.FloatTensor(X_scaled).to(device)

    pred_dx = _average_family_predictions(models_dx, X_scaled, X_tensor)
    pred_dy = _average_family_predictions(models_dy, X_scaled, X_tensor)

    return pred_dx, pred_dy

# ================================================================================
# POST-PROCESSING
# ================================================================================

def apply_constraints(pred_x, pred_y, x_last, y_last, time_seconds):
    """Limit predicted positions to reachable distances and to the field.

    A prediction whose distance from the last observed position exceeds
    ``Config.MAX_SPEED * time_seconds`` is pulled back along the same
    direction to that maximum distance. All predictions are then clipped to
    the ``Config.FIELD_*`` bounds.

    The inputs are not modified; new arrays are returned.

    Args:
        pred_x: 1-D array of predicted x positions.
        pred_y: 1-D array of predicted y positions.
        x_last: 1-D array of last observed x positions.
        y_last: 1-D array of last observed y positions.
        time_seconds: 1-D array of elapsed time for each prediction.

    Returns:
        ``(pred_x, pred_y)`` as float arrays.
    """
    # Float copies: keeps the caller's arrays untouched and lets integer
    # inputs be rescaled without an in-place casting error.
    pred_x = np.array(pred_x, dtype=float)
    pred_y = np.array(pred_y, dtype=float)
    x_last = np.asarray(x_last, dtype=float)
    y_last = np.asarray(y_last, dtype=float)

    dx = pred_x - x_last
    dy = pred_y - y_last
    displacement = np.sqrt(dx**2 + dy**2)

    max_displacement = Config.MAX_SPEED * np.asarray(time_seconds, dtype=float)

    mask = displacement > max_displacement
    if np.any(mask):
        # 1e-6 guards the division; displacement is positive wherever mask holds
        scale = max_displacement[mask] / (displacement[mask] + 1e-6)
        pred_x[mask] = x_last[mask] + dx[mask] * scale
        pred_y[mask] = y_last[mask] + dy[mask] * scale

    pred_x = np.clip(pred_x, Config.FIELD_X_MIN, Config.FIELD_X_MAX)
    pred_y = np.clip(pred_y, Config.FIELD_Y_MIN, Config.FIELD_Y_MAX)

    return pred_x, pred_y

def smooth_trajectories(test_data, pred_x, pred_y, sigma=0.5):
    """Gaussian-smooth each player's predicted trajectory within a play.

    Rows are grouped by (game_id, play_id, nfl_id). Groups with more than
    three rows are smoothed with ``gaussian_filter1d``; shorter groups are
    returned unchanged. When ``test_data`` has a ``frame_id`` column the
    filter runs in frame order, and the results are written back to the
    original row positions.

    Rows are addressed by position, so ``test_data`` may carry any index
    (including one with repeated labels). The inputs are not modified.

    Args:
        test_data: DataFrame with ``game_id``, ``play_id``, ``nfl_id`` and
            optionally ``frame_id``; one row per prediction.
        pred_x: 1-D array of predicted x positions aligned with the rows.
        pred_y: 1-D array of predicted y positions aligned with the rows.
        sigma: Standard deviation of the Gaussian kernel, in frames.

    Returns:
        ``(pred_x, pred_y)`` as float arrays in the original row order.
    """
    smoothed_x = np.array(pred_x, dtype=float)
    smoothed_y = np.array(pred_y, dtype=float)

    group_keys = ['game_id', 'play_id', 'nfl_id']
    keys = test_data[group_keys].reset_index(drop=True)
    frames = test_data['frame_id'].to_numpy() if 'frame_id' in test_data.columns else None

    # .indices maps each group to the integer positions of its rows
    for positions in keys.groupby(group_keys).indices.values():
        if len(positions) <= 3:
            continue
        if frames is not None:
            # Filter along time, not along whatever order the rows arrived in
            positions = positions[np.argsort(frames[positions], kind='stable')]
        smoothed_x[positions] = gaussian_filter1d(smoothed_x[positions], sigma=sigma)
        smoothed_y[positions] = gaussian_filter1d(smoothed_y[positions], sigma=sigma)

    return smoothed_x, smoothed_y

# ================================================================================
# MAIN PIPELINE
# ================================================================================

def main():
    """Run the full pipeline and write ``submission.csv``.

    Steps: load the data, build train/test feature tables, train one
    ensemble per seed in ``Config.SEEDS``, average the predicted
    displacements over seeds, add them to the last observed positions, apply
    the speed/field constraints, smooth each trajectory, clip to the field
    again and save the submission.

    Returns:
        The submission DataFrame with columns ``id``, ``x`` and ``y``.
    """
    print("="*80)
    print(" NFL BIG DATA BOWL 2026 - ENHANCED WITH GRAPH NEURAL FEATURES")
    print("="*80)
    
    train_input, train_output, test_input, test_template = load_data()
    
    print("\nPreparing features with graph neural enhancements...")
    train_data = prepare_features(train_input, train_output, is_training=True)
    test_data = prepare_features(test_input, test_template, is_training=False)
    
    print(f"Train shape: {train_data.shape}")
    print(f"Test shape: {test_data.shape}")
    
    # Auto-detect all numeric features
    exclude_cols = {'game_id', 'play_id', 'nfl_id', 'frame_id', 'x', 'y',
                    'displacement_x', 'displacement_y', 'x_last', 'y_last'}
    feature_cols = [col for col in train_data.select_dtypes(include=[np.number]).columns 
                    if col not in exclude_cols]

    # A feature that exists only in the training table cannot be used at
    # prediction time; drop it up front instead of failing with a KeyError
    # after the models have been trained.
    missing_in_test = [col for col in feature_cols if col not in test_data.columns]
    if missing_in_test:
        print(f"Dropping {len(missing_in_test)} features absent from test data: {missing_in_test}")
        feature_cols = [col for col in feature_cols if col in test_data.columns]
    
    print(f"\nUsing {len(feature_cols)} total features")
    print("New features: graph neural embeddings, attention mechanisms, graph centrality")
    
    all_predictions_dx = []
    all_predictions_dy = []

    # The test matrix does not depend on the seed
    X_test = test_data[feature_cols].values
    
    for seed in Config.SEEDS:
        models_dx, models_dy, scaler = train_ensemble(train_data, feature_cols, seed)
        
        pred_dx, pred_dy = predict_ensemble(models_dx, models_dy, scaler, X_test)
        
        all_predictions_dx.append(pred_dx)
        all_predictions_dy.append(pred_dy)
        
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
    
    print("\nAveraging predictions...")
    final_pred_dx = np.mean(all_predictions_dx, axis=0)
    final_pred_dy = np.mean(all_predictions_dy, axis=0)
    
    # Models predict displacement from the last observed position
    pred_x = test_data['x_last'].values + final_pred_dx
    pred_y = test_data['y_last'].values + final_pred_dy
    
    print("Applying physics constraints...")
    pred_x, pred_y = apply_constraints(
        pred_x, pred_y,
        test_data['x_last'].values,
        test_data['y_last'].values,
        test_data['time_seconds'].values
    )
    
    print("Smoothing trajectories...")
    pred_x, pred_y = smooth_trajectories(test_data, pred_x, pred_y)
    
    # Smoothing can move values slightly outside the field again
    pred_x = np.clip(pred_x, Config.FIELD_X_MIN, Config.FIELD_X_MAX)
    pred_y = np.clip(pred_y, Config.FIELD_Y_MIN, Config.FIELD_Y_MAX)
    
    print("\nCreating submission...")
    submission = pd.DataFrame({
        'id': (test_data['game_id'].astype(str) + "_" +
               test_data['play_id'].astype(str) + "_" +
               test_data['nfl_id'].astype(str) + "_" +
               test_data['frame_id'].astype(str)),
        'x': pred_x,
        'y': pred_y
    })
    
    submission.to_csv("submission.csv", index=False)
    
    print(f"\n✅ Submission saved: {len(submission)} predictions")
    print(f"X: mean={submission['x'].mean():.2f}, std={submission['x'].std():.2f}")
    print(f"Y: mean={submission['y'].mean():.2f}, std={submission['y'].std():.2f}")
    
    print("\nFirst 5 predictions:")
    print(submission.head())
    
    print("\n" + "="*80)
    print(" COMPLETE WITH GRAPH NEURAL ENHANCEMENTS!")
    print("="*80)
    
    return submission

# Script entry point: run the pipeline only when this file is executed
# directly; the resulting submission table stays available at module level.
if __name__ == "__main__":
    submission = main()