# In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# In [None]:
# ================================================================================
# NFL BIG DATA BOWL 2026 - ENHANCED WITH TEMPORAL FEATURES + PLAYER INTERACTIONS
# Complete ensemble with temporal/time series oriented features
# ================================================================================

import numpy as np
import pandas as pd
import warnings
import gc
from pathlib import Path
from tqdm.auto import tqdm
from scipy.ndimage import gaussian_filter1d
from scipy.spatial.distance import cdist

# Machine Learning
from sklearn.preprocessing import StandardScaler, RobustScaler, LabelEncoder
from sklearn.model_selection import GroupKFold
from sklearn.linear_model import RidgeCV
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

# Deep Learning
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Suppress every Python warning for the remainder of the session
# (presumably to keep the notebook output readable).
warnings.filterwarnings('ignore')

# ================================================================================
# CONFIGURATION
# ================================================================================

class Config:
    """Constants shared by data loading, feature engineering and training."""

    # Root folder of the competition files.
    DATA_DIR = Path("/kaggle/input/nfl-big-data-bowl-2026-prediction/")

    # Multiple seeds for ensemble
    SEEDS = [42, 123, 2024, 69]

    # Field bounds used for normalising and clipping coordinates.
    FIELD_X_MIN = 0.0
    FIELD_X_MAX = 120.0
    FIELD_Y_MIN = 0.0
    FIELD_Y_MAX = 53.3

    # Upper bound on player speed used by the displacement sanity checks
    # (presumably yards per second -- confirm).
    MAX_SPEED = 12.0

    # Number of GroupKFold splits.
    N_FOLDS = 5

    # Neural-network training hyper-parameters.
    NN_BATCH_SIZE = 2048
    NN_EPOCHS = 30
    NN_LEARNING_RATE = 0.001

# ================================================================================
# DATA LOADING
# ================================================================================

def load_data():
    """Read the weekly training CSVs and the test CSVs from ``Config.DATA_DIR``.

    Weekly files for weeks 1-18 are looked up under ``train/``; weeks whose
    file does not exist are silently skipped (input and output files are
    filtered independently of each other).

    Returns:
        Tuple ``(train_input, train_output, test_input, test_template)`` of
        DataFrames; the two training frames are the row-wise concatenation of
        all weekly files that were found.
    """
    print("Loading data...")

    # Keep only the weekly files that are actually present on disk.
    train_input_files = []
    train_output_files = []
    for w in range(1, 19):
        input_candidate = Config.DATA_DIR / f"train/input_2023_w{w:02d}.csv"
        if input_candidate.exists():
            train_input_files.append(input_candidate)
    for w in range(1, 19):
        output_candidate = Config.DATA_DIR / f"train/output_2023_w{w:02d}.csv"
        if output_candidate.exists():
            train_output_files.append(output_candidate)

    print(f"Found {len(train_input_files)} weeks of data")

    # Read each week, then stack the weeks into a single frame per kind.
    input_frames = []
    for csv_path in tqdm(train_input_files, desc="Input"):
        input_frames.append(pd.read_csv(csv_path))
    train_input = pd.concat(input_frames, ignore_index=True)

    output_frames = []
    for csv_path in tqdm(train_output_files, desc="Output"):
        output_frames.append(pd.read_csv(csv_path))
    train_output = pd.concat(output_frames, ignore_index=True)

    # Test data lives directly in the data directory.
    test_input = pd.read_csv(Config.DATA_DIR / "test_input.csv")
    test_template = pd.read_csv(Config.DATA_DIR / "test.csv")

    print(f"Loaded {len(train_input):,} input records, {len(train_output):,} output records")

    return train_input, train_output, test_input, test_template

# ================================================================================
# NEW: PLAYER-TO-PLAYER INTERACTION FEATURES
# ================================================================================

def compute_player_interactions(input_df):
    """Compute per-player spacing features relative to the other players in a play.

    For every (game_id, play_id) the last available row of each player is
    taken and pairwise Euclidean distances between all players are computed.
    Plays with fewer than two players produce no rows.

    Args:
        input_df: DataFrame with columns ``game_id``, ``play_id``, ``nfl_id``,
            ``frame_id``, ``x``, ``y``, ``player_side`` and ``player_role``.

    Returns:
        DataFrame with one row per (game_id, play_id, nfl_id) containing
        teammate/opponent distance statistics, neighbour counts, a pressure
        index, local density, team spread and the offset/angle to the nearest
        opponent. ``nearest_coverage_dist`` / ``coverage_players_nearby`` are
        only set for targeted receivers with an opposing coverage player, and
        ``nearest_opp_*`` only when at least one opponent exists; these are
        missing (NaN) otherwise. An empty DataFrame is returned when no play
        has at least two players.
    """
    print("  Computing player interaction features...")

    interaction_features = []

    for (game_id, play_id), play_group in input_df.groupby(['game_id', 'play_id']):
        # Last row per player. NOTE: GroupBy.last() takes the last non-null
        # value of each column, so a NaN in the final frame falls back to an
        # earlier frame's value.
        last_frame = play_group.sort_values('frame_id').groupby('nfl_id').last().reset_index()

        positions = last_frame[['x', 'y']].values
        player_ids = last_frame['nfl_id'].values
        player_sides = last_frame['player_side'].values
        player_roles = last_frame['player_role'].values

        n_players = len(positions)
        if n_players <= 1:
            # No other player to relate to.
            continue

        distances = cdist(positions, positions)
        player_index = np.arange(n_players)
        is_coverage = player_roles == 'Defensive Coverage'

        for i, player_id in enumerate(player_ids):
            player_features = {
                'game_id': game_id,
                'play_id': play_id,
                'nfl_id': player_id
            }

            # Teammates exclude the player himself; opponents are everyone on
            # a different side.
            same_side_mask = (player_sides == player_sides[i]) & (player_index != i)
            opp_side_mask = player_sides != player_sides[i]
            has_teammates = np.any(same_side_mask)
            has_opponents = np.any(opp_side_mask)

            # Nearest teammate
            if has_teammates:
                teammate_distances = distances[i][same_side_mask]
                player_features['nearest_teammate_dist'] = np.min(teammate_distances)
                player_features['avg_teammate_dist'] = np.mean(teammate_distances)
                player_features['teammates_within_5'] = np.sum(teammate_distances < 5)
                player_features['teammates_within_10'] = np.sum(teammate_distances < 10)
            else:
                player_features['nearest_teammate_dist'] = 0
                player_features['avg_teammate_dist'] = 0
                player_features['teammates_within_5'] = 0
                player_features['teammates_within_10'] = 0

            # Nearest opponent
            if has_opponents:
                opponent_distances = distances[i][opp_side_mask]
                nearest_opponent_dist = np.min(opponent_distances)
                player_features['nearest_opponent_dist'] = nearest_opponent_dist
                player_features['avg_opponent_dist'] = np.mean(opponent_distances)
                player_features['opponents_within_5'] = np.sum(opponent_distances < 5)
                player_features['opponents_within_10'] = np.sum(opponent_distances < 10)

                # Pressure index (inverse of distance to nearest opponent)
                player_features['pressure_index'] = 1 / (nearest_opponent_dist + 1)
            else:
                player_features['nearest_opponent_dist'] = 100
                player_features['avg_opponent_dist'] = 100
                player_features['opponents_within_5'] = 0
                player_features['opponents_within_10'] = 0
                player_features['pressure_index'] = 0

            # Local density (players within 10 yards); the player's own zero
            # distance is counted by the comparison, hence the -1.
            player_features['local_density'] = np.sum(distances[i] < 10) - 1

            # Formation spread of the player's teammates. Defaults to 0 when
            # there are no teammates, matching the other teammate defaults
            # (previously the default branch was unreachable and the value
            # was left missing).
            if has_teammates:
                teammate_positions = positions[same_side_mask]
                player_features['team_spread_x'] = np.std(teammate_positions[:, 0])
                player_features['team_spread_y'] = np.std(teammate_positions[:, 1])
            else:
                player_features['team_spread_x'] = 0
                player_features['team_spread_y'] = 0

            # Special interactions for specific roles
            if player_roles[i] == 'Targeted Receiver':
                # Find nearest coverage player
                coverage_mask = is_coverage & opp_side_mask
                if np.any(coverage_mask):
                    coverage_distances = distances[i][coverage_mask]
                    player_features['nearest_coverage_dist'] = np.min(coverage_distances)
                    player_features['coverage_players_nearby'] = np.sum(coverage_distances < 10)

            # Offset and angle from the player to his nearest opponent
            if has_opponents:
                nearest_opp_idx = np.argmin(opponent_distances)
                opp_positions = positions[opp_side_mask]
                dx = opp_positions[nearest_opp_idx, 0] - positions[i, 0]
                dy = opp_positions[nearest_opp_idx, 1] - positions[i, 1]
                player_features['nearest_opp_dx'] = dx
                player_features['nearest_opp_dy'] = dy
                player_features['nearest_opp_angle'] = np.arctan2(dy, dx)

            interaction_features.append(player_features)

    return pd.DataFrame(interaction_features)

# ================================================================================
# FEATURE ENGINEERING - ENHANCED VERSION WITH TEMPORAL FEATURES
# ================================================================================

def height_to_inches(height_str, default=70):
    """Convert a height written as 'feet-inches' (e.g. ``'6-2'``) to inches.

    Args:
        height_str: Height string; anything that is not a string containing
            ``'-'`` is treated as missing.
        default: Value returned when the height is missing or malformed.

    Returns:
        ``feet * 12 + inches`` as an int, or ``default`` when the input cannot
        be parsed as exactly two integers separated by ``'-'``.
    """
    if not isinstance(height_str, str) or '-' not in height_str:
        return default
    try:
        feet, inches = map(int, height_str.split('-'))
    except ValueError:
        # Raised both for non-numeric parts and for a wrong number of parts.
        return default
    return feet * 12 + inches

def prepare_features(input_df, output_df, is_training=True):
    """Complete feature preparation pipeline with temporal features and player interactions.

    Builds one feature row per row of ``output_df`` by joining it (on
    ``game_id``/``play_id``/``nfl_id``) with per-player summaries derived from
    ``input_df``, then adding time, physics, ball, target, field-position and
    role features.

    Args:
        input_df: Per-frame rows for each player. Columns read here include
            ``game_id``, ``play_id``, ``nfl_id``, ``frame_id``, ``x``, ``y``,
            ``s``, ``a``, ``dir``, ``o``, ``player_role`` and ``player_side``;
            further columns listed in ``merge_cols`` are carried over when
            present.
        output_df: Rows to build features for. Must contain ``game_id``,
            ``play_id``, ``nfl_id`` and ``frame_id``, plus ``x`` and ``y``
            when ``is_training`` is true.
        is_training: When true, adds the ``displacement_x``/``displacement_y``
            targets and drops rows whose displacement is missing or larger
            than ``Config.MAX_SPEED * time_seconds * 1.5``.

    Returns:
        DataFrame of features (and targets when training). All NaNs in
        numeric columns are replaced by 0 before returning.
    """

    # Compute player interaction features
    interaction_features = compute_player_interactions(input_df)

    # Get last frame before throw
    # NOTE: GroupBy.last() returns the last non-null value per column, so a
    # NaN in a player's final row is filled from an earlier frame.
    last_frame = input_df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id']) \
                         .groupby(['game_id', 'play_id', 'nfl_id'], as_index=False).last()
    last_frame = last_frame.rename(columns={'x': 'x_last', 'y': 'y_last'})

    # Merge interaction features (skipped when none were produced, because an
    # empty frame has no key columns to merge on)
    if len(interaction_features) > 0:
        last_frame = last_frame.merge(interaction_features, on=['game_id', 'play_id', 'nfl_id'], how='left')

    # ============================================================
    # NEW: TEMPORAL FEATURES FROM INPUT SEQUENCE
    # ============================================================
    # Get temporal statistics from the input frames (before throw)
    # NOTE(review): the dir/o change rates use plain differences, so a wrap
    # from e.g. 359 to 1 degrees counts as a large change -- confirm intended.
    temporal_stats = input_df.groupby(['game_id', 'play_id', 'nfl_id']).agg({
        'x': ['mean', 'std', 'min', 'max'],
        'y': ['mean', 'std', 'min', 'max'],
        's': ['mean', 'std', 'max', 'min'],
        'a': ['mean', 'std', 'max', 'min'],
        'dir': lambda x: np.std(np.diff(x)) if len(x) > 1 else 0,  # Direction change rate
        'o': lambda x: np.std(np.diff(x)) if len(x) > 1 else 0,    # Orientation change rate
    }).reset_index()
    # Flatten the two-level column index to '<column>_<stat>'; key columns
    # have an empty second level and keep their plain name.
    temporal_stats.columns = ['_'.join(col).strip() if col[1] else col[0] 
                              for col in temporal_stats.columns.values]
    temporal_stats = temporal_stats.rename(columns={
        'dir_<lambda>': 'dir_change_rate',
        'o_<lambda>': 'orientation_change_rate'
    })

    # Get movement patterns from last N frames
    last_n_frames = 5
    recent_frames = input_df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id']) \
                            .groupby(['game_id', 'play_id', 'nfl_id']).tail(last_n_frames)

    # Calculate trajectory features from recent frames
    trajectory_features = recent_frames.groupby(['game_id', 'play_id', 'nfl_id']).agg({
        'x': lambda x: (x.iloc[-1] - x.iloc[0]) if len(x) > 1 else 0,  # Recent displacement X
        'y': lambda x: (x.iloc[-1] - x.iloc[0]) if len(x) > 1 else 0,  # Recent displacement Y
        's': lambda x: x.diff().mean() if len(x) > 1 else 0,           # Acceleration trend
    }).reset_index()
    trajectory_features.columns = ['game_id', 'play_id', 'nfl_id', 
                                  'recent_displacement_x', 'recent_displacement_y', 'acceleration_trend']

    # Merge temporal features with last frame
    last_frame = last_frame.merge(temporal_stats, on=['game_id', 'play_id', 'nfl_id'], how='left')
    last_frame = last_frame.merge(trajectory_features, on=['game_id', 'play_id', 'nfl_id'], how='left')

    # Convert height if available
    if 'player_height' in last_frame.columns:
        last_frame['height_inches'] = last_frame['player_height'].apply(height_to_inches)

    # Get target receiver position (first targeted receiver per play if
    # several rows match)
    targets = last_frame[last_frame['player_role'] == 'Targeted Receiver'][
        ['game_id', 'play_id', 'x_last', 'y_last']
    ].rename(columns={'x_last': 'target_x', 'y_last': 'target_y'})
    targets = targets.drop_duplicates(['game_id', 'play_id'])

    last_frame = last_frame.merge(targets, on=['game_id', 'play_id'], how='left')

    # Columns to merge - include new temporal columns and interaction features
    merge_cols = ['game_id', 'play_id', 'nfl_id', 'x_last', 'y_last', 
                  's', 'a', 'o', 'dir', 'player_role', 'player_side',
                  'ball_land_x', 'ball_land_y', 'target_x', 'target_y',
                  'play_direction', 'absolute_yardline_number', 'player_weight',
                  # Temporal columns
                  'x_mean', 'x_std', 'x_min', 'x_max',
                  'y_mean', 'y_std', 'y_min', 'y_max',
                  's_mean', 's_std', 's_max', 's_min',
                  'a_mean', 'a_std', 'a_max', 'a_min',
                  'dir_change_rate', 'orientation_change_rate',
                  'recent_displacement_x', 'recent_displacement_y', 'acceleration_trend',
                  # Player interaction columns
                  'nearest_teammate_dist', 'avg_teammate_dist', 'teammates_within_5', 'teammates_within_10',
                  'nearest_opponent_dist', 'avg_opponent_dist', 'opponents_within_5', 'opponents_within_10',
                  'pressure_index', 'local_density', 'team_spread_x', 'team_spread_y',
                  'nearest_coverage_dist', 'coverage_players_nearby',
                  'nearest_opp_dx', 'nearest_opp_dy', 'nearest_opp_angle']

    if 'height_inches' in last_frame.columns:
        merge_cols.append('height_inches')

    # Keep only the columns that actually exist in this dataset
    merge_cols = [c for c in merge_cols if c in last_frame.columns]

    # Merge with output: every output row receives its player's static features
    merged = output_df.merge(last_frame[merge_cols], 
                             on=['game_id', 'play_id', 'nfl_id'], 
                             how='left')

    # Engineer features
    df = merged.copy()

    # ============================================================
    # TEMPORAL FEATURES
    # ============================================================
    # Basic time features (frame_id / 10 -- presumably 10 frames per second;
    # confirm against the data description)
    df['time_seconds'] = df['frame_id'] / 10.0
    # Fraction of the player's largest frame_id within the play
    df['time_normalized'] = df['frame_id'] / df.groupby(['game_id', 'play_id', 'nfl_id'])['frame_id'].transform('max')

    # Polynomial time features
    df['time_squared'] = df['time_seconds'] ** 2
    df['time_cubed'] = df['time_seconds'] ** 3
    df['sqrt_time'] = np.sqrt(df['time_seconds'])
    df['log_time'] = np.log1p(df['time_seconds'])

    # Fourier features for cyclical patterns
    df['time_sin'] = np.sin(2 * np.pi * df['time_normalized'])
    df['time_cos'] = np.cos(2 * np.pi * df['time_normalized'])
    df['time_sin_2'] = np.sin(4 * np.pi * df['time_normalized'])
    df['time_cos_2'] = np.cos(4 * np.pi * df['time_normalized'])

    # Phase-based features (thirds of the normalized time)
    df['is_early_play'] = (df['time_normalized'] < 0.33).astype(int)
    df['is_mid_play'] = ((df['time_normalized'] >= 0.33) & (df['time_normalized'] < 0.67)).astype(int)
    df['is_late_play'] = (df['time_normalized'] >= 0.67).astype(int)

    # NEW: Interaction features with time
    if 'pressure_index' in df.columns:
        df['pressure_x_time'] = df['pressure_index'] * df['time_seconds']
        df['pressure_x_late_play'] = df['pressure_index'] * df['is_late_play']

    if 'nearest_opponent_dist' in df.columns:
        # +1 / +0.1 keep the denominators away from zero
        df['opponent_closing_time'] = df['nearest_opponent_dist'] / (df['s'] + 1)
        df['space_urgency'] = df['time_seconds'] / (df['opponent_closing_time'] + 0.1)

    # Velocity components
    if 'dir' in df.columns and 's' in df.columns:
        # dir is in degrees; sin maps to x and cos to y, i.e. the angle is
        # treated as measured from the +y axis (presumably the tracking-data
        # convention -- confirm). Missing dir is treated as 0 degrees.
        dir_rad = np.deg2rad(df['dir'].fillna(0))
        df['velocity_x'] = df['s'] * np.sin(dir_rad)
        df['velocity_y'] = df['s'] * np.cos(dir_rad)

        # Momentum features
        if 'player_weight' in df.columns:
            df['momentum_magnitude'] = df['player_weight'] * df['s']

        # Expected positions based on physics
        df['expected_x_constant_v'] = df['x_last'] + df['velocity_x'] * df['time_seconds']
        df['expected_y_constant_v'] = df['y_last'] + df['velocity_y'] * df['time_seconds']

        if 'a' in df.columns:
            # Constant-acceleration extrapolation; `a` is projected along
            # `dir` (assumes acceleration acts along the movement direction).
            df['expected_x_with_accel'] = df['x_last'] + df['velocity_x'] * df['time_seconds'] + 0.5 * df['a'] * np.sin(dir_rad) * df['time_squared']
            df['expected_y_with_accel'] = df['y_last'] + df['velocity_y'] * df['time_seconds'] + 0.5 * df['a'] * np.cos(dir_rad) * df['time_squared']

    # Movement consistency features (last-frame value relative to the
    # player's mean over the input frames)
    if 's_mean' in df.columns:
        df['speed_consistency'] = df['s'] / (df['s_mean'] + 0.1)
        df['speed_deviation'] = np.abs(df['s'] - df['s_mean'])

    if 'a_mean' in df.columns:
        df['acceleration_consistency'] = df['a'] / (df['a_mean'] + 0.1)
        df['acceleration_deviation'] = np.abs(df['a'] - df['a_mean'])

    # Temporal interaction features
    df['time_x_speed'] = df['time_seconds'] * df['s']
    df['time_x_acceleration'] = df['time_seconds'] * df['a']
    df['time_squared_x_speed'] = df['time_squared'] * df['s']

    # Ball distance and angle (from the player's last input position to the
    # ball landing point)
    if all(col in df.columns for col in ['ball_land_x', 'ball_land_y', 'x_last', 'y_last']):
        ball_dx = df['ball_land_x'] - df['x_last']
        ball_dy = df['ball_land_y'] - df['y_last']
        df['distance_to_ball'] = np.sqrt(ball_dx**2 + ball_dy**2)
        df['angle_to_ball'] = np.arctan2(ball_dy, ball_dx)

        # Ball direction unit vectors (1e-6 avoids division by zero)
        df['ball_direction_x'] = ball_dx / (df['distance_to_ball'] + 1e-6)
        df['ball_direction_y'] = ball_dy / (df['distance_to_ball'] + 1e-6)

        # Time until ball arrival, using a fixed assumed ball speed
        estimated_ball_speed = 20.0  # yards/second
        df['estimated_time_to_ball'] = df['distance_to_ball'] / estimated_ball_speed
        df['time_ratio_to_ball'] = df['time_seconds'] / (df['estimated_time_to_ball'] + 0.1)

        # Closing speed: velocity component along the direction to the ball
        if 'velocity_x' in df.columns:
            ball_unit_x = ball_dx / (df['distance_to_ball'] + 1e-6)
            ball_unit_y = ball_dy / (df['distance_to_ball'] + 1e-6)
            df['closing_speed'] = df['velocity_x'] * ball_unit_x + df['velocity_y'] * ball_unit_y

            # Projected time to reach ball
            df['projected_time_to_ball'] = df['distance_to_ball'] / (np.abs(df['closing_speed']) + 0.1)
            df['time_urgency'] = df['time_seconds'] / (df['projected_time_to_ball'] + 0.1)

        # Temporal ball distance features
        df['distance_to_ball_x_time'] = df['distance_to_ball'] * df['time_seconds']
        df['distance_to_ball_x_time_squared'] = df['distance_to_ball'] * df['time_squared']

    # Target distance (to the targeted receiver's last input position)
    if 'target_x' in df.columns:
        target_dx = df['target_x'] - df['x_last']
        target_dy = df['target_y'] - df['y_last']
        df['distance_to_target'] = np.sqrt(target_dx**2 + target_dy**2)
        df['is_target'] = (df['player_role'] == 'Targeted Receiver').astype(int)
        df['angle_to_target'] = np.arctan2(target_dy, target_dx)

        # Temporal target features
        df['distance_to_target_x_time'] = df['distance_to_target'] * df['time_seconds']
        df['is_target_x_time_squared'] = df['is_target'] * df['time_squared']

    # Field position
    df['x_normalized'] = df['x_last'] / Config.FIELD_X_MAX
    df['y_normalized'] = df['y_last'] / Config.FIELD_Y_MAX

    # Distance from sidelines and endzone
    df['distance_from_sideline'] = np.minimum(df['y_last'], Config.FIELD_Y_MAX - df['y_last'])
    df['distance_from_endzone'] = np.minimum(df['x_last'], Config.FIELD_X_MAX - df['x_last'])

    # Role features
    df['is_offense'] = (df['player_side'] == 'Offense').astype(int)
    df['is_passer'] = (df['player_role'] == 'Passer').astype(int)
    df['is_coverage'] = (df['player_role'] == 'Defensive Coverage').astype(int)
    df['is_redzone'] = (df['absolute_yardline_number'] <= 20).astype(int)

    # Interaction features
    if 'is_target' in df.columns:
        df['is_target_x_time'] = df['is_target'] * df['time_seconds']
    if 'distance_to_ball' in df.columns:
        df['distance_ball_x_speed'] = df['distance_to_ball'] * df['s']

    # Phase-specific role interactions
    df['is_offense_x_early_play'] = df['is_offense'] * df['is_early_play']
    df['is_offense_x_late_play'] = df['is_offense'] * df['is_late_play']
    if 'is_target' in df.columns:
        df['is_target_x_late_play'] = df['is_target'] * df['is_late_play']

    # Training targets: displacement from the last input position
    if is_training:
        df['displacement_x'] = df['x'] - df['x_last']
        df['displacement_y'] = df['y'] - df['y_last']

        # Remove invalid samples: missing targets, or a displacement larger
        # than 1.5x what MAX_SPEED allows in the elapsed time
        valid_mask = (
            df['displacement_x'].notna() & 
            df['displacement_y'].notna() &
            (np.sqrt(df['displacement_x']**2 + df['displacement_y']**2) <= Config.MAX_SPEED * df['time_seconds'] * 1.5)
        )
        df = df[valid_mask].reset_index(drop=True)

    # Fill NaN values (numeric columns only; includes features that were
    # undefined for a player, e.g. coverage distance for non-receivers)
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    df[numeric_cols] = df[numeric_cols].fillna(0)

    return df

# ================================================================================
# NEURAL NETWORK
# ================================================================================

class SimpleNN(nn.Module):
    """Fully connected regressor: input -> 256 -> 128 -> 64 -> 1.

    Every hidden layer is followed by ReLU; the first two also get dropout
    (0.3 and 0.2). The model emits a single value per input row.
    """

    def __init__(self, input_dim):
        super().__init__()

        # (output width, dropout probability or None) per hidden layer
        hidden_spec = ((256, 0.3), (128, 0.2), (64, None))

        stack = []
        width = input_dim
        for out_features, drop_p in hidden_spec:
            stack.append(nn.Linear(width, out_features))
            stack.append(nn.ReLU())
            if drop_p is not None:
                stack.append(nn.Dropout(drop_p))
            width = out_features
        stack.append(nn.Linear(width, 1))  # Single output

        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the prediction."""
        prediction = self.layers(x)
        return prediction

def train_neural_network(X_train, y_train, X_val, y_val, seed=42):
    """Train a ``SimpleNN`` regressor with early stopping on validation MSE.

    Args:
        X_train: 2-D numpy array of training features (rows x features).
        y_train: 1-D numpy array of training targets.
        X_val: 2-D numpy array of validation features.
        y_val: 1-D numpy array of validation targets.
        seed: Seed applied to both torch and numpy before training.

    Returns:
        The trained model (left in eval mode) carrying the weights of the
        epoch with the lowest mean validation loss. Training runs for at most
        ``Config.NN_EPOCHS`` epochs and stops early after 10 consecutive
        epochs without improvement.
    """

    torch.manual_seed(seed)
    np.random.seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _snapshot(module):
        # state_dict() hands back references to the live parameter tensors,
        # which keep changing as training continues; clone them so the saved
        # "best" weights really are the weights of that epoch.
        return {name: tensor.detach().clone() for name, tensor in module.state_dict().items()}

    # Create datasets
    train_dataset = TensorDataset(
        torch.FloatTensor(X_train),
        torch.FloatTensor(y_train.reshape(-1, 1))
    )
    val_dataset = TensorDataset(
        torch.FloatTensor(X_val),
        torch.FloatTensor(y_val.reshape(-1, 1))
    )

    train_loader = DataLoader(train_dataset, batch_size=Config.NN_BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=Config.NN_BATCH_SIZE)

    # Create model
    model = SimpleNN(X_train.shape[1]).to(device)

    # Training setup
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=Config.NN_LEARNING_RATE)

    patience = 10
    best_val_loss = float('inf')
    best_model_state = _snapshot(model)
    patience_counter = 0

    for epoch in range(Config.NN_EPOCHS):
        # Training
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_losses = []

        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                val_losses.append(loss.item())

        avg_val_loss = np.mean(val_losses)

        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_state = _snapshot(model)
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    # Load best model
    model.load_state_dict(best_model_state)

    return model

# ================================================================================
# RESIDUAL MODELING (COMMENTED OUT)
# ================================================================================

# def train_residual_model(train_data, base_predictions_x, base_predictions_y, features):
#     """Train model to predict residuals from base predictions"""
#     print("Training residual models...")
#     
#     # Calculate residuals
#     residual_x = train_data['displacement_x'].values - base_predictions_x
#     residual_y = train_data['displacement_y'].values - base_predictions_y
#     
#     # Train lightweight model on residuals for X
#     residual_model_x = LGBMRegressor(
#         n_estimators=200,
#         learning_rate=0.01,
#         max_depth=4,
#         num_leaves=31,
#         min_child_samples=100,
#         subsample=0.8,
#         random_state=42,
#         verbosity=-1
#     )
#     residual_model_x.fit(train_data[features].values, residual_x)
#     
#     # Train lightweight model on residuals for Y
#     residual_model_y = LGBMRegressor(
#         n_estimators=200,
#         learning_rate=0.01,
#         max_depth=4,
#         num_leaves=31,
#         min_child_samples=100,
#         subsample=0.8,
#         random_state=42,
#         verbosity=-1
#     )
#     residual_model_y.fit(train_data[features].values, residual_y)
#     
#     return residual_model_x, residual_model_y

# def apply_residual_correction(base_pred_x, base_pred_y, residual_models, X_test, max_correction=2.0):
#     """Apply residual corrections with guardrails"""
#     residual_model_x, residual_model_y = residual_models
#     
#     # Predict residuals
#     residual_x = residual_model_x.predict(X_test)
#     residual_y = residual_model_y.predict(X_test)
#     
#     # Apply guardrails to limit residual magnitude
#     residual_x = np.clip(residual_x, -max_correction, max_correction)
#     residual_y = np.clip(residual_y, -max_correction, max_correction)
#     
#     # Apply residuals
#     corrected_x = base_pred_x + residual_x
#     corrected_y = base_pred_y + residual_y
#     
#     return corrected_x, corrected_y

# ================================================================================
# ENSEMBLE TRAINING
# ================================================================================

def train_ensemble(train_data, features, seed=42):
    """Train the per-fold XGBoost / LightGBM / CatBoost / NN ensemble.

    Two independent sets of models are trained, one for ``displacement_x``
    and one for ``displacement_y``. Folds come from ``GroupKFold`` grouped by
    ``game_id``. The scaler is fitted on all rows before splitting. Only the
    neural network uses the validation part of each fold (for early
    stopping); the tree models are fitted on the training part only.

    Args:
        train_data: DataFrame holding the feature columns, ``game_id`` and
            the two displacement targets.
        features: Names of the feature columns.
        seed: Base seed; each model uses ``seed + fold`` (x target) or
            ``seed + fold + 100`` (y target).

    Returns:
        ``(models_dx, models_dy, scaler)`` where each models dict maps
        ``'xgb'``, ``'lgb'``, ``'cat'`` and ``'nn'`` to a list with one fitted
        model per fold.
    """

    print(f"\nTraining ensemble with seed {seed}...")

    # Prepare data
    feature_matrix = train_data[features].values
    target_dx = train_data['displacement_x'].values
    target_dy = train_data['displacement_y'].values

    # Scale features
    scaler = StandardScaler()
    scaled_matrix = scaler.fit_transform(feature_matrix)

    # Cross-validation
    game_groups = train_data['game_id'].values
    splitter = GroupKFold(n_splits=Config.N_FOLDS)

    # Store models
    families = ('xgb', 'lgb', 'cat', 'nn')
    models_dx = {family: [] for family in families}
    models_dy = {family: [] for family in families}

    fold_splits = splitter.split(feature_matrix, groups=game_groups)
    for fold, (train_idx, val_idx) in enumerate(fold_splits):
        print(f"  Fold {fold + 1}/{Config.N_FOLDS}")

        X_train = scaled_matrix[train_idx]
        X_val = scaled_matrix[val_idx]

        # (model store, training target, validation target, model seed);
        # the x target always comes first, matching the original order.
        per_target = (
            (models_dx, target_dx[train_idx], target_dx[val_idx], seed + fold),
            (models_dy, target_dy[train_idx], target_dy[val_idx], seed + fold + 100),
        )

        # XGBoost
        for store, y_fit, _y_holdout, model_seed in per_target:
            booster = XGBRegressor(
                n_estimators=1000,
                learning_rate=0.05,
                max_depth=8,
                subsample=0.8,
                random_state=model_seed,
                tree_method='hist',
                verbosity=0
            )
            booster.fit(X_train, y_fit)
            store['xgb'].append(booster)

        # LightGBM
        for store, y_fit, _y_holdout, model_seed in per_target:
            booster = LGBMRegressor(
                n_estimators=1000,
                learning_rate=0.05,
                max_depth=8,
                num_leaves=100,
                subsample=0.8,
                random_state=model_seed,
                verbosity=-1
            )
            booster.fit(X_train, y_fit)
            store['lgb'].append(booster)

        # CatBoost
        for store, y_fit, _y_holdout, model_seed in per_target:
            booster = CatBoostRegressor(
                iterations=1000,
                learning_rate=0.05,
                depth=8,
                random_seed=model_seed,
                verbose=False
            )
            booster.fit(X_train, y_fit)
            store['cat'].append(booster)

        # Neural Network
        for store, y_fit, y_holdout, model_seed in per_target:
            network = train_neural_network(X_train, y_fit, X_val, y_holdout, model_seed)
            store['nn'].append(network)

    return models_dx, models_dy, scaler

def _average_target_predictions(models, X_scaled, X_tensor):
    """Average one target's predictions: mean per model family, then across families."""
    family_means = []

    # Tree models
    for family in ('xgb', 'lgb', 'cat'):
        member_preds = [model.predict(X_scaled) for model in models[family]]
        family_means.append(np.mean(member_preds, axis=0))

    # Neural network
    nn_preds = []
    for model in models['nn']:
        model.eval()
        with torch.no_grad():
            # reshape(-1) keeps a 1-D result even for a single input row;
            # squeeze() would collapse it to 0-d and break the averaging with
            # the 1-D tree predictions.
            nn_preds.append(model(X_tensor).cpu().numpy().reshape(-1))
    family_means.append(np.mean(nn_preds, axis=0))

    # Average all models (each family carries equal weight)
    return np.mean(family_means, axis=0)


def predict_ensemble(models_dx, models_dy, scaler, X_test):
    """Generate displacement predictions from the trained ensemble.

    Args:
        models_dx: Dict mapping ``'xgb'``, ``'lgb'``, ``'cat'`` and ``'nn'``
            to lists of fitted models for the x target.
        models_dy: Same structure for the y target.
        scaler: Fitted scaler exposing ``transform``.
        X_test: 2-D array of unscaled features.

    Returns:
        ``(pred_dx, pred_dy)``: 1-D arrays with one prediction per input row.
        Predictions are first averaged within each model family and the four
        family means are then averaged with equal weight.
    """

    X_scaled = scaler.transform(X_test)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # The same tensor feeds the NN models of both targets.
    X_tensor = torch.FloatTensor(X_scaled).to(device)

    pred_dx = _average_target_predictions(models_dx, X_scaled, X_tensor)
    pred_dy = _average_target_predictions(models_dy, X_scaled, X_tensor)

    return pred_dx, pred_dy

# ================================================================================
# POST-PROCESSING
# ================================================================================

def apply_constraints(pred_x, pred_y, x_last, y_last, time_seconds):
    """Apply physics constraints to predicted positions.

    A prediction may not lie farther from the last observed position than
    ``Config.MAX_SPEED * time_seconds``. Longer displacements are scaled back
    to (just under) that distance along the same direction. The result is
    then clipped to the rectangle given by ``Config.FIELD_X_MIN/MAX`` and
    ``Config.FIELD_Y_MIN/MAX``.

    Args:
        pred_x: Predicted x positions.
        pred_y: Predicted y positions.
        x_last: Last observed x positions, aligned with ``pred_x``.
        y_last: Last observed y positions, aligned with ``pred_y``.
        time_seconds: Elapsed time for each prediction; an array aligned with
            the predictions or a scalar that is broadcast.

    Returns:
        Tuple ``(pred_x, pred_y)`` of new arrays. The inputs are not modified.
    """
    pred_x = np.asarray(pred_x)
    pred_y = np.asarray(pred_y)
    x_last = np.asarray(x_last)
    y_last = np.asarray(y_last)

    dx = pred_x - x_last
    dy = pred_y - y_last
    displacement = np.sqrt(dx**2 + dy**2)

    max_displacement = Config.MAX_SPEED * np.asarray(time_seconds)

    # Scale down impossible movements. The epsilon keeps the ratio finite
    # when a displacement is zero; rows within the limit keep their value
    # untouched through np.where, so nothing is written into the inputs.
    too_far = displacement > max_displacement
    scale = max_displacement / (displacement + 1e-6)
    pred_x = np.where(too_far, x_last + dx * scale, pred_x)
    pred_y = np.where(too_far, y_last + dy * scale, pred_y)

    # Clip to field boundaries
    pred_x = np.clip(pred_x, Config.FIELD_X_MIN, Config.FIELD_X_MAX)
    pred_y = np.clip(pred_y, Config.FIELD_Y_MIN, Config.FIELD_Y_MAX)

    return pred_x, pred_y

def smooth_trajectories(test_data, pred_x, pred_y):
    """Smooth each player's predicted trajectory with a light Gaussian filter.

    Rows are grouped by ``(game_id, play_id, nfl_id)``. Every group with more
    than three rows has its x and y predictions filtered independently with
    ``gaussian_filter1d(sigma=0.5)``; shorter groups are returned unchanged.
    Filtering runs in the row order of ``test_data``, so rows of a player are
    assumed to already be in frame order (verify against the caller).

    Args:
        test_data: DataFrame with ``game_id``, ``play_id`` and ``nfl_id``
            columns. It is neither copied nor modified, and its index may
            contain duplicates.
        pred_x: Predicted x positions, aligned row-by-row with ``test_data``.
        pred_y: Predicted y positions, aligned row-by-row with ``test_data``.

    Returns:
        Tuple ``(pred_x, pred_y)`` of new float arrays with smoothed values.
    """
    group_cols = ['game_id', 'play_id', 'nfl_id']

    # Only the key columns are needed. Dropping the index makes each group's
    # index equal to its row positions, which stays correct even when the
    # original index has repeated labels.
    keys = test_data[group_cols].reset_index(drop=True)

    # Work on copies so the caller's arrays are left untouched.
    smoothed_x = np.array(pred_x, dtype=float)
    smoothed_y = np.array(pred_y, dtype=float)

    for _, group in keys.groupby(group_cols):
        if len(group) > 3:
            rows = group.index.to_numpy()
            smoothed_x[rows] = gaussian_filter1d(smoothed_x[rows], sigma=0.5)
            smoothed_y[rows] = gaussian_filter1d(smoothed_y[rows], sigma=0.5)

    return smoothed_x, smoothed_y

# ================================================================================
# MAIN PIPELINE
# ================================================================================

def _candidate_feature_columns():
    """Return the ordered list of feature names the models may use."""
    return [
        # Original position and movement features
        'x_last', 'y_last', 's', 'a', 'o', 'dir',

        # Basic temporal features
        'time_seconds', 'time_normalized', 'time_squared',

        # Advanced temporal features
        'time_cubed', 'sqrt_time', 'log_time',
        'time_sin', 'time_cos', 'time_sin_2', 'time_cos_2',
        'is_early_play', 'is_mid_play', 'is_late_play',

        # Historical statistics from input frames
        'x_mean', 'x_std', 'x_min', 'x_max',
        'y_mean', 'y_std', 'y_min', 'y_max',
        's_mean', 's_std', 's_max', 's_min',
        'a_mean', 'a_std', 'a_max', 'a_min',
        'dir_change_rate', 'orientation_change_rate',
        'recent_displacement_x', 'recent_displacement_y', 'acceleration_trend',

        # Movement consistency
        'speed_consistency', 'speed_deviation',
        'acceleration_consistency', 'acceleration_deviation',

        # Velocity and expected positions
        'velocity_x', 'velocity_y',
        'expected_x_constant_v', 'expected_y_constant_v',
        'expected_x_with_accel', 'expected_y_with_accel',

        # Ball-related features
        'distance_to_ball', 'angle_to_ball', 'closing_speed',
        'ball_direction_x', 'ball_direction_y',
        'estimated_time_to_ball', 'time_ratio_to_ball',
        'projected_time_to_ball', 'time_urgency',
        'distance_to_ball_x_time', 'distance_to_ball_x_time_squared',

        # Target features
        'distance_to_target', 'is_target', 'angle_to_target',
        'distance_to_target_x_time', 'is_target_x_time_squared',

        # Field position
        'x_normalized', 'y_normalized',
        'distance_from_sideline', 'distance_from_endzone',

        # Role features
        'is_offense', 'is_passer', 'is_coverage',
        'is_redzone',

        # Interaction features
        'is_target_x_time', 'distance_ball_x_speed',
        'time_x_speed', 'time_x_acceleration', 'time_squared_x_speed',
        'is_offense_x_early_play', 'is_offense_x_late_play', 'is_target_x_late_play',

        # Other features
        'absolute_yardline_number', 'player_weight',

        # NEW: Player interaction features
        'nearest_teammate_dist', 'avg_teammate_dist', 'teammates_within_5', 'teammates_within_10',
        'nearest_opponent_dist', 'avg_opponent_dist', 'opponents_within_5', 'opponents_within_10',
        'pressure_index', 'local_density', 'team_spread_x', 'team_spread_y',
        'nearest_coverage_dist', 'coverage_players_nearby',
        'nearest_opp_dx', 'nearest_opp_dy', 'nearest_opp_angle',
        'pressure_x_time', 'pressure_x_late_play',
        'opponent_closing_time', 'space_urgency',

        # Momentum, used only if the feature pipeline produced it
        'momentum_magnitude',
    ]


def _build_submission(test_data, pred_x, pred_y):
    """Assemble the submission frame with an id of game_play_player_frame."""
    row_id = (test_data['game_id'].astype(str) + "_" +
              test_data['play_id'].astype(str) + "_" +
              test_data['nfl_id'].astype(str) + "_" +
              test_data['frame_id'].astype(str))
    return pd.DataFrame({'id': row_id, 'x': pred_x, 'y': pred_y})


def main():
    """Run the full pipeline and write ``submission.csv``.

    Steps: load the data, build features for train and test, train one
    ensemble per seed in ``Config.SEEDS`` and predict the test displacements,
    average the predictions over seeds, convert them to absolute positions,
    apply the physics constraints, smooth each trajectory, clip to the field
    and save the submission.

    Returns:
        pandas.DataFrame: The submission table (columns ``id``, ``x``, ``y``)
        that was written to ``submission.csv``.
    """
    banner = "=" * 80
    print(banner)
    print(" NFL BIG DATA BOWL 2026 - ENHANCED WITH TEMPORAL FEATURES + PLAYER INTERACTIONS")
    print(banner)

    # Load data
    train_input, train_output, test_input, test_template = load_data()

    # Prepare features
    print("\nPreparing enhanced temporal features with player interactions...")
    train_data = prepare_features(train_input, train_output, is_training=True)
    test_data = prepare_features(test_input, test_template, is_training=False)

    print(f"Train shape: {train_data.shape}")
    print(f"Test shape: {test_data.shape}")

    # Keep only features present in BOTH frames: a column missing from the
    # test frame would otherwise raise a KeyError only after a full training
    # round has already been spent.
    in_train = [f for f in _candidate_feature_columns() if f in train_data.columns]
    feature_cols = [f for f in in_train if f in test_data.columns]
    missing_in_test = [f for f in in_train if f not in test_data.columns]
    if missing_in_test:
        print(f"Skipping {len(missing_in_test)} features missing from test data: {missing_in_test}")

    print(f"\nUsing {len(feature_cols)} features including temporal enhancements and player interactions")
    print("New features added:")
    print("  - Player-to-player distances and counts")
    print("  - Pressure indices and space urgency")
    print("  - Team formation spread and density")
    print("  - Temporal interaction combinations")

    # The test matrix does not depend on the seed, so build it once.
    X_test = test_data[feature_cols].values

    # Train ensembles with different seeds
    all_predictions_dx = []
    all_predictions_dy = []

    for seed in Config.SEEDS:
        # Train
        models_dx, models_dy, scaler = train_ensemble(train_data, feature_cols, seed)

        # Predict for test
        pred_dx, pred_dy = predict_ensemble(models_dx, models_dy, scaler, X_test)
        all_predictions_dx.append(pred_dx)
        all_predictions_dy.append(pred_dy)

        # Clean up memory: drop the references first so the collector (and
        # then the CUDA caching allocator) can actually release the models.
        del models_dx, models_dy, scaler
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Average predictions
    print("\nAveraging predictions...")
    final_pred_dx = np.mean(all_predictions_dx, axis=0)
    final_pred_dy = np.mean(all_predictions_dy, axis=0)

    # NOTE: residual modeling (train_residual_model / apply_residual_correction
    # on the seed-averaged train predictions) is currently disabled.

    # Calculate absolute positions
    x_last = test_data['x_last'].values
    y_last = test_data['y_last'].values
    pred_x = x_last + final_pred_dx
    pred_y = y_last + final_pred_dy

    # Apply constraints
    print("Applying physics constraints...")
    pred_x, pred_y = apply_constraints(
        pred_x, pred_y,
        x_last,
        y_last,
        test_data['time_seconds'].values
    )

    # Smooth trajectories
    print("Smoothing trajectories...")
    pred_x, pred_y = smooth_trajectories(test_data, pred_x, pred_y)

    # Final clipping: smoothing can push values slightly outside the field.
    pred_x = np.clip(pred_x, Config.FIELD_X_MIN, Config.FIELD_X_MAX)
    pred_y = np.clip(pred_y, Config.FIELD_Y_MIN, Config.FIELD_Y_MAX)

    # Create submission
    print("\nCreating submission...")
    submission = _build_submission(test_data, pred_x, pred_y)
    submission.to_csv("submission.csv", index=False)

    print(f"\n✅ Submission saved: {len(submission)} predictions")
    print(f"X: mean={submission['x'].mean():.2f}, std={submission['x'].std():.2f}")
    print(f"Y: mean={submission['y'].mean():.2f}, std={submission['y'].std():.2f}")

    print("\nFirst 5 predictions:")
    print(submission.head())

    print("\n" + banner)
    print(" COMPLETE!")
    print(banner)

    return submission

# Script entry point: run the full pipeline only when this file is executed
# directly, keeping the returned submission frame in a module-level name.
if __name__ == "__main__":
    submission = main()