# In [None]:
import os
import sys
# Import TensorFlow first to avoid Protobuf version conflicts with other libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

import pandas as pd
import polars as pl
import numpy as np
import kaggle_evaluation.nfl_inference_server

# =================================================================================================
# PART 1: LOCAL DATA LOADING (FOR DEBUGGING & DEVELOPMENT ONLY)
# =================================================================================================
# The code in this section is NOT part of the standard submission template.
# It is used to load data locally using the 'Load_dataframe_manual' module for testing purposes.
# This allows you to inspect the data and verify the preprocessing logic before submitting.
# =================================================================================================

# # Add current directory to path to find the custom module
# sys.path.append(os.getcwd())

# # Define paths for local testing files
# TEST_INPUT_PATH = '/home/samer/Desktop/competitions/NFL_Big_Data_Bowl_2026_dev/nfl-big-data-bowl-2026-prediction/test_input.csv'
# TEST_PATH = '/home/samer/Desktop/competitions/NFL_Big_Data_Bowl_2026_dev/nfl-big-data-bowl-2026-prediction/test.csv'

# test_input_df_manual = None
# test_df_manual = None

# try:
#     from Load_dataframe_manual import load_dataframe
#     print("--- Local Data Loading ---")
#     print(f"Attempting to load test data from:\n {TEST_INPUT_PATH}\n {TEST_PATH}")
    
#     # Load DataFrames manually
#     test_input_df_manual = load_dataframe(TEST_INPUT_PATH)
#     test_df_manual = load_dataframe(TEST_PATH)
#     print("Local data loaded successfully.")
    
# except ImportError:
#     print("Module 'Load_dataframe_manual' not found. Skipping local data loading.")
# except Exception as e:
#     print(f"Error during local data loading: {e}")

# =================================================================================================
# PART 2: SUBMISSION CODE (REQUIRED)
# =================================================================================================
# The following code implements the logic required for the Kaggle submission.
# It includes model loading, data preprocessing, and the prediction function.
# =================================================================================================

# --- Configuration -------------------------------------------------------------------------------
# Path of the trained Keras model file (a local development path).
MODEL_PATH = '/home/samer/Desktop/competitions/NFL_Big_Data_Bowl_2026_dev/trained_models/nfl_23_input_validation loss_ 118.4529.keras'

# The 23 feature columns, in the exact order they are fed to the model.
# Per the original note, this order mirrors the training data produced by csv_to_numpy.py.
COLUMN_ORDER = [
    # Row identifiers
    'game_id',
    'play_id',
    'player_to_predict',
    'nfl_id',
    'frame_id',
    # Play context
    'play_direction',
    'absolute_yardline_number',
    # Player attributes
    'player_name',
    'player_height',
    'player_weight',
    'player_birth_date',
    'player_position',
    'player_side',
    'player_role',
    # Per-frame tracking values
    'x',
    'y',
    's',
    'a',
    'dir',
    'o',
    # Play-level outputs / ball landing spot
    'num_frames_output',
    'ball_land_x',
    'ball_land_y',
]

# Lazily populated model handle; load_model_if_needed() fills it on first use.
model = None

def load_model_if_needed():
    """
    Return the shared Keras model, loading it from MODEL_PATH on first use.

    The loaded object is cached in the module-level `model` variable, so
    later calls return the cached object without loading again. Any error
    raised while loading is printed and then re-raised to the caller.
    """
    global model

    # Fast path: an earlier call already populated the cache.
    if model is not None:
        return model

    print(f"Loading model from {MODEL_PATH}...")
    try:
        model = tf.keras.models.load_model(MODEL_PATH)
        print("Model loaded successfully.")
    except Exception as err:
        print(f"Error loading model: {err}")
        raise
    return model

def process_value(val):
    """
    Convert a single raw cell value into the float fed to the model.

    Conversion rules, applied in order:
      1. bool / numpy bool          -> 1.0 or 0.0
      2. int / float / numpy number -> float(val)
      3. case-insensitive keywords  -> 'true'/'right'/'offense' = 1.0,
                                       'false'/'left'/'defense' = 0.0
      4. anything float() can parse -> that float (e.g. '3.5')
      5. everything else            -> hash(str(val)) % 10000 as a float

    Intended to mirror the encoding used by 'csv_to_numpy.py' during
    training (that script is not visible here - verify when changing rules).

    Args:
        val: Any scalar cell value (bool, number, string, None, ...).

    Returns:
        float: The encoded value.
    """
    # Booleans must be tested before numbers: bool is a subclass of int.
    if isinstance(val, (bool, np.bool_)):
        return 1.0 if val else 0.0

    if isinstance(val, (int, float, np.number)):
        return float(val)

    val_str = str(val).lower()

    # Fixed encodings for boolean strings, play direction and player side.
    keyword_codes = {
        'true': 1.0,
        'false': 0.0,
        'left': 0.0,
        'right': 1.0,
        'defense': 0.0,
        'offense': 1.0,
    }
    if val_str in keyword_codes:
        return keyword_codes[val_str]

    # Numbers stored as text. float() raises ValueError for unparsable
    # strings but TypeError for non-string objects such as None, so both
    # must be caught for those values to reach the hash fallback below
    # instead of crashing preprocessing.
    try:
        return float(val)
    except (TypeError, ValueError):
        pass

    # Free-form strings (names, positions, dates, ...) are bucketed by hash.
    # NOTE(review): Python salts str hashes per interpreter process unless
    # PYTHONHASHSEED is fixed, so this value is only stable within one run
    # and may not reproduce the values seen at training time - confirm.
    return float(hash(str(val)) % 10000)

def _encode_frame(frame_row):
    """Encode one tracking row as a list of floats in COLUMN_ORDER order (0.0 for absent columns)."""
    return [
        process_value(frame_row[col]) if col in frame_row else 0.0
        for col in COLUMN_ORDER
    ]


def preprocess(test_df, test_input_df):
    """
    Build the padded model-input batch for one prediction request.

    Steps:
      1. Convert Polars frames to Pandas (if needed).
      2. Keep only rows flagged `player_to_predict` (when that column exists).
      3. For every row of `test_df`, collect the matching player's frames
         (same game_id, play_id and nfl_id), order them by frame_id and
         encode each COLUMN_ORDER column through process_value. A request
         row with no matching frames gets a single all-zero frame.
      4. Pad all sequences (post-padding with 0.0) to the longest sequence
         in the batch.

    The caller's dataframes are never modified.

    Args:
        test_df (pl.DataFrame or pd.DataFrame): Rows to predict; must provide
            'game_id', 'play_id' and 'nfl_id'.
        test_input_df (pl.DataFrame or pd.DataFrame): Tracking rows used as
            model input.

    Returns:
        np.ndarray: float32 array of shape
            (len(test_df), longest_sequence, len(COLUMN_ORDER)); an empty
            (0, 0, len(COLUMN_ORDER)) array when `test_df` has no rows.
    """
    # Convert Polars to Pandas
    if isinstance(test_df, pl.DataFrame):
        test_df = test_df.to_pandas()
    if isinstance(test_input_df, pl.DataFrame):
        test_input_df = test_input_df.to_pandas()

    # Apply the player_to_predict filter once, up front, on a fresh frame so
    # the caller's DataFrame is left untouched.
    candidates = test_input_df
    if 'player_to_predict' in test_input_df.columns:
        flag = test_input_df['player_to_predict']
        if pd.api.types.is_bool_dtype(flag) or pd.api.types.is_numeric_dtype(flag):
            # Real booleans or 1/0 style numerics
            keep = flag.astype(bool)
        else:
            # Text such as 'True'/'False' in any string-like dtype. astype(bool)
            # would treat every non-empty string (including 'False') as True.
            keep = flag.astype(str).str.lower() == 'true'
        # Every surviving row is flagged, so the normalized column is simply True.
        candidates = test_input_df[keep].assign(player_to_predict=True)

    n_features = len(COLUMN_ORDER)
    has_frame_id = 'frame_id' in candidates.columns

    # Several request rows can point at the same player; encode each player once.
    encoded_by_player = {}
    features_batch = []

    for _, row in test_df.iterrows():
        key = (row['game_id'], row['play_id'], row['nfl_id'])
        seq = encoded_by_player.get(key)

        if seq is None:
            mask = (
                (candidates['game_id'] == row['game_id'])
                & (candidates['play_id'] == row['play_id'])
                & (candidates['nfl_id'] == row['nfl_id'])
            )
            player_data = candidates[mask]

            if player_data.empty:
                # Fallback if no matching data found - zeros of correct shape
                seq = np.zeros((1, n_features))
            else:
                # Sort by frame_id to ensure temporal order
                if has_frame_id:
                    player_data = player_data.sort_values('frame_id')
                seq = np.array(
                    [_encode_frame(frame) for _, frame in player_data.iterrows()]
                )
            encoded_by_player[key] = seq

        features_batch.append(seq)

    # pad_sequences cannot infer a length from an empty list; return an
    # explicit empty batch instead of failing inside the max() reduction.
    if not features_batch:
        return np.zeros((0, 0, n_features), dtype='float32')

    # Pad sequences to the max length in this batch
    return pad_sequences(
        features_batch,
        dtype='float32',
        padding='post',
        truncating='post',
        value=0.0,
    )

def predict(test_df, test_input_df):
    """
    Produce (x, y) predictions for one batch (play) handed over by the gateway.

    Args:
        test_df (pl.DataFrame or pd.DataFrame): Metadata for the prediction request.
        test_input_df (pl.DataFrame or pd.DataFrame): Context data for the play.

    Returns:
        pd.DataFrame: One row per model output row, with columns 'x' and 'y'.
    """
    net = load_model_if_needed()
    batch = preprocess(test_df, test_input_df)

    # Small batches go through a direct call (noted by the original author as
    # faster than model.predict); larger ones, e.g. local testing, use
    # model.predict in chunks of 32 to handle batching and avoid OOM.
    if len(batch) <= 32:
        raw_xy = net(batch, training=False).numpy()
    else:
        raw_xy = net.predict(batch, batch_size=32, verbose=0)

    # A 3-D result is (batch_size, time_steps, features): keep only the last
    # timestep so the output becomes (batch_size, features).
    if raw_xy.ndim == 3:
        raw_xy = raw_xy[:, -1, :]

    return pd.DataFrame(raw_xy, columns=['x', 'y'])

# =================================================================================================
# PART 3: INFERENCE SERVER (ENTRY POINT)
# =================================================================================================
# This block initializes the inference server provided by Kaggle.
# =================================================================================================

if __name__ == "__main__":
    # Local smoke test, disabled. It relies on the manually loaded frames from
    # PART 1, which is commented out as well.
    # if test_df_manual is not None and test_input_df_manual is not None:
    #     print("Running local prediction test...")
    #     predictions = predict(test_df_manual, test_input_df_manual)
    #     print("Predictions generated successfully:")
    #     print(predictions.head())
    # else:
    #     print("Skipping local test: Data not loaded.")
    pass

# Register predict() as the callback of Kaggle's NFL inference server.
inference_server = kaggle_evaluation.nfl_inference_server.NFLInferenceServer(predict)

# Without a non-empty KAGGLE_IS_COMPETITION_RERUN the local gateway is run
# against the local competition data directory; otherwise the server serves.
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.run_local_gateway(('/home/samer/Desktop/competitions/NFL_Big_Data_Bowl_2026_dev/nfl-big-data-bowl-2026-prediction/',))
else:
    inference_server.serve()
