In [None]:
import pandas as pd
import numpy as np

# ============================================
# 1. Load Dataset
# ============================================
file_path = "/kaggle/input/nfl-big-data-bowl-2026-analytics-1/merged_cleaned_features.csv"

# Columns used by the rest of this script: the keys identifying a player's
# track, the current and target positions, the ball landing point, and the
# two fields that select the "optimal direction" rule.
feature_columns = [
    'game_id', 'play_id', 'nfl_id', 'frame_id',
    'x_input', 'y_input',
    'x_output', 'y_output',
    'ball_land_x', 'ball_land_y',
    'player_position',
    'play_direction',
]

# Parse only the needed columns instead of loading every column of the CSV
# and discarding the rest afterwards; this lowers peak memory use.
df = pd.read_csv(file_path, usecols=feature_columns, low_memory=False)

# usecols does not control column order, so re-select to get the order above.
df = df[feature_columns]

# Fill missing values with the sentinel -1.
# NOTE(review): this is applied to the coordinate columns too, so a missing
# position is treated as the value -1 by the vector computations further
# down -- confirm that this is intended.
df = df.fillna(-1)

# Sort so each player's rows within a play are contiguous and in frame order
# (required by the frame-to-frame differences computed later).
df = df.sort_values(by=['game_id','play_id','nfl_id','frame_id'])


# ============================================
# 2. Compute Actual Movement Vectors
# ============================================
def compute_actual_vectors(group):
    """Return a copy of ``group`` with row-to-row displacement columns added.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows containing ``x_input`` and ``y_input``, already in the order in
        which consecutive differences should be taken.

    Returns
    -------
    pandas.DataFrame
        A new frame with every column of ``group`` plus ``dx_actual`` and
        ``dy_actual`` (difference from the previous row). The first row has
        no predecessor, so both values are NaN there.
    """
    # assign() builds a new frame, so the object handed in by the caller
    # (e.g. a groupby chunk) is never modified in place.
    return group.assign(
        dx_actual=group['x_input'].diff(),
        dy_actual=group['y_input'].diff(),
    )

# Frame-to-frame displacement within each (game, play, player) track.
# The grouped diff() is used instead of groupby(...).apply(...) because apply
# with a DataFrame-returning function depends on the grouping columns being
# passed to the function, which newer pandas versions deprecate; diff() keeps
# every existing column of df and avoids a Python call per group.
frame_step = df.groupby(['game_id','play_id','nfl_id'])[['x_input','y_input']].diff()

# Replace NaN from diff (first frame of each track) with 0
df['dx_actual'] = frame_step['x_input'].fillna(0)
df['dy_actual'] = frame_step['y_input'].fillna(0)


# ============================================
# 3. Compute Optimal Movement Vectors (Simple Rule-Based)
# ============================================

def compute_optimal_direction(row):
    """Return the rule-based target movement vector for a single row.

    The rule is chosen from ``row['player_position']``:

    * 'WR', 'TE', 'RB'      -> vector from the input position to
      (``x_output``, ``y_output``).
    * 'CB', 'S', 'LB', 'DB' -> vector from the input position to
      (``ball_land_x``, ``ball_land_y``).
    * anything else         -> (1, 0) when ``play_direction`` is 'right',
      otherwise (-1, 0).

    Returns a two-element ``pd.Series`` ``[dx, dy]``; the vector is not
    normalized here.
    """
    role = row['player_position']

    # Pick which pair of columns acts as the target point, if any.
    if role in ('WR', 'TE', 'RB'):
        target_cols = ('x_output', 'y_output')
    elif role in ('CB', 'S', 'LB', 'DB'):
        target_cols = ('ball_land_x', 'ball_land_y')
    else:
        target_cols = None

    if target_cols is None:
        # No positional target: unit step along the direction of play.
        heading = 1 if row['play_direction'] == 'right' else -1
        return pd.Series([heading, 0])

    goal_x, goal_y = target_cols
    return pd.Series([
        row[goal_x] - row['x_input'],
        row[goal_y] - row['y_input'],
    ])

# Evaluate the rule once per row; the result has one column per vector
# component (labelled 0 and 1), which are stored as dx_opt and dy_opt.
optimal_components = df.apply(compute_optimal_direction, axis=1)
df['dx_opt'] = optimal_components[0]
df['dy_opt'] = optimal_components[1]


# ============================================
# 4. Normalize actual & optimal vectors
# ============================================
def normalize_vector(x, y):
    """Scale 2-D vectors to unit length, element-wise.

    Parameters
    ----------
    x, y : array-like
        Matching x and y components.

    Returns
    -------
    tuple
        ``(x / norm, y / norm)`` where ``norm`` is the Euclidean length of
        each vector. Zero-length vectors are divided by 1 instead, so they
        stay ``(0, 0)`` rather than producing a division by zero.
    """
    # hypot computes sqrt(x**2 + y**2) without squaring the inputs first, so
    # very large components do not overflow to inf and very small ones do not
    # underflow to a spurious zero length.
    norm = np.hypot(x, y)
    norm = np.where(norm == 0, 1, norm)
    return x / norm, y / norm

# Unit-length version of the actual movement vector.
actual_unit = normalize_vector(df['dx_actual'].values, df['dy_actual'].values)
df['dx_actual_n'] = actual_unit[0]
df['dy_actual_n'] = actual_unit[1]

# Unit-length version of the rule-based optimal vector.
optimal_unit = normalize_vector(df['dx_opt'].values, df['dy_opt'].values)
df['dx_opt_n'] = optimal_unit[0]
df['dy_opt_n'] = optimal_unit[1]


# ============================================
# 5. Compute Cosine Similarity
# ============================================
# Dot product of the two normalized vectors, built one component at a time.
# Rows where either normalized vector is (0, 0) get a similarity of 0.
x_alignment = df['dx_actual_n'].mul(df['dx_opt_n'])
y_alignment = df['dy_actual_n'].mul(df['dy_opt_n'])
df['cosine_similarity'] = x_alignment.add(y_alignment)


# ============================================
# 6. PTMIS per Frame and Aggregated Score
# ============================================
# The per-frame PTMIS value is the cosine similarity itself.
df['ptmis_frame'] = df['cosine_similarity']

# Average the per-frame values over each (game, play, player) track and
# expose the result as a flat table with a PTMIS_score column.
ptmis_summary = (
    df.groupby(['game_id','play_id','nfl_id'])['ptmis_frame']
      .mean()
      .rename('PTMIS_score')
      .reset_index()
)

# Save PTMIS result
ptmis_summary.to_csv("ptmis_basic_output.csv", index=False)

ptmis_summary.head()
