In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the processed feature table.
df = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2026-analytics-1/merged_cleaned_features.csv", low_memory=False)

# Load the output of the previous PTMIS stage.
ptmis = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2026-analytics-3/ptmis_basic_output.csv")

# Attach the PTMIS output to the feature rows. The left join keeps every
# feature row, so rows with no matching (game, play, player) entry get NaN in
# the columns that come from `ptmis`.
df_vis = df.merge(ptmis, on=['game_id','play_id','nfl_id'], how='left')

# Clean numeric fields only. A blanket fillna(0) would also write the integer 0
# into non-numeric columns (presumably player_position is text -- it is used as
# the boxplot category) and would turn missing PTMIS scores into real-looking
# zeros that distort the score plots. Missing scores are left as NaN instead;
# seaborn and pivot_table(aggfunc='mean') ignore NaN values.
numeric_cols = df_vis.select_dtypes(include='number').columns.difference(['PTMIS_score'])
df_vis = df_vis.fillna(dict.fromkeys(numeric_cols, 0))

# ------------------------------------------
# 1. Histogram of the PTMIS score distribution
# ------------------------------------------
# Draw on an explicit figure/axes pair, write the PNG, then release the figure.
hist_fig, hist_ax = plt.subplots(figsize=(10, 5))
sns.histplot(df_vis['PTMIS_score'], bins=50, ax=hist_ax)
hist_ax.set_title("Distribution of PTMIS Scores")
hist_ax.set_xlabel("PTMIS Score")
hist_ax.set_ylabel("Count")
hist_fig.tight_layout()
hist_fig.savefig("ptmis_distribution.png")
plt.close(hist_fig)


# ------------------------------------------
# 2. PTMIS heatmap per play
# ------------------------------------------
# Rows are keyed by (game_id, play_id) rather than play_id alone: the rest of
# this script identifies a play by both columns (see the merge keys), so
# indexing on play_id only would average together plays from different games
# that happen to share a play_id.
# Each cell is the mean PTMIS_score of one player (nfl_id) in one play.
ptmis_pivot = df_vis.pivot_table(
    index=['game_id', 'play_id'],
    columns='nfl_id',
    values='PTMIS_score',
    aggfunc='mean'
)

plt.figure(figsize=(14,8))
sns.heatmap(ptmis_pivot, cmap='viridis')
plt.title("PTMIS Heatmap per Play vs Player")
plt.tight_layout()
plt.savefig("ptmis_heatmap.png")
plt.close()


# ------------------------------------------
# 3. Trajectory Plot of One Sample Play
# ------------------------------------------

# Pick one example play for visualization: the play of the first row.
# A play is identified by (game_id, play_id) elsewhere in this script, so both
# keys are taken; filtering on play_id alone could mix in rows from other games.
sample_game = df_vis['game_id'].iloc[0]
sample_play = df_vis['play_id'].iloc[0]

in_sample_play = (df_vis['game_id'] == sample_game) & (df_vis['play_id'] == sample_play)

# Order rows by frame so each player's line is drawn in time order; groupby
# keeps the within-group row order.
traj = df_vis[in_sample_play].sort_values('frame_id')

# Plot movement of players, one line per nfl_id.
plt.figure(figsize=(10,6))
for _, group in traj.groupby('nfl_id'):
    plt.plot(group['x_input'], group['y_input'], alpha=0.7)

plt.title(f"Player Movement Trajectories (Play ID = {sample_play})")
plt.xlabel("X Position")
plt.ylabel("Y Position")
plt.tight_layout()
plt.savefig("trajectory_sample_play.png")
plt.close()


# ------------------------------------------
# 4. Scatter Plot: Movement Speed vs PTMIS
# ------------------------------------------
# Order rows by frame within each (game, play, player) track so that a row-wise
# diff measures the displacement between consecutive rows of the same track.
track_keys = ['game_id', 'play_id', 'nfl_id']
df_vis = df_vis.sort_values(track_keys + ['frame_id']).copy()

# Per-track displacement in x and y; the first row of each track has no
# predecessor, so its NaN diff is replaced by 0.
frame_steps = df_vis.groupby(track_keys)[['x_input', 'y_input']].diff().fillna(0)
df_vis['dx_actual'] = frame_steps['x_input']
df_vis['dy_actual'] = frame_steps['y_input']

# Euclidean length of the per-row displacement.
df_vis['speed'] = np.sqrt(df_vis['dx_actual']**2 + df_vis['dy_actual']**2)

speed_fig, speed_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x='speed', y='PTMIS_score', alpha=0.5, data=df_vis, ax=speed_ax)
speed_ax.set_title("Speed vs PTMIS Score")
speed_ax.set_xlabel("Speed (per frame movement)")
speed_ax.set_ylabel("PTMIS Score")
speed_fig.tight_layout()
speed_fig.savefig("speed_vs_ptmis.png")
plt.close(speed_fig)


# ------------------------------------------
# 5. Boxplot of PTMIS by position
# ------------------------------------------
# One box per player_position value, showing the spread of PTMIS_score.
box_fig, box_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='player_position', y='PTMIS_score', data=df_vis, ax=box_ax)
box_ax.set_title("PTMIS Score by Position")

# Tilt the category labels by 45 degrees.
for tick_label in box_ax.get_xticklabels():
    tick_label.set_rotation(45)

box_fig.tight_layout()
box_fig.savefig("ptmis_by_position.png")
plt.close(box_fig)

# Report the image files written by the sections above.
saved_plots = [
    "ptmis_distribution.png",
    "ptmis_heatmap.png",
    "trajectory_sample_play.png",
    "speed_vs_ptmis.png",
    "ptmis_by_position.png",
]
print("All visualization files saved:", saved_plots, sep="\n")
