# Trajectory Analysis

This notebook provides tools for analyzing collected trajectories from UI VLM training.

In [None]:
import sys
import json
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Add src to path
sys.path.insert(0, str(Path.cwd().parent))

from src.data_utils.trajectory import Trajectory

print("Imports successful!")

## Load Trajectories

In [None]:
# Locate the trajectory JSON files for this experiment.
trajectory_dir = Path("../experiments/exp_001_rejection_sampling/trajectories")
if not trajectory_dir.is_dir():
    # A wrong working directory otherwise just looks like "Found 0 files".
    print(f"Warning: trajectory directory not found: {trajectory_dir}")

# Sort the matches: glob order depends on the filesystem, so without sorting
# the "first N files" subset below could differ between runs and machines.
trajectory_files = sorted(trajectory_dir.glob("*.json"))

print(f"Found {len(trajectory_files)} trajectory files")

# Cap on how many files are parsed, to keep the notebook responsive.
max_trajectories = 100

# Parse each file and rebuild a Trajectory from its dict form.
trajectories = []
for traj_file in trajectory_files[:max_trajectories]:
    # Explicit UTF-8: the default text encoding is locale-dependent.
    with open(traj_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # The file handle is no longer needed once the JSON is parsed.
    trajectories.append(Trajectory.from_dict(data))

print(f"Loaded {len(trajectories)} trajectories")

## Basic Statistics

In [None]:
# Per-trajectory summaries; `lengths`, `total_rewards` and `avg_rewards`
# are also read by the plotting cell further down.
lengths = list(map(len, trajectories))
reward_lists = [t.rewards for t in trajectories]
total_rewards = list(map(sum, reward_lists))
# Mean reward per step; a trajectory with no rewards contributes 0.
avg_rewards = [
    total / len(rewards) if rewards else 0
    for rewards, total in zip(reward_lists, total_rewards)
]

# A trajectory counts as a success here when its total reward is positive.
positive_flags = [total > 0 for total in total_rewards]

# Aggregate everything into one flat mapping of metric name -> value.
stats = dict(
    num_trajectories=len(trajectories),
    avg_length=np.mean(lengths),
    std_length=np.std(lengths),
    avg_total_reward=np.mean(total_rewards),
    std_total_reward=np.std(total_rewards),
    avg_step_reward=np.mean(avg_rewards),
    success_rate=np.mean(positive_flags),
)

# Transpose so each metric is shown on its own row.
pd.DataFrame([stats]).T

## Visualizations

In [None]:
# 2x2 overview figure: three histograms plus a reward-vs-length scatter.
def _hist_with_mean(ax, values, xlabel, title, mean_fmt):
    """Draw a 30-bin histogram of *values* on *ax* and mark its mean.

    The mean is shown as a dashed red vertical line whose legend label is
    formatted with the format spec *mean_fmt* (e.g. '.1f').
    """
    mean_value = np.mean(values)
    ax.hist(values, bins=30, edgecolor='black')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    ax.set_title(title)
    ax.axvline(
        mean_value,
        color='red',
        linestyle='--',
        label=f'Mean: {mean_value:{mean_fmt}}',
    )
    ax.legend()


fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Top-left: trajectory lengths.
_hist_with_mean(
    axes[0, 0], lengths,
    'Trajectory Length', 'Distribution of Trajectory Lengths', '.1f',
)

# Top-right: total reward per trajectory.
_hist_with_mean(
    axes[0, 1], total_rewards,
    'Total Reward', 'Distribution of Total Rewards', '.2f',
)

# Bottom-left: mean reward per step.
_hist_with_mean(
    axes[1, 0], avg_rewards,
    'Average Reward per Step', 'Distribution of Average Step Rewards', '.2f',
)

# Bottom-right: total reward against trajectory length.
scatter_ax = axes[1, 1]
scatter_ax.scatter(lengths, total_rewards, alpha=0.5)
scatter_ax.set_xlabel('Trajectory Length')
scatter_ax.set_ylabel('Total Reward')
scatter_ax.set_title('Reward vs Length')

plt.tight_layout()
plt.show()

## Action Analysis

In [None]:
# Flatten the per-trajectory action sequences into one list, keeping order.
all_actions = []
for traj in trajectories:
    all_actions.extend(traj.actions)

print(f"Total actions: {len(all_actions)}")

# Show at most the first ten actions as a quick sanity check.
print("\nSample actions:")
sample_actions = all_actions[:10]
for action in sample_actions:
    print(f"  {action}")

## Success Analysis

In [None]:
# Split trajectories into successful / failed by total reward and compare them.
# NOTE(review): the statistics cell counts success as total reward > 0, while
# this cell uses > 0.5 -- confirm which definition is intended.
success_threshold = 0.5

# Sum each trajectory's rewards once and reuse the totals for both groups.
trajectory_totals = [sum(t.rewards) for t in trajectories]
successful = [
    t for t, total in zip(trajectories, trajectory_totals)
    if total > success_threshold
]
failed = [
    t for t, total in zip(trajectories, trajectory_totals)
    if total <= success_threshold
]

if not trajectories:
    # Guard: the percentage lines divide by len(trajectories) and would raise
    # ZeroDivisionError when no trajectory files were loaded.
    print("No trajectories loaded; skipping success analysis.")
else:
    print(f"Successful trajectories: {len(successful)} ({len(successful)/len(trajectories)*100:.1f}%)")
    print(f"Failed trajectories: {len(failed)} ({len(failed)/len(trajectories)*100:.1f}%)")

    # Only compare lengths when both groups are non-empty, so neither mean
    # is taken over an empty list.
    if successful and failed:
        print(f"\nAverage length - Successful: {np.mean([len(t) for t in successful]):.1f}")
        print(f"Average length - Failed: {np.mean([len(t) for t in failed]):.1f}")