# Clearance Comparison Analysis

This notebook generates a 2×N grid comparing performance across different clearance values:
- Top row: Success Rate vs Position Noise
- Bottom row: Break Rate vs Position Noise
- Gold highlight box around the reference clearance level

In [None]:
# ============================================================
# BLOCK 1: IMPORTS & CONSTANTS
# ============================================================

import wandb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from collections import defaultdict

# WandB Configuration: runs are queried from the project path "<ENTITY>/<PROJECT>".
ENTITY = "hur"
PROJECT = "SG_Exps"

# Clearance levels: clearance key -> {method display name -> WandB tag}.
# Each tag is combined with an evaluation tag (see "Evaluation Tags" below) to
# select the runs for that method at that clearance.
# NOTE(review): the PLACEHOLDER_* values look like stand-ins that must be
# replaced with the real run tags before querying - confirm.
CLEARANCE_LEVELS = {
    "0.5mm": {
        "Pose": "PLACEHOLDER_POSE_0.5MM",
        "Hybrid-Basic": "PLACEHOLDER_HYBRID_0.5MM",
        "LCLoP": "PLACEHOLDER_LCLOP_0.5MM",
    },
    "1mm": {
        "Pose": "PLACEHOLDER_POSE_1MM",
        "Hybrid-Basic": "PLACEHOLDER_HYBRID_1MM",
        "LCLoP": "PLACEHOLDER_LCLOP_1MM",
    },
    "2mm": {
        "Pose": "PLACEHOLDER_POSE_2MM",
        "Hybrid-Basic": "PLACEHOLDER_HYBRID_2MM",
        "LCLoP": "PLACEHOLDER_LCLOP_2MM",
    },
    "4mm": {
        "Pose": "PLACEHOLDER_POSE_4MM",
        "Hybrid-Basic": "PLACEHOLDER_HYBRID_4MM",
        "LCLoP": "PLACEHOLDER_LCLOP_4MM",
    },
}

# Display name mapping: clearance key -> subplot column title.
# The figure code looks titles up with .get(key, key), so a clearance without
# an entry here is titled with its raw key.
CLEARANCE_DISPLAY_NAMES = {
    "0.5mm": "Clearance = 0.5mm",
    "1mm": "Clearance = 1mm",
    "2mm": "Clearance = 2mm",
    "4mm": "Clearance = 4mm",
}

# Highlight box - which clearance level to highlight with gold box
# NOTE(review): the figure cell defines and reads its own
# HIGHLIGHT_CLEARANCE_PLOT; this constant is not referenced by the cells shown
# in this notebook - confirm whether the two should be unified.
HIGHLIGHT_CLEARANCE = "1mm"  # Set to None for no highlight

# Evaluation Tags
# Runs tagged with TAG_EVAL_PERFORMANCE are used to pick the best checkpoint;
# runs tagged with TAG_EVAL_NOISE supply the per-noise-level metrics.
TAG_EVAL_PERFORMANCE = "eval_performance"
TAG_EVAL_NOISE = "eval_noise"

# Noise Level Mapping: display label -> metric range string
# The label is used as the x-tick text; the range string is embedded in the
# history column name as "Noise_Eval(<range>)_Core/<metric>".
NOISE_LEVELS = {
    "1mm": "0mm-1mm",
    "2.5mm": "1mm-2.5mm",
    "5mm": "2.5mm-5mm",
    "7.5mm": "5mm-7.5mm",
}

# Metrics: column-name suffixes, appended to an "Eval_Core/" or
# "Noise_Eval(<range>)_Core/" prefix to form the history column names.
METRIC_SUCCESS = "num_successful_completions"
METRIC_BREAKS = "num_breaks"
METRIC_TOTAL = "total_episodes"

In [None]:
# ============================================================
# BLOCK 2: DETERMINE BEST POLICY
# ============================================================

def get_best_checkpoint_per_run(api, method_tag):
    """Find the best checkpoint for each run with the given method tag.

    Runs in ``ENTITY/PROJECT`` carrying both ``method_tag`` and
    ``TAG_EVAL_PERFORMANCE`` are fetched. Every history row is scored as
    ``successes - breaks`` and the first row with the highest score is taken
    as that run's best checkpoint.

    Args:
        api: WandB API client; only ``api.runs(path, filters=...)`` is used.
        method_tag: Tag identifying the method/clearance combination.

    Returns:
        dict mapping run id -> ``{"run_name", "best_step", "score"}``.
        Runs with no history, with missing metric columns, or without a single
        row holding a complete (score, total_steps) pair are skipped with a
        printed warning instead of aborting the whole query.
    """
    success_col = f"Eval_Core/{METRIC_SUCCESS}"
    breaks_col = f"Eval_Core/{METRIC_BREAKS}"
    required_cols = (success_col, breaks_col, "total_steps")

    runs = api.runs(
        f"{ENTITY}/{PROJECT}",
        filters={"$and": [{"tags": method_tag}, {"tags": TAG_EVAL_PERFORMANCE}]}
    )

    best_checkpoints = {}
    for run in runs:
        history = run.history()
        if history.empty:
            print(f"Warning: Run {run.name} has no history data")
            continue

        missing_cols = [col for col in required_cols if col not in history.columns]
        if missing_cols:
            print(f"Warning: Run {run.name} is missing columns {missing_cols}")
            continue

        # Calculate score: successes - breaks. Work on a small copy so the
        # downloaded history frame is left untouched.
        scored = history[["total_steps"]].copy()
        scored["score"] = history[success_col] - history[breaks_col]

        # Rows that lack either the score or the step cannot be a checkpoint:
        # idxmax() on an all-NaN column and int(NaN) would both fail.
        scored = scored.dropna()
        if scored.empty:
            print(f"Warning: Run {run.name} has no rows with complete evaluation metrics")
            continue

        best_idx = scored["score"].idxmax()
        best_step = int(scored.loc[best_idx, "total_steps"])
        best_score = scored.loc[best_idx, "score"]

        best_checkpoints[run.id] = {
            "run_name": run.name,
            "best_step": best_step,
            "score": best_score,
        }
        print(f"    {run.name}: best checkpoint at step {best_step} (score: {best_score:.0f})")

    return best_checkpoints

# Resolve the best checkpoint of every run, grouped by clearance and method.
api = wandb.Api()
best_checkpoints = defaultdict(dict)  # best_checkpoints[clearance][method]

for clearance in CLEARANCE_LEVELS:
    method_tags = CLEARANCE_LEVELS[clearance]

    # Section banner for this clearance level
    print(f"\n{'='*60}")
    print(f"Clearance: {clearance}")
    print(f"{'='*60}")

    for method_name in method_tags:
        method_tag = method_tags[method_name]
        print(f"\n  {method_name} ({method_tag}):")
        per_run_best = get_best_checkpoint_per_run(api, method_tag)
        best_checkpoints[clearance][method_name] = per_run_best

In [None]:
# ============================================================
# BLOCK 3: DOWNLOAD DATA
# ============================================================

def download_eval_noise_data(api, method_tag, best_checkpoints):
    """Download eval_noise data for best checkpoints across all noise levels.

    Noise-evaluation runs (tagged ``method_tag`` and ``TAG_EVAL_NOISE``) are
    paired with the performance runs in ``best_checkpoints`` through the text
    after the last underscore of the run name (the "agent number"). For each
    paired run, the first history row whose ``total_steps`` equals the best
    step is read and one record per entry of ``NOISE_LEVELS`` is produced.

    Args:
        api: WandB API client; only ``api.runs(path, filters=...)`` is used.
        method_tag: Tag identifying the method/clearance combination.
        best_checkpoints: Mapping run id -> info dict with at least
            ``"run_name"`` and ``"best_step"``.

    Returns:
        pandas.DataFrame with columns ``run_id``, ``run_name``, ``checkpoint``,
        ``noise_level``, ``success``, ``breaks`` and ``total``; an empty frame
        when no run could be matched. Unmatched runs, runs without history and
        runs lacking the checkpoint are skipped with a printed warning.

    Raises:
        KeyError: If the matched history row lacks one of the expected
            ``Noise_Eval(<range>)_Core/<metric>`` columns.
    """
    runs = api.runs(
        f"{ENTITY}/{PROJECT}",
        filters={"$and": [{"tags": method_tag}, {"tags": TAG_EVAL_NOISE}]}
    )

    # Build lookup by agent number from best_checkpoints
    checkpoint_by_agent = {
        info["run_name"].rsplit("_", 1)[-1]: info["best_step"]
        for info in best_checkpoints.values()
    }

    data = []
    for run in runs:
        # Extract agent number from run name
        agent_num = run.name.rsplit("_", 1)[-1]

        if agent_num not in checkpoint_by_agent:
            print(f"Warning: No matching performance run for agent {agent_num} ({run.name})")
            continue

        best_step = checkpoint_by_agent[agent_num]
        history = run.history()

        # An empty history frame has no "total_steps" column, so indexing it
        # would raise KeyError; skip such runs like the checkpoint search does.
        if history.empty:
            print(f"Warning: Run {run.name} has no history data")
            continue
        if "total_steps" not in history.columns:
            print(f"Warning: Run {run.name} has no total_steps column")
            continue

        matching_rows = history[history["total_steps"] == best_step]
        if matching_rows.empty:
            print(f"Warning: Checkpoint {best_step} not found in {run.name}")
            continue

        row = matching_rows.iloc[0]

        for noise_label, noise_range in NOISE_LEVELS.items():
            prefix = f"Noise_Eval({noise_range})_Core"
            data.append({
                "run_id": run.id,
                "run_name": run.name,
                "checkpoint": best_step,
                "noise_level": noise_label,
                "success": row[f"{prefix}/{METRIC_SUCCESS}"],
                "breaks": row[f"{prefix}/{METRIC_BREAKS}"],
                "total": row[f"{prefix}/{METRIC_TOTAL}"],
            })

    return pd.DataFrame(data)

# Fetch the noise-evaluation records for every (clearance, method) pair.
noise_data = defaultdict(dict)  # noise_data[clearance][method]

for clearance in CLEARANCE_LEVELS:
    method_tags = CLEARANCE_LEVELS[clearance]

    print(f"\n{'='*60}")
    print(f"Downloading data for Clearance: {clearance}")
    print(f"{'='*60}")

    for method_name in method_tags:
        method_tag = method_tags[method_name]
        print(f"\n  {method_name}...")
        noise_data[clearance][method_name] = download_eval_noise_data(
            api, method_tag, best_checkpoints[clearance][method_name]
        )

# Report how many distinct runs were retrieved for each pair.
print("\n" + "="*60)
print("DATA SUMMARY")
print("="*60)
for clearance, method_tags in CLEARANCE_LEVELS.items():
    print(f"\n{clearance}:")
    for method_name in method_tags:
        df = noise_data[clearance][method_name]
        if df.empty:
            print(f"  {method_name}: No data")
            continue
        num_runs = df["run_name"].nunique()
        print(f"  {method_name}: {num_runs} runs")

In [None]:
# ============================================================
# BLOCK 4: CLEARANCE COMPARISON FIGURE
# ============================================================

# Policy Selection
# When an integer, only the N runs with the highest best-checkpoint score are
# plotted for each method; None keeps every run.
TOP_N_POLICIES = None  # Set to integer (e.g., 3) to use top N policies, or None for all

# Highlight Configuration
# The spines of both subplots in the highlighted column are recolored and
# thickened using the values below.
# NOTE(review): BLOCK 1 also defines HIGHLIGHT_CLEARANCE with the same value;
# the plotting code reads only HIGHLIGHT_CLEARANCE_PLOT - confirm whether the
# two are meant to stay independent.
HIGHLIGHT_CLEARANCE_PLOT = "1mm"  # Which clearance to highlight with gold box, or None
HIGHLIGHT_COLOR = "gold"
HIGHLIGHT_LINEWIDTH = 3

# N/A Configuration - for clearance levels where break rate is not applicable
# For these keys the break-rate subplot has no bars or ticks and shows NA_TEXT
# centered in the axes instead.
NA_CLEARANCES = []  # List of clearance keys to show N/A instead of break rate plot
NA_TEXT = "N/A"  # Text to display in the N/A box

# Figure Configuration
FIGSIZE = (14, 6)  # Width x Height
DPI = 150
# Width of one bar; also the horizontal step between the bars of different
# methods within one noise-level group.
BAR_WIDTH = 0.25

# Colors: method display name -> bar color. Every method name used in
# CLEARANCE_LEVELS needs an entry here because the plot indexes this dict.
COLORS = {
    "Pose": "#2ca02c",        # Green
    "Hybrid-Basic": "#ff7f0e", # Orange
    "LCLoP": "#1f77b4",       # Blue
}

# Font sizes
FONT_SUPTITLE = 16
FONT_TITLE = 12
FONT_AXIS_LABEL = 10
FONT_TICK = 9
FONT_LEGEND = 9
FONT_NA = 12  # Font size for N/A text

# Axis configuration: fixed y-limits and tick positions, in percent, for the
# success-rate (top) row and the break-rate (bottom) row.
SUCCESS_Y_LIM = (0, 100)
SUCCESS_Y_TICKS = [0, 20, 40, 60, 80, 100]
BREAK_Y_LIM = (0, 25)
BREAK_Y_TICKS = [0, 5, 10, 15, 20, 25]

# Labels
SUPTITLE = "Performance vs Position Noise Across Clearance Values"
X_LABEL = "Position Noise"
SUCCESS_Y_LABEL = "Success Rate (%)"
BREAK_Y_LABEL = "Break Rate (%)"

# ============================================================

def filter_top_n_runs(df, method_best_checkpoints, top_n):
    """Keep only the rows of ``df`` that belong to the ``top_n`` best-scoring runs.

    Runs are ranked by the ``"score"`` entry of ``method_best_checkpoints`` and
    matched against ``df["run_name"]`` through the text after the last
    underscore of the run name. ``df`` itself is returned when it is empty,
    when ``top_n`` is None, or when there are at most ``top_n`` runs.
    """
    def agent_suffix(run_name):
        # Text after the final underscore, e.g. "eval_run_3" -> "3".
        return run_name.rsplit("_", 1)[-1]

    # Guard clauses: nothing to filter in these cases.
    if df.empty:
        return df
    if top_n is None:
        return df
    if len(method_best_checkpoints) <= top_n:
        return df

    # Highest score first; sorted() is stable, so ties keep insertion order.
    ranked = sorted(
        method_best_checkpoints.values(),
        key=lambda info: info["score"],
        reverse=True,
    )
    keep = {agent_suffix(info["run_name"]) for info in ranked[:top_n]}

    mask = df["run_name"].map(lambda name: agent_suffix(name) in keep)
    return df[mask]

def compute_rates(df, noise_labels, metric="success"):
    """Compute success or break rates (in percent) for each noise level.

    For every label the rate is ``100 * sum(df[metric]) / sum(df["total"])``
    over the rows whose ``noise_level`` equals that label.

    Args:
        df: Frame with ``noise_level``, ``total`` and ``metric`` columns
            (may be an empty frame without columns).
        noise_labels: Noise-level labels, in output order.
        metric: Name of the numerator column, e.g. ``"success"`` or ``"breaks"``.

    Returns:
        List with one rate per label. The rate is 0 when ``df`` is empty, when
        a label has no rows, or when its episode total is not positive.
    """
    if df.empty:
        return [0] * len(noise_labels)

    rates = []
    for noise_label in noise_labels:
        subset = df[df["noise_level"] == noise_label]
        if subset.empty:
            rates.append(0)
            continue

        total = subset["total"].sum()
        # A zero episode count would turn the division into NaN/inf and feed
        # that into the bar plot; report 0 like the other no-data cases.
        if not total > 0:
            rates.append(0)
            continue

        rates.append(100 * subset[metric].sum() / total)
    return rates

# Setup
# Method names are taken from the first clearance level; every other level is
# expected to define the same methods.
clearance_keys = list(CLEARANCE_LEVELS.keys())
method_names = list(CLEARANCE_LEVELS[clearance_keys[0]].keys())
noise_labels = list(NOISE_LEVELS.keys())
n_clearances = len(clearance_keys)
x = np.arange(len(noise_labels))

# Create figure: row 0 = success rate, row 1 = break rate, one column per
# clearance. squeeze=False keeps `axes` two-dimensional even with a single
# clearance column, so the axes[row, col] indexing below always works.
fig, axes = plt.subplots(2, n_clearances, figsize=FIGSIZE, dpi=DPI, squeeze=False)
fig.suptitle(SUPTITLE, fontsize=FONT_SUPTITLE)

# Plot each clearance level
for col, clearance in enumerate(clearance_keys):
    ax_success = axes[0, col]
    ax_break = axes[1, col]

    display_name = CLEARANCE_DISPLAY_NAMES.get(clearance, clearance)
    is_na_clearance = clearance in NA_CLEARANCES

    # Plot bars for each method (success rate, and break rate when applicable)
    for i, method_name in enumerate(method_names):
        df = noise_data[clearance][method_name]
        df = filter_top_n_runs(df, best_checkpoints[clearance][method_name], TOP_N_POLICIES)

        success_rates = compute_rates(df, noise_labels, "success")
        # Center the group of method bars on each noise-level tick.
        offset = (i - len(method_names)/2 + 0.5) * BAR_WIDTH

        ax_success.bar(x + offset, success_rates, BAR_WIDTH,
                       label=method_name, color=COLORS[method_name])

        # Only plot break rates if not an N/A clearance
        if not is_na_clearance:
            break_rates = compute_rates(df, noise_labels, "breaks")
            ax_break.bar(x + offset, break_rates, BAR_WIDTH,
                         label=method_name, color=COLORS[method_name])

    # Configure success rate subplot
    ax_success.set_title(display_name, fontsize=FONT_TITLE)
    ax_success.set_xticks(x)
    ax_success.set_xticklabels(noise_labels, fontsize=FONT_TICK)
    ax_success.set_ylim(SUCCESS_Y_LIM)
    ax_success.set_yticks(SUCCESS_Y_TICKS)
    ax_success.tick_params(axis='y', labelsize=FONT_TICK)

    # Configure break rate subplot
    if is_na_clearance:
        # Clear the ticks and show N/A text in the middle of the axes
        ax_break.set_xticks([])
        ax_break.set_yticks([])
        ax_break.text(0.5, 0.5, NA_TEXT, transform=ax_break.transAxes,
                      fontsize=FONT_NA, ha='center', va='center',
                      style='italic', color='gray')
        # Keep the spines for visual consistency
        for spine in ax_break.spines.values():
            spine.set_visible(True)
    else:
        ax_break.set_xlabel(X_LABEL, fontsize=FONT_AXIS_LABEL)
        ax_break.set_xticks(x)
        ax_break.set_xticklabels(noise_labels, fontsize=FONT_TICK)
        ax_break.set_ylim(BREAK_Y_LIM)
        ax_break.set_yticks(BREAK_Y_TICKS)
        ax_break.tick_params(axis='y', labelsize=FONT_TICK)

    # Only the leftmost column carries the y-axis labels and the legend
    if col == 0:
        ax_success.set_ylabel(SUCCESS_Y_LABEL, fontsize=FONT_AXIS_LABEL)
        ax_break.set_ylabel(BREAK_Y_LABEL, fontsize=FONT_AXIS_LABEL)
        ax_success.legend(fontsize=FONT_LEGEND, loc='upper left')

    # Add gold highlight to spines if this is the highlighted clearance
    if HIGHLIGHT_CLEARANCE_PLOT is not None and clearance == HIGHLIGHT_CLEARANCE_PLOT:
        for highlighted_ax in (ax_success, ax_break):
            for side in ('top', 'left', 'right', 'bottom'):
                highlighted_ax.spines[side].set_color(HIGHLIGHT_COLOR)
                highlighted_ax.spines[side].set_linewidth(HIGHLIGHT_LINEWIDTH)

plt.tight_layout(rect=[0, 0, 1, 0.95])  # Leave room for suptitle
plt.show()