In [49]:
import sys
from pathlib import Path

# Put the parent of the current working directory at the front of sys.path
# (once only) so the `tools` package imported below can be resolved.
ANALYSIS_ROOT = Path.cwd().parent
_analysis_root_str = str(ANALYSIS_ROOT)
if _analysis_root_str not in sys.path:
    sys.path.insert(0, _analysis_root_str)

from tools import wandb_io, aggregations, baselines, plotting, paths, config
from tools.style import STYLE
import pandas as pd

In [50]:
# Notebook configuration: which sweep/task is analysed, which W&B history
# columns are requested, and where this notebook writes its results.
SWEEP_ID = "mpjo8uje"
TASK = "quadruped-walk"
HISTORY_KEYS = ["eval/episode_reward", "_step"]

# The results directory is derived from the notebook's file stem.
NOTEBOOK_STEM = "07_optimistic_exploration_ablation"
RESULTS_DIR = paths.notebook_results_dir(NOTEBOOK_STEM)

print(f"Results will be saved to: {RESULTS_DIR}")

Results will be saved to: /gpfs/work4/0/prjs0951/Thomas/Thesis/RL_weather/tdmpc2-with-return-based-auxiliary-tasks/analysis/results/07_optimistic_exploration_ablation


In [51]:
# Load every run of the sweep together with the requested history columns.
# NOTE(review): use_cache=True / force_refresh=False presumably mean "reuse a
# local cache when one exists" - confirm against tools.wandb_io.
fetch_options = dict(
    entity=config.WANDB_ENTITY,
    project=config.WANDB_PROJECT,
    sweep_id=SWEEP_ID,
    history_keys=HISTORY_KEYS,
    use_cache=True,
    force_refresh=False,
)
runs, manifest, source = wandb_io.fetch_sweep_runs(**fetch_options)

# Report how many runs were loaded, where they came from, and when they
# were fetched (falls back to 'unknown' if the manifest lacks a timestamp).
print(f"Loaded {len(runs)} runs from {source}")
fetched_at = manifest.get('fetched_at', 'unknown')
print(f"Fetched at: {fetched_at}")

Loaded 80 runs from cache
Fetched at: 2025-12-17T13:22:47.698626+00:00


In [52]:
# Turn the fetched runs into a single DataFrame, exposing the swept planner
# settings (and the seed) under short column names.
CONFIG_COLUMN_NAMES = {
    "planner_head_reduce": "head_reduce",
    "planner_action_noise_std": "action_noise",
    "seed": "seed",
}
runs_df = aggregations.runs_history_to_frame(
    runs,
    metric_key="eval/episode_reward",
    step_keys=["_step", "step"],
    config_to_columns=CONFIG_COLUMN_NAMES,
)

# Human-readable label combining both swept settings, one per row.
runs_df["condition"] = [
    f"{head}, noise={noise}"
    for head, noise in zip(runs_df["head_reduce"], runs_df["action_noise"])
]

# Sanity check: overall size and number of rows per swept condition.
print(f"DataFrame shape: {runs_df.shape}")
print(f"\nConditions found:")
condition_counts = runs_df.groupby(["head_reduce", "action_noise"]).size()
print(condition_counts)

DataFrame shape: (1600, 7)

Conditions found:
head_reduce  action_noise
max          0.00            200
             0.03            200
             0.10            200
             0.30            200
mean         0.00            200
             0.03            200
             0.10            200
             0.30            200
dtype: int64


In [53]:
# Read the stored baseline results for TASK from the TD-MPC2 baseline root.
# NOTE(review): baseline_df is not referenced by the later cells visible
# here - confirm whether it should be drawn on the figure.
baseline_df = baselines.load_task_baseline(TASK, root=paths.BASELINE_TDMPC2)
n_baseline_points = len(baseline_df)
print(f"Baseline data points: {n_baseline_points}")

Baseline data points: 123


In [54]:
# Create the main figure
import plotly.graph_objects as go

# Styling constants for an ICML 2026 double-column figure (rendered at about
# half A4 width): fonts and lines are oversized so they stay legible once the
# figure is shrunk.

# Canvas: tall and narrow, leaving room for a two-line title on top and the
# legend underneath the axes.
FIGURE_WIDTH = 550
FIGURE_HEIGHT = 620

# Trace thickness.
LINE_WIDTH = 3.5

# Font sizes (pixels).
TITLE_FONT_SIZE = 22
SUBTITLE_FONT_SIZE = 18
AXIS_LABEL_FONT_SIZE = 22
TICK_FONT_SIZE = 20
LEGEND_FONT_SIZE = 16

def create_exploration_ablation_figure(
    df: pd.DataFrame,
    metric_key: str = "eval/episode_reward",
    task_name: str = "Quadruped Walk",
    max_step: int = 100000,
    y_max: int = 1000,
) -> go.Figure:
    """Build the sample-efficiency line plot for the exploration ablation.

    Plots the across-seed mean of ``metric_key`` against environment step for
    a fixed set of conditions: ``max`` head reduction with no action noise
    (solid line) and ``mean`` head reduction at noise levels 0, 0.03, 0.1 and
    0.3 (dashed lines). Colour encodes the noise level. The figure is meant
    to compare optimistic (max) head selection against mean head selection
    with additive exploration noise. Conditions with no rows in ``df`` are
    skipped silently.

    Styling (font sizes, line width, figure size) is read from the
    module-level constants defined alongside this function, and colours come
    from ``STYLE.COLORS``.

    Args:
        df: Long-format results with columns ``step``, ``head_reduce``,
            ``action_noise``, ``seed`` and ``metric_key``.
        metric_key: Column holding the value to average across seeds.
        task_name: Task name shown in the subtitle.
        max_step: Rows with ``step`` above this are dropped; also the upper
            bound of the x-axis.
        y_max: Upper bound of the y-axis.

    Returns:
        The assembled plotly figure (not shown or saved here).
    """
    fig = go.Figure()

    # Filter to max_step
    df = df[df["step"] <= max_step]

    # Group by condition and step, compute mean across seeds
    summary = (
        df.groupby(["head_reduce", "action_noise", "step"], as_index=False)
        .agg(mean_reward=(metric_key, "mean"))
    )

    # Define visual encodings - color encodes noise level.
    # Colour names below are the author's notes on the STYLE palette.
    noise_colors = {
        0.0: STYLE.COLORS[0],   # Cyan - no noise
        0.03: STYLE.COLORS[1],  # Pink
        0.1: STYLE.COLORS[2],   # Red
        0.3: STYLE.COLORS[3],   # Yellow
    }

    # Lines to plot:
    # - max with noise=0 (our method, solid cyan)
    # - mean with all noise levels (dashed, colored by noise)
    conditions_to_plot = [
        ("max", 0.0),    # Our method
        ("mean", 0.0),
        ("mean", 0.03),
        ("mean", 0.1),
        ("mean", 0.3),
    ]

    # Seeds that actually contribute to a drawn curve. Counting over the whole
    # frame would also include conditions that are never plotted.
    plotted_seeds = set()

    for head_reduce, action_noise in conditions_to_plot:
        mask = (summary["head_reduce"] == head_reduce) & (summary["action_noise"] == action_noise)
        group = summary[mask].sort_values("step")

        if group.empty:
            continue

        condition_rows = (df["head_reduce"] == head_reduce) & (df["action_noise"] == action_noise)
        plotted_seeds.update(df.loc[condition_rows, "seed"].dropna().unique())

        # Unknown noise levels fall back to the fifth palette colour.
        color = noise_colors.get(action_noise, STYLE.COLORS[4])
        dash = "solid" if head_reduce == "max" else "dash"

        # Create descriptive label. The noise value is taken from the
        # condition itself (never hard-coded) and formatted with `:g` so that
        # zero renders as "0" for both head types.
        head_label = "Maximum" if head_reduce == "max" else "Mean"
        label = f"{head_label} (σ={action_noise:g})"

        # Add mean line only (no std band)
        fig.add_trace(
            go.Scatter(
                x=group["step"],
                y=group["mean_reward"],
                mode="lines",
                name=label,
                line=dict(color=color, dash=dash, width=LINE_WIDTH),
                hovertemplate=f"{label}<br>Step=%{{x}}<br>Reward=%{{y:.1f}}<extra></extra>",
            )
        )

    # Apply styling with publication-ready fonts
    num_seeds = len(plotted_seeds)

    # Two-line title: main title + subtitle with task/seeds
    title_text = f"Exploration Strategy Ablation<br><span style='font-size:{SUBTITLE_FONT_SIZE}px'>{task_name} ({num_seeds} seeds)</span>"

    fig.update_layout(
        title=dict(
            text=title_text,
            font=dict(size=TITLE_FONT_SIZE),
            y=0.96,
            x=0.5,
            xanchor="center",
        ),
        xaxis_title=dict(text="Environment Steps", font=dict(size=AXIS_LABEL_FONT_SIZE)),
        yaxis_title=dict(text="Evaluation Episode Reward", font=dict(size=AXIS_LABEL_FONT_SIZE)),
        font=dict(size=TICK_FONT_SIZE),  # Base font size
        plot_bgcolor="white",
        paper_bgcolor="white",
        width=FIGURE_WIDTH,
        height=FIGURE_HEIGHT,
    )

    # Set axis ranges and add grid lines
    fig.update_xaxes(
        range=[0, max_step],
        showgrid=True,
        gridwidth=1,
        gridcolor="rgba(200,200,200,0.5)",
        showline=True,
        linewidth=2,
        linecolor="black",
        tickfont=dict(size=TICK_FONT_SIZE),
    )
    fig.update_yaxes(
        range=[0, y_max],
        showgrid=True,
        gridwidth=1,
        gridcolor="rgba(200,200,200,0.5)",
        showline=True,
        linewidth=2,
        linecolor="black",
        tickfont=dict(size=TICK_FONT_SIZE),
    )

    # Position legend at the bottom in a grid (3 on top, 2 below)
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="top",
            y=-0.22,
            xanchor="center",
            x=0.5,
            bgcolor="rgba(255,255,255,0.9)",
            font=dict(size=LEGEND_FONT_SIZE),
            entrywidth=150,
            entrywidthmode="pixels",
        ),
        margin=dict(t=80, b=140),  # Top margin for two-line title
    )

    return fig


# Render the ablation figure for the task analysed in this notebook.
figure_options = dict(
    task_name="Quadruped Walk",
    max_step=100000,
    y_max=1000,
)
fig = create_exploration_ablation_figure(runs_df, **figure_options)
fig.show()

In [55]:
# Export the figure as a PNG inside this notebook's results directory.
FIGURE_FILENAME = "exploration_ablation.png"
output_path = RESULTS_DIR / FIGURE_FILENAME
plotting.write_png(fig, output_path=output_path)
print(f"Saved figure to: {output_path}")

Saved figure to: /gpfs/work4/0/prjs0951/Thomas/Thesis/RL_weather/tdmpc2-with-return-based-auxiliary-tasks/analysis/results/07_optimistic_exploration_ablation/exploration_ablation.png


In [56]:
# Compare all conditions at the largest step present in the data. Runs that
# have no row at that step drop out, which is why the seed count is reported
# per condition.
final_step = runs_df["step"].max()
print(f"Comparing at step {final_step}")

rows_at_final_step = runs_df[runs_df["step"] == final_step]
final_summary = rows_at_final_step.groupby(
    ["head_reduce", "action_noise"], as_index=False
).agg(
    mean_reward=("eval/episode_reward", "mean"),
    std_reward=("eval/episode_reward", "std"),
    n_seeds=("seed", "nunique"),
)

# "mean ± std" rendered with one decimal place per condition.
final_summary["reward"] = [
    f"{mean:.1f} ± {std:.1f}"
    for mean, std in zip(final_summary["mean_reward"], final_summary["std_reward"])
]

# Presentation table: friendly headers, "mean" rows first (descending head
# name), noise ascending within each head type.
display_df = (
    final_summary[["head_reduce", "action_noise", "reward", "n_seeds"]]
    .rename(
        columns={
            "head_reduce": "Head Reduce",
            "action_noise": "Action Noise",
            "reward": "Reward (mean ± std)",
            "n_seeds": "Seeds",
        }
    )
    .sort_values(["Head Reduce", "Action Noise"], ascending=[False, True])
)

print(f"\nFinal Performance at {final_step} steps:")
display_df

Comparing at step 100000

Final Performance at 100000 steps:


Unnamed: 0,Head Reduce,Action Noise,Reward (mean ± std),Seeds
4,mean,0.0,531.7 ± 272.9,9
5,mean,0.03,572.9 ± 358.9,10
6,mean,0.1,490.5 ± 370.5,10
7,mean,0.3,543.7 ± 351.8,10
0,max,0.0,943.8 ± 60.9,9
1,max,0.03,956.5 ± 13.0,10
2,max,0.1,958.4 ± 13.5,10
3,max,0.3,942.0 ± 24.3,10
