# Midterm Sweep — Auxiliary Value (RGB)

Pull the Weights & Biases (W&B) sweep `y8whrxxa`, cache the payload locally, and compare the multi-gamma auxiliary-value variants against the original TD-MPC2 pixel baselines.

In [1]:
from pathlib import Path
import sys

# Put the repository root (two levels above the working directory) on sys.path
# so that the `analysis.tools` package can be imported by later cells.
NOTEBOOK_DIR = Path().resolve()
REPO_ROOT = NOTEBOOK_DIR.parent.parent

_tools_dir = REPO_ROOT / "analysis" / "tools"
if not _tools_dir.exists():
    # Fail early: the notebook was started from an unexpected directory.
    raise RuntimeError("Unable to locate analysis/tools package from notebook directory")

_repo_root_entry = str(REPO_ROOT)
if _repo_root_entry not in sys.path:
    # Prepend so the repository copy is found first on import.
    sys.path.insert(0, _repo_root_entry)

In [2]:
import pandas as pd

from analysis.tools import aggregations, baselines, naming, plotting, wandb_io, paths
from analysis.tools.paths import ensure_dir

In [3]:
# Notebook identity and the sweep definition files (resolved against the
# current working directory).
NOTEBOOK_STEM = "midterm_aux_value_rgb"
SWEEP_ROOT = Path("../../sweep_list/midterm_sweep/1aux_value/rgb").resolve()
SWEEP_ID = (SWEEP_ROOT / "id.txt").read_text().strip()
WANDB_PROJECT = (SWEEP_ROOT / "project.txt").read_text().strip()
WANDB_ENTITY = "thomasevers9"

# Metric under study and the step at which the variants are compared.
METRIC_KEY = "eval/episode_reward"
STEP_TARGET = 500_000

# History columns that may carry the step counter; passed to the frame builder
# as `step_keys` (how they are prioritised is decided there — TODO confirm).
STEP_KEYS = ["eval/step", "train/step", "global_step", "total_env_steps", "step", "_step"]

# Everything requested from the run history: the metric followed by all step columns.
HISTORY_KEYS = [METRIC_KEY, *STEP_KEYS]

RESULTS_DIR = paths.notebook_results_dir(NOTEBOOK_STEM)
RESULTS_DIR

PosixPath('/home/thoma/projects/auxiliary_targets/tdmpc2/analysis/results/midterm_aux_value_rgb')

In [4]:
# Fetch the sweep runs; with use_cache=True and force_refresh=False a local
# cache may be used (the printed `data_source` reports where the data came from).
_fetch_result = wandb_io.fetch_sweep_runs(
    entity=WANDB_ENTITY,
    project=WANDB_PROJECT,
    sweep_id=SWEEP_ID,
    history_keys=HISTORY_KEYS,
    use_cache=True,
    force_refresh=False,
)
runs_payload, manifest, data_source = _fetch_result

_run_count = manifest["run_count"]
print(f"Loaded {_run_count} runs from {data_source}")

Loaded 90 runs from cache


In [5]:
# Run-config keys to carry into the frame, each kept under the same column name.
CONFIG_TO_COLUMNS = {key: key for key in ("task", "seed", "multi_gamma_gammas")}


def _variant_label(gammas):
    """Return "g1, g2, ..." for a tuple of gammas, or ``str(gammas)`` for anything else."""
    if not isinstance(gammas, tuple):
        return str(gammas)
    return ", ".join(map(str, gammas))


runs_df = aggregations.runs_history_to_frame(
    runs_payload,
    metric_key=METRIC_KEY,
    step_keys=STEP_KEYS,
    config_to_columns=CONFIG_TO_COLUMNS,
)

# Translate W&B task names into the naming used by the baseline data.
runs_df["task_baseline"] = runs_df["task"].map(naming.wandb_task_to_baseline)

# Human-readable label for each gamma configuration.
runs_df["variant_label"] = runs_df["multi_gamma_gammas"].apply(_variant_label)
runs_df.head()

Unnamed: 0,task,seed,multi_gamma_gammas,run_id,step,eval/episode_reward,task_baseline,variant_label
0,finger_turn_hard,304,"(0.8,)",0og59m15,0,0.0,finger-turn-hard,0.8
1,finger_turn_hard,304,"(0.8,)",0og59m15,25000,194.100006,finger-turn-hard,0.8
2,finger_turn_hard,304,"(0.8,)",0og59m15,50000,158.5,finger-turn-hard,0.8
3,finger_turn_hard,304,"(0.8,)",0og59m15,75000,493.899994,finger-turn-hard,0.8
4,finger_turn_hard,304,"(0.8,)",0og59m15,100000,293.899994,finger-turn-hard,0.8


In [6]:
# Show which history keys the first run carries, or a placeholder when the
# payload is empty.
if runs_payload:
    _available_keys = runs_payload[0]["history"]["keys"]
else:
    _available_keys = "<no runs>"
print("Available history keys:", _available_keys)

Available history keys: ['eval/episode_reward', 'eval/step', 'train/step', 'global_step', 'total_env_steps', 'step', '_step']


In [7]:
# Distinct baseline task names present in the sweep, in sorted order.
_unique_task_names = runs_df["task_baseline"].unique()
tasks = sorted(_unique_task_names)

# Load the baseline rows for exactly those tasks.
baseline_df = baselines.load_many(tasks)
baseline_df.head()

Unnamed: 0,step,reward,seed,task
0,0,5.4,2,acrobot-swingup
1,100000,8.0,2,acrobot-swingup
2,200000,45.4,2,acrobot-swingup
3,300000,48.2,2,acrobot-swingup
4,400000,76.9,2,acrobot-swingup


In [8]:
# For every task: validate that sweep and baseline data reach STEP_TARGET, write
# a sample-efficiency figure and a bar chart at STEP_TARGET, and collect the
# per-variant aggregate into one cross-task summary table written to CSV.
summary_tables = []

# Derive the file-name tag from STEP_TARGET (500_000 -> "500k") so the output
# names cannot drift from the step that was actually aggregated.
if STEP_TARGET and STEP_TARGET % 1000 == 0:
    step_tag = f"{STEP_TARGET // 1000}k"
else:
    step_tag = str(STEP_TARGET)

for task in tasks:
    # Sweep rows for this task; at least one row must sit exactly at STEP_TARGET.
    task_df = runs_df[runs_df["task_baseline"] == task]
    if task_df.empty:
        raise ValueError(f"No sweep rows found for task {task}")
    if not (task_df["step"] == STEP_TARGET).any():
        raise ValueError(f"Sweep data missing step {STEP_TARGET} for task {task}")

    # Baseline rows for this task, with the same requirement.
    baseline_task_df = baseline_df[baseline_df["task"] == task]
    if baseline_task_df.empty:
        raise ValueError(f"Baseline CSV missing task {task}")
    if not (baseline_task_df["step"] == STEP_TARGET).any():
        raise ValueError(f"Baseline data missing step {STEP_TARGET} for task {task}")

    task_dir = ensure_dir(RESULTS_DIR / task)

    # Figure 1: metric over steps per gamma variant, with the baseline overlaid.
    sample_fig = plotting.sample_efficiency_figure(
        frame=task_df,
        metric_key=METRIC_KEY,
        variant_column="multi_gamma_gammas",
        task_name=task,
        baseline_frame=baseline_task_df,
        baseline_label="tdmpc2-pixels",
    )
    plotting.write_png(
        sample_fig,
        output_path=task_dir / "sample_efficiency.png",
    )

    # Per-variant aggregate of the metric at STEP_TARGET.
    agg = aggregations.aggregate_at_step(
        task_df,
        step_value=STEP_TARGET,
        metric_key=METRIC_KEY,
        group_cols=["multi_gamma_gammas"],
    )

    # Figure 2: bar chart of the aggregate next to the baseline rows at the same step.
    baseline_at_step = baseline_task_df[baseline_task_df["step"] == STEP_TARGET]
    bar_fig = plotting.bar_chart_at_step(
        aggregated_frame=agg,
        metric_column="mean_reward",
        variant_column="multi_gamma_gammas",
        baseline_rows=baseline_at_step,
        baseline_label="tdmpc2-pixels",
        task_name=task,
    )
    plotting.write_png(
        bar_fig,
        output_path=task_dir / f"reward_{step_tag}.png",
    )

    # Tag the aggregate with its task and the mean baseline reward at STEP_TARGET.
    agg["task"] = task
    agg["baseline_reward"] = baseline_at_step["reward"].mean()
    summary_tables.append(agg)

# pd.concat raises a generic ValueError on an empty list; give a specific message.
if not summary_tables:
    raise ValueError("No tasks found in sweep data; nothing to summarise")

summary_df = pd.concat(summary_tables, ignore_index=True)
summary_csv_path = RESULTS_DIR / f"reward_{step_tag}_summary.csv"
summary_df.to_csv(summary_csv_path, index=False)
summary_df

Unnamed: 0,multi_gamma_gammas,mean_reward,std_reward,num_runs,task,baseline_reward
0,"(0.8,)",208.871292,57.913434,3,acrobot-swingup,52.566667
1,"(0.8, 0.95)",239.959452,84.430606,3,acrobot-swingup,52.566667
2,"(0.95,)",179.534668,40.430891,3,acrobot-swingup,52.566667
3,"(0.8,)",599.306681,74.810026,3,cheetah-run,454.933333
4,"(0.8, 0.95)",597.963928,9.072433,3,cheetah-run,454.933333
5,"(0.95,)",606.486287,36.327965,3,cheetah-run,454.933333
6,"(0.8,)",983.633341,6.997408,3,finger-spin,982.933333
7,"(0.8, 0.95)",982.433329,5.921432,3,finger-spin,982.933333
8,"(0.95,)",978.200012,9.704134,3,finger-spin,982.933333
9,"(0.8,)",674.93335,107.804747,3,finger-turn-easy,580.866667
