# UTD Ratio Ablation Analysis (Quadruped Walk)

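This notebook analyzes the effect of the Update-To-Data (UTD) ratio on the `quadruped-walk` task, using the first 100k steps of each run and reporting both evaluation reward and elapsed training time.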
We compare our method (with various UTD ratios) against standard baselines:
- **DreamerV3**
- **SAC**
- **TD-MPC2** (Original)

Data is fetched from W&B Sweep `qpz1ybsx`.

In [1]:
import sys
from pathlib import Path

# Make the repository root importable so that `analysis.tools` resolves.
def find_repo_root(start, marker=("analysis", "tools"), fallback_depth=3):
    """Locate the repository root by walking upward from ``start``.

    The root is the nearest directory (``start`` itself or one of its
    ancestors) that contains the ``marker`` sub-directory, by default
    ``analysis/tools``. Searching for the marker instead of counting
    ``.parent`` hops keeps the notebook working when it is moved or when the
    kernel is started from a different working directory.

    Args:
        start: Directory to start searching from.
        marker: Path components that must exist below the root.
        fallback_depth: Number of parent hops to use when no marker is found
            (the notebook's original fixed-depth assumption).

    Returns:
        The resolved root directory as a ``Path``.
    """
    start = Path(start).resolve()
    for candidate in (start, *start.parents):
        if candidate.joinpath(*marker).is_dir():
            return candidate

    # No marker found: keep the original behaviour (three levels above start).
    fallback = start
    for _ in range(fallback_depth):
        fallback = fallback.parent
    return fallback


NOTEBOOK_DIR = Path().resolve()
REPO_ROOT = find_repo_root(NOTEBOOK_DIR)
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

print(f"Repository Root: {REPO_ROOT}")

Repository Root: /home/thomas/projects/Research/Masters Thesis/tdmpc2-with-return-based-auxiliary-tasks


In [2]:
import pandas as pd
import plotly.graph_objects as go

from analysis.tools import wandb_io, plotting, baselines, encodings, aggregations, paths

# ---------------------------------------------------------------------------
# Configuration: everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------

# Which W&B sweep to analyse.
ENTITY = "thomasevers9"
PROJECT = "tdmpc2-tdmpc2"
SWEEP_ID = "qpz1ybsx"

# Task the sweep is analysed for.
TASK = "quadruped-walk"

# Logged metrics pulled from each run's history.
METRIC = "eval/episode_reward"
# Depending on the logger this may instead be logged as plain 'elapsed_time'.
TIME_METRIC = "train/elapsed_time"

# Candidate step columns. 'eval/step' and '_step' are listed as well so that
# a valid step key is found.
STEP_KEYS = [
    "total_env_steps",
    "global_step",
    "step",
    "eval/step",
    "_step",
]

# Rows with a step value above this cap are dropped before plotting.
MAX_STEPS = 100_000

In [3]:
# 1. Fetch Sweep Data
print(f"Fetching sweep {SWEEP_ID}...")
runs, manifest, source = wandb_io.fetch_sweep_runs(
    entity=ENTITY,
    project=PROJECT,
    sweep_id=SWEEP_ID,
    history_keys=[METRIC, TIME_METRIC] + STEP_KEYS,
    use_cache=True,
    # Force refresh to ensure we get the new metric.
    # NOTE(review): this re-downloads the sweep on every run (see the
    # "remote" source in the output); presumably it can be set to False once
    # the cache contains TIME_METRIC -- confirm against wandb_io.
    force_refresh=True,
)


def load_capped_history(sweep_runs, metric_key):
    """Build a per-step frame for ``metric_key``, capped at ``MAX_STEPS``.

    The ``utd_ratio`` and ``seed`` config values are carried along as
    columns. An explicit copy is returned because later cells add columns to
    these frames; a bare boolean-mask slice would make that assignment
    ambiguous for pandas (SettingWithCopyWarning).
    """
    frame = aggregations.runs_history_to_frame(
        sweep_runs,
        metric_key=metric_key,
        step_keys=STEP_KEYS,
        config_to_columns={"utd_ratio": "utd_ratio", "seed": "seed"},
    )
    return frame[frame["step"] <= MAX_STEPS].copy()


# Reward and elapsed time are logged at different frequencies (train vs eval),
# so each metric gets its own frame instead of being merged into one.
sweep_frame = load_capped_history(runs, METRIC)
time_frame = load_capped_history(runs, TIME_METRIC)

# No task filter is applied: the sweep is assumed to contain only TASK runs.
# TODO: filter on the run config's task if the sweep is ever mixed.

print(f"Loaded {len(sweep_frame)} rows from {source}.")
sweep_frame.head()

Fetching sweep qpz1ybsx...


Downloading sweep qpz1ybsx: 100%|██████████| 16/16 [00:28<00:00,  1.81s/it]

Loaded 160 rows from remote.





Unnamed: 0,utd_ratio,seed,run_id,step,eval/episode_reward
0,4,303,21sglwap,10000,88.845596
1,4,303,21sglwap,20000,358.743561
2,4,303,21sglwap,30000,162.805084
3,4,303,21sglwap,40000,575.623291
4,4,303,21sglwap,50000,319.771301


In [4]:
# 2. Load Baselines
print("Loading baselines...")

# Legend name -> baseline data root, in the order the baselines are loaded.
BASELINE_ROOTS = {
    "DreamerV3": baselines.DREAMERV3_BASELINE_ROOT,
    "SAC": baselines.SAC_BASELINE_ROOT,
    "TD-MPC2": baselines.STATE_BASELINE_ROOT,  # TD-MPC2 (Original)
}

baselines_dict = {}
for baseline_name, baseline_root in BASELINE_ROOTS.items():
    if not baselines.has_task(TASK, root=baseline_root):
        # Say so explicitly; otherwise the plot would silently lack a curve.
        print(f"Skipping {baseline_name}: no baseline data for {TASK}.")
        continue
    task_frame = baselines.load_task_baseline(TASK, root=baseline_root)
    # Apply the same step cap that is used for the sweep data.
    baselines_dict[baseline_name] = task_frame[task_frame["step"] <= MAX_STEPS]

print(f"Loaded baselines: {list(baselines_dict.keys())}")

Loading baselines...
Loaded baselines: ['DreamerV3', 'SAC', 'TD-MPC2']


In [5]:
# 3. Plotting

# Elapsed time is converted to hours below.
# NOTE(review): assumes TIME_METRIC is logged in seconds -- confirm.
SECONDS_PER_HOUR = 3600.0


def variant_label(utd_ratio):
    """Return the legend label for one UTD-ratio setting."""
    return f"Ours (UTD={utd_ratio})"


def save_figure(figure, output_path, description="plot"):
    """Write ``figure`` to ``output_path`` as PNG via ``plotting.write_png``.

    A ``RuntimeError`` from the export is reported rather than raised, so a
    failed image export does not abort the rest of the cell. Both figures go
    through this helper so they share the same messages and install hint.
    """
    print(f"Saving {description} to {output_path}...")
    try:
        plotting.write_png(figure, output_path=output_path)
        print("Done.")
    except RuntimeError as e:
        print(f"Could not save image: {e}")
        print("To fix this, install kaleido: pip install -U kaleido")


# 'utd_ratio' is the variant column; format it for the legend.
# `.assign` builds new frames and rebinds the names instead of mutating the
# frames from the fetch cell in place, so re-running this cell is idempotent
# and never writes into a filtered slice.
sweep_frame = sweep_frame.assign(
    variant=sweep_frame["utd_ratio"].apply(variant_label),
)
time_frame = time_frame.assign(
    variant=time_frame["utd_ratio"].apply(variant_label),
    hours=time_frame[TIME_METRIC] / SECONDS_PER_HOUR,
)

# --- Plot Reward ---
# The plotting tool draws one curve per variant (UTD ratio). An empty
# baseline frame is passed because baselines are added manually below to
# control their styling.
fig = plotting.sample_efficiency_figure(
    frame=sweep_frame,
    metric_key=METRIC,
    variant_column="variant",
    task_name=f"{TASK} (100k Steps)",
    baseline_frame=pd.DataFrame(),
    baseline_label="",
    baseline_step_cap=MAX_STEPS,
)

# Baselines are drawn dashed in neutral colours so they stay visually
# distinct from the "Ours" variants (which use the plotting tool's palette).
baseline_colors = {
    "DreamerV3": "#8c564b",  # Brown
    "SAC": "#7f7f7f",        # Gray
    "TD-MPC2": "#000000",    # Black
}

for name, df in baselines_dict.items():
    # Mean reward per step across the baseline's rows.
    summary = (
        df.groupby("step", as_index=False)
        .agg(mean_reward=("reward", "mean"))
        .sort_values("step")
    )

    fig.add_trace(
        go.Scatter(
            x=summary["step"],
            y=summary["mean_reward"],
            mode="lines",
            name=name,
            line=dict(color=baseline_colors.get(name, "gray"), dash="dash", width=2),
            legendgroup="Baselines",
        )
    )

fig.update_layout(
    legend_title_text="Algorithm / Config",
    xaxis_range=[0, MAX_STEPS],
)

fig.show()

# Save figure to results folder
output_dir = paths.notebook_results_dir("utd_ablation")
output_path = output_dir / "utd_ablation_comparison.png"
save_figure(fig, output_path, description="plot")

# --- Plot Runtime ---
print("Plotting Runtime...")
fig_time = plotting.sample_efficiency_figure(
    frame=time_frame,
    metric_key="hours",
    variant_column="variant",
    task_name=f"{TASK} Runtime (Hours)",
    baseline_frame=pd.DataFrame(),
    baseline_label="",
    baseline_step_cap=MAX_STEPS,
)
fig_time.update_layout(
    yaxis_title="Elapsed Time (Hours)",
    xaxis_range=[0, MAX_STEPS],
)
fig_time.show()

output_path_time = output_dir / "utd_ablation_runtime.png"
save_figure(fig_time, output_path_time, description="runtime plot")

Saving plot to /home/thomas/projects/Research/Masters Thesis/tdmpc2-with-return-based-auxiliary-tasks/analysis/results/utd_ablation/utd_ablation_comparison.png...
Done.
Plotting Runtime...
Done.
Plotting Runtime...


Saving runtime plot to /home/thomas/projects/Research/Masters Thesis/tdmpc2-with-return-based-auxiliary-tasks/analysis/results/utd_ablation/utd_ablation_runtime.png...
Done.
