# Small Model Sweep — State Observations

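This notebook analyzes the `8finalsweep/small_model` sweep to evaluate how a reduced model size interacts with the number of imagination rollouts (`num_rollouts`), entropy regularization (`entropy_coef`), and the update-to-data ratio (`utd_ratio`) on state-based tasks. For each task it plots sample-efficiency curves against the `tdmpc2-state` baseline and summarizes `eval/episode_reward` at step 100,000 per hyperparameter configuration.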

In [1]:
from pathlib import Path
import sys

# The repository root is taken to be two levels above the current working
# directory; it is put on sys.path so that `analysis.tools` can be imported.
NOTEBOOK_DIR = Path().resolve()
REPO_ROOT = NOTEBOOK_DIR.parent.parent

# Fail fast if the expected package directory is not where we assume it is
# (e.g. the kernel was started from a different working directory).
if not REPO_ROOT.joinpath("analysis", "tools").exists():
    raise RuntimeError("Unable to locate analysis/tools package from notebook directory")

# Prepend only once so that re-running this cell does not duplicate the entry.
repo_root_entry = str(REPO_ROOT)
if repo_root_entry not in sys.path:
    sys.path.insert(0, repo_root_entry)

In [None]:
import pandas as pd

from analysis.tools import aggregations, baselines, naming, plotting, selection, wandb_io, paths
from analysis.tools.paths import ensure_dir

In [3]:
# --- Sweep identity ---------------------------------------------------------
NOTEBOOK_STEM = "small_model_state_sweep"
# NOTE: resolved relative to the current working directory of the kernel.
SWEEP_ROOT = Path("../../sweep_list/midterm_sweep/8finalsweep/small_model").resolve()
# Sweep id and W&B project are read from text files inside the sweep directory;
# surrounding whitespace (such as a trailing newline) is stripped.
SWEEP_ID = (SWEEP_ROOT / "id.txt").read_text().strip()
WANDB_PROJECT = (SWEEP_ROOT / "project.txt").read_text().strip()
WANDB_ENTITY = "thomasevers9"

# --- History columns ---------------------------------------------------------
# The history request is the evaluation metric followed by every step key,
# so the requested keys cannot drift apart from the two lists used below.
METRIC_KEY = "eval/episode_reward"
STEP_KEYS = ["eval/step", "train/step", "global_step", "total_env_steps", "step", "_step"]
HISTORY_KEYS = [METRIC_KEY, *STEP_KEYS]

# --- Analysis settings -------------------------------------------------------
STEP_TARGET = 100_000  # step value at which rewards are aggregated
BASELINE_LABEL = "tdmpc2-state"
HYPERPARAM_COLUMNS = ["num_rollouts", "entropy_coef", "utd_ratio"]
# One visual channel per swept hyperparameter in the sample-efficiency figure.
ENCODING_SPECS = {
    "color": {
        "column": "num_rollouts",
        "label": "num_rollouts",
    },
    "dash": {
        "column": "entropy_coef",
        "label": "entropy_coef",
    },
    "width": {
        "column": "utd_ratio",
        "label": "utd_ratio",
        # explicit utd_ratio value -> width mapping
        "values": {2: 2.5, 4: 4.0},
    },
}

# --- Output location ---------------------------------------------------------
RESULTS_DIR = paths.notebook_results_dir(NOTEBOOK_STEM)
RESULTS_DIR

PosixPath('/home/thoma/projects/auxiliary_targets/tdmpc2/analysis/results/small_model_state_sweep')

In [4]:
# Collect the fetch arguments in one place, then download (or load) the sweep's runs.
# NOTE(review): use_cache=True / force_refresh=False presumably reuses a local
# cache when one exists — confirm against wandb_io.fetch_sweep_runs.
fetch_options = {
    "entity": WANDB_ENTITY,
    "project": WANDB_PROJECT,
    "sweep_id": SWEEP_ID,
    "history_keys": HISTORY_KEYS,
    "use_cache": True,
    "force_refresh": False,
}
runs_payload, manifest, data_source = wandb_io.fetch_sweep_runs(**fetch_options)

# Report how many runs were loaded and where they came from.
print(f"Loaded {manifest['run_count']} runs from {data_source}")

Downloading sweep 1qzum3ai: 100%|██████████| 48/48 [00:53<00:00,  1.11s/it]

Loaded 48 runs from remote





In [5]:
# Run-config keys to carry into the frame; every key keeps its own name as the
# column name, so the mapping is an identity mapping.
CONFIG_TO_COLUMNS = {
    key: key
    for key in (
        "task",
        "seed",
        "num_rollouts",
        "entropy_coef",
        "utd_ratio",
        "model_size",
        "obs",
    )
}

# Flatten the fetched run histories into one frame, then append the task name
# translated by naming.wandb_task_to_baseline as the last column.
runs_df = aggregations.runs_history_to_frame(
    runs_payload,
    metric_key=METRIC_KEY,
    step_keys=STEP_KEYS,
    config_to_columns=CONFIG_TO_COLUMNS,
).assign(
    task_baseline=lambda frame: frame["task"].map(naming.wandb_task_to_baseline)
)
runs_df.head()

Unnamed: 0,task,seed,num_rollouts,entropy_coef,utd_ratio,model_size,obs,run_id,step,eval/episode_reward,task_baseline
0,acrobot-swingup,102,4,1e-05,4,1,state,02n9anon,0,1.467343,acrobot-swingup
1,acrobot-swingup,102,4,1e-05,4,1,state,02n9anon,2500,13.102516,acrobot-swingup
2,acrobot-swingup,102,4,1e-05,4,1,state,02n9anon,5000,24.426167,acrobot-swingup
3,acrobot-swingup,102,4,1e-05,4,1,state,02n9anon,7500,50.681969,acrobot-swingup
4,acrobot-swingup,102,4,1e-05,4,1,state,02n9anon,10000,31.803137,acrobot-swingup


In [6]:
# --- Pick the baseline root matching the sweep's observation modality --------
obs_modes = sorted(runs_df["obs"].unique())
if len(obs_modes) == 0:
    raise ValueError("No observation modalities detected in runs_df")
if len(obs_modes) > 1:
    raise ValueError(f"Multiple observation modalities found {obs_modes}; mixed-mode handling not implemented")
obs_mode = obs_modes[0]

# Modality -> name of the root constant on the `baselines` module. The attribute
# is looked up only for the selected modality.
root_attr_by_mode = {"rgb": "PIXEL_BASELINE_ROOT", "state": "STATE_BASELINE_ROOT"}
if obs_mode not in root_attr_by_mode:
    raise ValueError(f"Unsupported observation modality '{obs_mode}'")
baseline_root = getattr(baselines, root_attr_by_mode[obs_mode])

# --- Split tasks by whether a baseline is available ---------------------------
baseline_tasks = sorted(runs_df["task_baseline"].unique())
# Query availability once per task; dict order follows the sorted task list.
has_baseline = {
    task_name: baselines.has_task(task_name, root=baseline_root)
    for task_name in baseline_tasks
}
available_tasks = [task_name for task_name, found in has_baseline.items() if found]
missing_tasks = [task_name for task_name, found in has_baseline.items() if not found]

if missing_tasks:
    print("Skipping tasks without baseline CSV:", ", ".join(missing_tasks))
if not available_tasks:
    raise ValueError(f"No baselines available under {baseline_root} for tasks {baseline_tasks}")

# --- Load the baselines that exist --------------------------------------------
baseline_df = baselines.load_many(available_tasks, root=baseline_root)
baseline_df.head()

Unnamed: 0,step,reward,seed,task
0,0,2.4,3,acrobot-swingup
1,100000,275.1,3,acrobot-swingup
2,200000,246.4,3,acrobot-swingup
3,300000,330.8,3,acrobot-swingup
4,400000,321.6,3,acrobot-swingup


In [7]:
# For every task: write a sample-efficiency figure next to the baseline and
# collect the per-configuration reward summary at STEP_TARGET.
summary_tables = []

for task in sorted(runs_df["task"].unique()):
    task_df = runs_df.loc[runs_df["task"] == task]
    if task_df.empty:
        raise ValueError(f"No rows found for task {task}")

    # Baseline rows are keyed by the translated task name.
    baseline_name = naming.wandb_task_to_baseline(task)
    baseline_task_df = baseline_df.loc[baseline_df["task"] == baseline_name]
    if baseline_task_df.empty:
        print(f"Skipping task {task} — baseline CSV not available")
        continue

    # Figure: one PNG per task under RESULTS_DIR/<task>/.
    task_dir = ensure_dir(RESULTS_DIR / task)
    encoded_fig = plotting.sample_efficiency_encoded_figure(
        frame=task_df,
        metric_key=METRIC_KEY,
        task_name=task,
        baseline_frame=baseline_task_df,
        baseline_label=BASELINE_LABEL,
        encodings=ENCODING_SPECS,
    )
    plotting.write_png(encoded_fig, output_path=task_dir / "sample_efficiency.png")

    # Table: aggregate per hyperparameter configuration, then tag the rows.
    # NOTE(review): model_size is taken from the task's first row only — this
    # assumes a single model size per task; confirm for mixed-size sweeps.
    agg = aggregations.aggregate_at_step(
        task_df,
        step_value=STEP_TARGET,
        metric_key=METRIC_KEY,
        group_cols=HYPERPARAM_COLUMNS,
    ).assign(
        task=task,
        model_size=task_df["model_size"].iloc[0],
    )
    summary_tables.append(agg)

if len(summary_tables) == 0:
    raise ValueError("No summary tables were generated")

# Stack the per-task tables and persist them alongside the figures.
summary_df = pd.concat(summary_tables, ignore_index=True)
summary_csv_path = RESULTS_DIR / f"reward_summary_{STEP_TARGET}.csv"
summary_df.to_csv(summary_csv_path, index=False)
summary_df

Unnamed: 0,num_rollouts,entropy_coef,utd_ratio,mean_reward,std_reward,num_runs,task,model_size
0,4,1e-05,2,409.270447,23.199265,2,acrobot-swingup,1
1,4,1e-05,4,290.513779,105.577615,2,acrobot-swingup,1
2,4,0.0001,2,401.67543,103.807906,2,acrobot-swingup,1
3,4,0.0001,4,242.534302,101.74526,2,acrobot-swingup,1
4,16,1e-05,2,386.211075,27.265928,2,acrobot-swingup,1
5,16,1e-05,4,303.757355,79.948728,2,acrobot-swingup,1
6,16,0.0001,2,405.738678,128.552695,2,acrobot-swingup,1
7,16,0.0001,4,208.381023,142.732707,2,acrobot-swingup,1
8,4,1e-05,2,46.108025,,1,hopper-hop,1
9,4,1e-05,4,217.228027,0.0,2,hopper-hop,1


In [8]:
# Aggregate over the full runs frame, i.e. pooled across all tasks, so each
# hyperparameter configuration gets a single row.
# NOTE(review): rewards from different tasks are pooled without normalization —
# confirm that this is the intended comparison.
hyperparam_summary = aggregations.aggregate_at_step(
    runs_df,
    step_value=STEP_TARGET,
    metric_key=METRIC_KEY,
    group_cols=HYPERPARAM_COLUMNS,
)

hyperparam_csv_path = RESULTS_DIR / f"hyperparam_summary_{STEP_TARGET}.csv"
hyperparam_summary.to_csv(hyperparam_csv_path, index=False)

# Show the ten configurations with the highest mean reward.
hyperparam_summary.sort_values(by="mean_reward", ascending=False).head(10)

Unnamed: 0,num_rollouts,entropy_coef,utd_ratio,mean_reward,std_reward,num_runs
0,4,1e-05,2,614.728392,381.652449,6
2,4,0.0001,2,488.820475,367.914682,6
6,16,0.0001,2,485.563539,363.947542,6
1,4,1e-05,4,478.612717,352.916936,6
5,16,1e-05,4,473.38828,355.095025,6
7,16,0.0001,4,436.446477,405.277111,6
4,16,1e-05,2,422.728429,375.216892,6
3,4,0.0001,4,379.415909,281.976951,6
