- - -
## Data Loading & Augmentation
- - -

In [None]:
import os
from io_agent.runner.iterative import augment_mujoco_dataset, registered_envs
from io_agent.plant.mujoco import Walker2dEnv, HopperEnv, HalfCheetahEnv


# Build the augmented dataset for every registered environment, skipping
# the ones whose target path is already present on disk.
for name, env_class in registered_envs.items():
    save_dir = f"./{name}_data/dataset"
    file_name = "rich_augmented"
    dataset_path = os.path.join(save_dir, file_name)

    # NOTE(review): the check uses the bare file name without an extension;
    # confirm that augment_mujoco_dataset writes to exactly this path,
    # otherwise the augmentation is repeated on every run.
    if os.path.exists(dataset_path):
        continue

    env = env_class()
    augment_mujoco_dataset(env=env, save_dir=save_dir, file_name=file_name)


- - -
## Iterative IO Controller Training
- - -

In [None]:
import numpy as np
import torch
import multiprocessing

from io_agent.runner.iterative import run_iterative_io, IterativeIOArgs


# Run-wide constants.
n_cpu = multiprocessing.cpu_count()
n_trials = 20
general_seed = 42

# A single generator seeded with `general_seed` produces every seed used in
# this notebook: the first `n_trials` draws become the trial seeds and the
# final draw becomes the training seed.
seed_rng = np.random.default_rng(general_seed)
drawn_seeds = seed_rng.integers(low=0, high=2**30, size=n_trials + 1)
trial_seeds = list(drawn_seeds[:-1])
train_seed = drawn_seeds[-1]

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


# Settings that are identical for every experiment below.
shared_settings = {"learning_rate": 5e-2, "n_batch": 64}

# One IterativeIOArgs per experiment; the dictionary key is the experiment's
# label. Insertion order (Walker, Hopper, Cheetah) is the iteration order.
experiment_args = {}
experiment_args["Walker-IO-1e4"] = IterativeIOArgs(
    env_name="walker",
    work_dir="./walker_data",
    data_size=10_000,
    lr_exp_decay=0.9975,
    eval_epochs=tuple(range(0, 2601, 100)),
    **shared_settings,
)
experiment_args["Hopper-IO-5e3"] = IterativeIOArgs(
    env_name="hopper",
    work_dir="./hopper_data",
    data_size=5_000,
    lr_exp_decay=0.995,
    eval_epochs=tuple(range(0, 1001, 100)),
    **shared_settings,
)
experiment_args["Cheetah-IO-1e4"] = IterativeIOArgs(
    env_name="cheetah",
    work_dir="./cheetah_data",
    data_size=10_000,
    lr_exp_decay=0.9925,
    eval_epochs=tuple(range(0, 2601, 100)),
    **shared_settings,
)

# Run every configured experiment with the same training seed and the same
# trial seeds, keeping only the cost and loss outputs per experiment.
results = {}
for key, args in experiment_args.items():
    run_output = run_iterative_io(
        name=key,
        args=args,
        seed=train_seed,
        trial_seeds=trial_seeds,
        device=device,
        verbose=True,
    )
    # The fourth returned item (the agent) is unpacked but not stored in
    # `results`; after the loop the name refers to the last experiment's agent.
    costs, epoch_losses, step_losses, iterative_io_agent = run_output
    results[key] = (costs, epoch_losses, step_losses)

- - -
## Visualizing the Training
- - -

In [None]:
# Smooth the scores by pooling them over a sliding window of the last n entries
from collections import deque
from itertools import chain

from io_agent.plant.mujoco import HopperEnv


# NOTE(review): every experiment is normalized through the Hopper environment,
# although `results` also contains Walker and Cheetah runs. Confirm whether
# each experiment should be normalized with its own environment instead.
env = HopperEnv()

# Maps experiment name -> {key: normalized scores (in percent) pooled over the
# sliding window ending at that key}.
smooth_scores = {}
for name, scores in results.items():
    # Start every experiment with an empty window. A single deque shared by
    # all experiments would carry the last entries of the previous experiment
    # into the first smoothed points of the next one.
    queue = deque(maxlen=5)
    smooth_scores[name] = {}
    # scores[0] is the `costs` mapping, stored first in each results tuple.
    for key, values in scores[0].items():
        queue.append(values)
        # Flatten the entries currently in the window and scale each
        # normalized score to a percentage.
        smooth_scores[name][key] = [
            env.env.get_normalized_score(value) * 100
            for value in chain(*queue)
        ]


In [None]:
import numpy as np
from collections import defaultdict

from io_agent.plotter import tube_figure_plt


# Percentile pair passed to every figure below.
percentile_band = (20, 80)

# Figure 1: smoothed, normalized episodic scores per epoch (linear axes).
fig, axes = tube_figure_plt(
    cost_data=smooth_scores,
    title="",
    log_xaxis=False,
    log_yaxis=False,
    x_label="epoch",
    y_label="episodic score (%)",
    percentiles=percentile_band,
)

# Figure 2: per-epoch losses (second item of each results tuple), keyed by a
# 1-based index so the logarithmic x axis never has to show zero.
epoch_loss_curves = {
    run_name: dict(enumerate(run_result[1], start=1))
    for run_name, run_result in results.items()
}
fig, axes = tube_figure_plt(
    cost_data=epoch_loss_curves,
    title="",
    log_xaxis=True,
    log_yaxis=True,
    x_label="epoch",
    y_label="sub loss",
    percentiles=percentile_band,
)

# Figure 3: per-step losses (third item of each results tuple), again keyed
# by a 1-based index for the logarithmic x axis.
step_loss_curves = {
    run_name: dict(enumerate(run_result[2], start=1))
    for run_name, run_result in results.items()
}
fig, axes = tube_figure_plt(
    cost_data=step_loss_curves,
    title="",
    log_xaxis=True,
    log_yaxis=True,
    x_label="gradient step",
    y_label="batch sub loss",
    percentiles=percentile_band,
)