# Dual Heater Experiments

### Lookback horizon and bias ablation

- The experiment shown in Figure 6 Left


In [None]:
from typing import List
from itertools import product
import numpy as np
import multiprocessing

from io_agent.evaluator import Transition
from io_agent.plant.dualheater import DualHeaterEnv
from io_agent.runner.basic import run_mpc, run_io_mpc
from io_agent.utils import parallelize, steady_state_cost, save_experiment


# --- Experiment configuration ------------------------------------------------
n_cpu = multiprocessing.cpu_count()
env_length = 76  # passed to DualHeaterEnv as max_length
horizon = 20  # horizon of the dataset MPC runs; reused as expert_horizon for IO-MPC
# Keep at least one evaluation trial: with fewer than 4 CPUs `n_cpu // 4` is 0,
# which leaves `trial_seeds` empty and makes the later
# `n_cpu // min(n_trials, n_cpu)` worker computation divide by zero.
n_trials = max(1, n_cpu // 4)
n_dataset_trials = 20
general_seed = 42
lookbacks = [0, 2, 4, 8, 12, 16]
seed_rng = np.random.default_rng(general_seed)

plant = DualHeaterEnv(max_length=env_length)
# One seed for permuting the dataset plus one seed per evaluation trial.
# NOTE(review): the number of values drawn here depends on n_trials (i.e. on
# the CPU count) and the dataset seeds are drawn from the same generator
# afterwards, so the seeds presumably differ between machines -- confirm if
# cross-machine reproducibility matters.
permute_seed, *trial_seeds = seed_rng.integers(0, 2**30, n_trials + 1)
dataset_trial_seeds = seed_rng.integers(0, 2**30, n_dataset_trials)

# The dataset is generated on its own environment instance, separate from the
# `plant` used for the IO-MPC evaluation runs.
naive_plant = DualHeaterEnv(max_length=env_length)
dataset_trajectories = parallelize(
    n_proc=min(n_cpu, n_dataset_trials),
    fn=run_mpc,
    kwargs_list=[
        dict(
            plant=naive_plant,
            horizon=horizon,
            use_foresight=False,
            bias_aware=False,
            # Each dataset trial gets its own reset generator.
            env_reset_rng=np.random.default_rng(_seed)
        ) for _seed in dataset_trial_seeds
    ],
    loading_bar_kwargs=dict(desc="MPC trials")
)


def lookback_horizon_experiment(n_past: int, add_bias: bool) -> List[List[Transition]]:
    """Run the IO-MPC evaluation trials for a single ablation setting.

    Args:
        n_past: Forwarded to ``run_io_mpc`` as ``n_past`` (the lookback value
            being swept).
        add_bias: Forwarded to ``run_io_mpc`` as ``add_bias``.

    Returns:
        The result of ``parallelize`` over the evaluation trials; presumably
        one entry per seed in ``trial_seeds`` (confirm against ``parallelize``).
    """
    # The callable handed to `parallelize` is whatever `run_io_mpc` returns
    # when given the shared dataset and the fixed experiment settings.
    trial_runner = run_io_mpc(
        dataset_trajectories,
        dataset_length=300,
        n_past=n_past,
        add_bias=add_bias,
        expert_horizon=horizon,
        plant=plant,
        dataset_permute_rng=np.random.default_rng(permute_seed),
    )
    # Every trial receives a fresh reset generator built from its own seed.
    per_trial_kwargs = [
        {"env_reset_rng": np.random.default_rng(_seed)}
        for _seed in trial_seeds
    ]
    progress_bar = {"desc": f"H:{n_past}, bias: {add_bias} MPC trials"}
    return parallelize(
        n_proc=min(n_trials, n_cpu),
        fn=trial_runner,
        kwargs_list=per_trial_kwargs,
        loading_bar_kwargs=progress_bar,
    )


# Sweep every lookback value; only add_bias=False is included in this sweep,
# so each key has the form (lookback, False).
ablation_keys = list(product(lookbacks, [False]))

# Each setting already runs min(n_trials, n_cpu) workers internally, so the
# outer level uses the CPU count divided by that inner worker count.
ablation_trajectories = parallelize(
    n_proc=n_cpu // min(n_trials, n_cpu),
    fn=lookback_horizon_experiment,
    kwargs_list=[
        {"n_past": lookback, "add_bias": bias}
        for lookback, bias in ablation_keys
    ],
    loading_bar_kwargs={"desc": "IO-MPC H-ablation"},
)

# Persist the results (keyed by (lookback, add_bias)) because the sweep takes
# some time to complete; the plotting cells load them back from disk.
save_experiment(
    values={"h_ablation": dict(zip(ablation_keys, ablation_trajectories))},
    seed=general_seed,
    exp_dir="./dualheater_data/h_ablation",
    name="figure_6_left_data",
)

In [None]:
import numpy as np
# Import from the `io_agent` package, as every other cell in this notebook
# does; the bare `utils` / `plotter` modules only resolve when the notebook is
# run from inside the package directory.
from io_agent.utils import steady_state_cost, load_experiment
from io_agent.plotter import tube_figure

# Load the lookback-horizon ablation results saved by the experiment cell.
exp_trajectories = load_experiment("./dualheater_data/h_ablation/figure_6_left_data-42")
io_h_trajectories = exp_trajectories["h_ablation"]

# Costs grouped by whether the bias term was enabled, then by lookback value.
costs = {
    "w/ bias": {},
    "w/o bias": {}
}
for (h_value, bias), trajectories in io_h_trajectories.items():
    # Drop trials stored as None and skip settings with no remaining trial.
    trajectories = [t for t in trajectories if t is not None]
    if trajectories:
        costs["w/ bias" if bias else "w/o bias"][h_value] = steady_state_cost(trajectories, 0.4)

tube_figure(
    cost_data=costs,
    title="Figure 6 left",
    xaxis_name="Lookback horizon"
)

- Experiments shown in Figure 6 Center and Right

In [None]:
from typing import List
from itertools import product
import numpy as np
import multiprocessing

from io_agent.evaluator import Transition
from io_agent.plant.dualheater import DualHeaterEnv
from io_agent.runner.basic import run_mpc, run_io_rmpc
from io_agent.utils import parallelize, steady_state_cost, save_experiment


# --- Experiment configuration ------------------------------------------------
n_cpu = multiprocessing.cpu_count()
env_length = 76  # passed to DualHeaterEnv as max_length
horizon = 20  # horizon of the dataset MPC runs; reused as expert_horizon for IO-RMPC
# Keep at least one evaluation trial: with fewer than 4 CPUs `n_cpu // 4` is 0,
# which leaves `trial_seeds` empty and makes the later
# `n_cpu // min(n_cpu, n_trials)` worker computation divide by zero.
n_trials = max(1, n_cpu // 4)
n_past = 2
add_bias = False
n_dataset_trials = 28
general_seed = 42
n_rhos = 28  # number of rho values in the sweep
seed_rng = np.random.default_rng(general_seed)

plant = DualHeaterEnv(max_length=env_length)
# One seed for permuting the dataset plus one seed per evaluation trial.
# NOTE(review): the number of values drawn here depends on n_trials (i.e. on
# the CPU count) and the dataset seeds are drawn from the same generator
# afterwards, so the seeds presumably differ between machines -- confirm if
# cross-machine reproducibility matters.
permute_seed, *trial_seeds = seed_rng.integers(0, 2**30, n_trials + 1)
dataset_trial_seeds = seed_rng.integers(0, 2**30, n_dataset_trials)

dataset_trajectories = parallelize(
    n_proc=min(n_cpu, n_dataset_trials),
    fn=run_mpc,
    kwargs_list=[
        dict(
            plant=plant,
            # Use the shared setting rather than a second hard-coded 20 so the
            # dataset horizon cannot drift from `expert_horizon`.
            horizon=horizon,
            use_foresight=False,
            bias_aware=False,
            # Each dataset trial gets its own reset generator.
            env_reset_rng=np.random.default_rng(_seed)
        ) for _seed in dataset_trial_seeds
    ],
    loading_bar_kwargs=dict(desc="MPC trials")
)


def rho_experiment(rho: float) -> List[List[Transition]]:
    """Run the IO-RMPC evaluation trials for a single ``rho`` value.

    Args:
        rho: Forwarded to ``run_io_rmpc`` as ``expert_rho``.

    Returns:
        The result of ``parallelize`` over the evaluation trials; presumably
        one entry per seed in ``trial_seeds`` (confirm against ``parallelize``).
    """
    # The callable handed to `parallelize` is whatever `run_io_rmpc` returns
    # when given the shared dataset and the fixed experiment settings.
    trial_runner = run_io_rmpc(
        dataset_trajectories,
        dataset_length=300,
        n_past=n_past,
        add_bias=add_bias,
        expert_horizon=horizon,
        expert_rho=rho,
        plant=plant,
        dataset_permute_rng=np.random.default_rng(permute_seed),
    )
    # Every trial receives a fresh reset generator built from its own seed.
    per_trial_kwargs = [
        {"env_reset_rng": np.random.default_rng(_seed)}
        for _seed in trial_seeds
    ]
    progress_bar = {"desc": f"rho:{rho} MPC trials"}
    return parallelize(
        n_proc=min(n_cpu, n_trials),
        fn=trial_runner,
        kwargs_list=per_trial_kwargs,
        loading_bar_kwargs=progress_bar,
    )


# n_rhos logarithmically spaced rho values between 10**-1 and 10**2.
rho_values = np.logspace(-1, 2, n_rhos)

# Each rho already runs min(n_cpu, n_trials) workers internally, so the outer
# level uses the CPU count divided by that inner worker count.
ablation_trajectories = parallelize(
    n_proc=n_cpu // min(n_cpu, n_trials),
    fn=rho_experiment,
    kwargs_list=[{"rho": rho} for rho in rho_values],
    loading_bar_kwargs={"desc": "IO-MPC rho ablation"},
)

# Persist the results because the sweep takes some time to complete.
# "mpc_trajectories" maps every rho to the same dataset trajectories object.
save_experiment(
    values={
        "rho_ablation": dict(zip(rho_values, ablation_trajectories)),
        "mpc_trajectories": dict.fromkeys(rho_values, dataset_trajectories),
    },
    seed=general_seed,
    exp_dir="./dualheater_data/rho_ablation",
    name="figure_6_center_data",
)

In [None]:
import numpy as np
from io_agent.utils import steady_state_cost, load_experiment
from io_agent.plotter import tube_figure

# Load the rho ablation results and the dataset (MPC) trajectories saved with them.
exp_trajectories = load_experiment("./dualheater_data/rho_ablation/figure_6_center_data-42")
io_rho_trajectories = exp_trajectories["rho_ablation"]
mpc_trajectories = exp_trajectories["mpc_trajectories"]
# Load the lookback ablation results, used for the IO-MPC reference curve.
exp_trajectories = load_experiment("./dualheater_data/h_ablation/figure_6_left_data-42")
io_h_trajectories = exp_trajectories["h_ablation"]

# The IO-MPC reference (lookback 2, no bias) does not depend on rho, so its
# cost is computed once and reused for every rho on the x-axis instead of
# being recomputed per key.
io_mpc_reference_cost = steady_state_cost(io_h_trajectories[(2, False)], 0.4)

tube_figure(
    cost_data={
        "IO-RMPC (1)": {
            key: steady_state_cost(trajectories, 0.4)
            for key, trajectories in io_rho_trajectories.items()
        },
        "IO-MPC (1)": {key: io_mpc_reference_cost for key in io_rho_trajectories},
    },
    title="Figure 6 center",
    log_xaxis=True,
    percentiles=(20, 80),
)

In [None]:
import numpy as np
from io_agent.utils import steady_state_cost, load_experiment
from io_agent.plotter import histogram_figure

# Load the rho ablation results and the dataset (MPC) trajectories saved with them.
exp_trajectories = load_experiment("./dualheater_data/rho_ablation/figure_6_center_data-42")
io_rho_trajectories = exp_trajectories["rho_ablation"]
mpc_trajectories = exp_trajectories["mpc_trajectories"]
# Load the lookback ablation results for the two IO-MPC entries.
exp_trajectories = load_experiment("./dualheater_data/h_ablation/figure_6_left_data-42")
io_h_trajectories = exp_trajectories["h_ablation"]


# Select the (rho, trajectories) pair whose median steady-state cost is smallest.
opt_rho, opt_rho_trajectories = min(
    io_rho_trajectories.items(),
    key=lambda rho_and_runs: np.median(steady_state_cost(rho_and_runs[1], 0.4)),
)

# "MPC" uses the first stored entry of mpc_trajectories.
histogram_cost_data = {
    "IO-RMPC (1)": steady_state_cost(opt_rho_trajectories, 0.4),
    "IO-MPC (1)": steady_state_cost(io_h_trajectories[(2, False)], 0.4),
    "IO-MPC (2)": steady_state_cost(io_h_trajectories[(8, False)], 0.4),
    "MPC": steady_state_cost(next(iter(mpc_trajectories.values())), 0.4),
}

histogram_figure(
    cost_data=histogram_cost_data,
    title="Figure 6 right",
)
