# Fighter Environment

Experiments and figures.

In [27]:
from typing import List, Optional, Dict, Union, Any, Callable, Tuple, Type, Iterable
from functools import partial
from itertools import chain
import os
import pickle
from multiprocessing import Process, Queue, Pool
from tqdm.notebook import tqdm
import queue
import numpy as np
import cvxpy as cp
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px

from io_agent.plant.fighter import FighterEnv
from io_agent.plant.base import EnvMatrices, Plant
from io_agent.evaluator import ControlLoop, Transition
from io_agent.control.mpc import MPC
from io_agent.control.rmpc import RobustMPC
from io_agent.control.io import IOController, AugmentedTransition, AugmentDataset
from io_agent.utils import FeatureHandler


def parallelize(n_proc: int,
                fn: Callable[[Any], Any],
                kwargs_list: List[Dict[str, Any]],
                loading_bar_kwargs: Optional[Dict[str, Any]] = None
                ) -> List[Any]:
    """ Call ``fn`` once per keyword dictionary using worker processes

    Args:
        n_proc (int): Number of worker processes to start
        fn (Callable[[Any], Any]): Function that is called as ``fn(**kwargs)``
        kwargs_list (List[Dict[str, Any]]): One keyword dictionary per call
        loading_bar_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments
            of the tqdm progress bar. No bar is shown if None. Defaults to None.

    Returns:
        List[Any]: Results of the calls in the order of ``kwargs_list``

    Raises:
        ValueError: If there is work to do but ``n_proc`` is smaller than 1
        RuntimeError: If ``fn`` raises in a worker. The message contains the
            traceback that was formatted inside the worker.
    """
    import multiprocessing
    import traceback

    if kwargs_list and n_proc < 1:
        raise ValueError(f"n_proc must be at least 1, got {n_proc}")

    # The worker below is a closure, which can only be handed to a child
    # process by forking. Ask for "fork" explicitly wherever it exists.
    try:
        context = multiprocessing.get_context("fork")
    except ValueError:
        context = multiprocessing.get_context()

    def _async_execute_wrapper() -> None:
        while True:
            # Block until an item arrives. A non-blocking get can report an
            # empty queue while the parent is still flushing the work items.
            item = work_queue.get(block=True)
            if item is None:  # Sentinel: no more work for this worker
                return None
            kwargs, key = item
            try:
                message = {"key": key, "result": fn(**kwargs), "error": None}
            except Exception:
                # Report the failure instead of dying silently, otherwise the
                # parent would wait for this result forever
                message = {"key": key, "result": None, "error": traceback.format_exc()}
            result_queue.put(message)

    result_queue = context.Queue()
    work_queue = context.Queue()

    for key, kwargs in enumerate(kwargs_list):
        work_queue.put((kwargs, key))
    # One sentinel per worker so that every worker terminates
    for _ in range(n_proc):
        work_queue.put(None)

    loading_bar = (partial(tqdm, **loading_bar_kwargs)
                   if loading_bar_kwargs is not None
                   else lambda x: x)

    process_list = []
    for _ in range(n_proc):
        process_list.append(context.Process(target=_async_execute_wrapper))
        process_list[-1].start()

    results_dict = {}
    try:
        for _ in loading_bar(range(len(kwargs_list))):
            _return = result_queue.get(block=True)
            if _return["error"] is not None:
                raise RuntimeError(
                    f"Worker failed on task {_return['key']}:\n{_return['error']}")
            results_dict[_return["key"]] = _return["result"]
    except BaseException:
        # Stop the remaining workers and do not wait for unread work items
        for process in process_list:
            process.terminate()
        work_queue.cancel_join_thread()
        raise
    finally:
        for process in process_list:
            process.join()

    return [results_dict[index] for index in range(len(results_dict))]


def save_experiment(values: Any, seed: int, exp_dir: str, name: str) -> None:
    """ Pickle the experiment results to ``<exp_dir>/<name>-<seed>``

    Args:
        values (Any): Picklable object to store
        seed (int): Seed of the experiment, used as the suffix of the file name
        exp_dir (str): Target directory, created if it does not exist
        name (str): Prefix of the file name
    """
    os.makedirs(exp_dir, exist_ok=True)
    target_path = os.path.join(exp_dir, f"{name}-{seed}")
    with open(target_path, "wb") as handle:
        pickle.dump(values, handle)


def load_experiment(path: str) -> Any:
    """ Load a pickled experiment result

    Args:
        path (str): Path of the pickle file

    Returns:
        Any: The unpickled object
    """
    # NOTE: unpickling can execute arbitrary code, only load trusted files
    resolved_path = os.path.join(path)
    with open(resolved_path, "rb") as handle:
        contents = pickle.load(handle)
    return contents


def try_solve(patience: int, verbose: bool = True):
    """ Build a decorator that retries a function when the solver fails

    Args:
        patience (int): Maximum number of attempts
        verbose (bool, optional): If true, print a message after every failed
            attempt. Defaults to True.

    Returns:
        Callable: Decorator. The decorated function is called again with the same
            arguments whenever it raises ``cp.SolverError``, up to ``patience``
            attempts in total. Any other exception propagates immediately.

    Raises:
        ValueError: When the decorated function is called and ``patience`` is
            smaller than 1
        cp.SolverError: The error of the last attempt once every attempt failed
    """
    from functools import wraps

    def decorator(function: Callable[[Any], Any]) -> Callable[[Any], Any]:
        @wraps(function)
        def wrapper(*args, **kwargs) -> Any:
            last_error = None
            for attempt in range(1, patience + 1):
                try:
                    return function(*args, **kwargs)
                except cp.SolverError as err:
                    # The name bound by "as" is deleted when the except block
                    # ends, so keep a reference for the final raise
                    last_error = err
                    if verbose:
                        print(f"Failed to solve at attempt: {attempt}")
            if last_error is None:
                raise ValueError(f"patience must be at least 1, got {patience}")
            raise last_error
        return wrapper
    return decorator


def run_agent(agent: Union[MPC, IOController],
              plant: Plant,
              use_foresight: bool,
              disturbance_bias: Optional[np.ndarray] = None,
              bias_aware: bool = True,
              rng: Optional[np.random.Generator] = None,
              ) -> List[Transition]:
    """ Simulate the agent on the given plant with a control loop

    Args:
        agent (Union[MPC, IOController]): MPC or IO controller
        plant (Plant): Plant to control. Its state and output disturbances are
            handed to the control loop.
        use_foresight (bool): If true, feed the agent with future noise signal
        disturbance_bias (Optional[np.ndarray], optional): Bias for the state disturbance.
            Defaults to None.
        bias_aware (bool, optional): If false and a bias is given, the bias is
            subtracted from the state disturbance handed to the control loop.
            Defaults to True.
        rng (Optional[np.random.Generator], optional): Generator that seeds the
            control loop. A freshly seeded generator is used if None. Defaults to None.

    Returns:
        List[Transition]: Trajectory of transitions
    """
    if rng is None:
        # The default used to crash on "rng.integers", fall back to OS entropy
        rng = np.random.default_rng()
    # Work on a copy so that the disturbance of the plant itself stays untouched
    state_disturbance = plant.state_disturbance.copy()
    if disturbance_bias is not None and not bias_aware:
        state_disturbance -= disturbance_bias
    evaluator = ControlLoop(
        state_disturbance=state_disturbance,
        output_disturbance=plant.output_disturbance,
        plant=plant,
        controller=agent,
        rng=np.random.default_rng(rng.integers(0, 2**30))
    )
    return evaluator.simulate(
        initial_state=None,
        use_foresight=use_foresight,
    )


def run_mpc(env_length: int = 60,
            horizon: int = 20,
            use_foresight: bool = True,
            disturbance_bias: Optional[np.ndarray] = None,
            bias_aware: bool = True,
            rng: Optional[np.random.Generator] = None,
            ) -> List[Transition]:
    """ Run MPC agent

    Args:
        env_length (int, optional): Length of the environment. Defaults to 60.
        horizon (int, optional): Noise horizon of MPC. Defaults to 20.
        use_foresight (bool, optional): If true, feed the agent with future noise signal.
            Defaults to True.
        disturbance_bias (Optional[np.ndarray], optional): Bias for the state disturbance.
            Defaults to None.
        bias_aware (bool, optional): If true, feed the agent with actual noise (biased).
            Defaults to True.
        rng (Optional[np.random.Generator], optional): Generator that seeds the plant
            and, afterwards, the simulation. A freshly seeded generator is used if None.
            Defaults to None.

    Returns:
        List[Transition]: Trajectory of transitions
    """
    if rng is None:
        # The default used to crash on "rng.integers", fall back to OS entropy
        rng = np.random.default_rng()
    plant = FighterEnv(
        max_length=env_length,
        disturbance_bias=disturbance_bias,
        rng=np.random.default_rng(rng.integers(0, 2**30)))
    agent = MPC(
        action_size=plant.action_size,
        state_size=plant.state_size,
        noise_size=plant.noise_size,
        output_size=plant.output_size,
        horizon=horizon)
    agent.optimizer = agent.prepare_optimizer(plant.env_params)
    return run_agent(
        agent=agent,
        plant=plant,
        use_foresight=use_foresight,
        disturbance_bias=disturbance_bias,
        bias_aware=bias_aware,
        rng=rng,
    )


def run_rmpc(env_length: int = 60,
             horizon: int = 20,
             use_foresight: bool = True,
             rho: float = 0.1,
             disturbance_bias: Optional[np.ndarray] = None,
             bias_aware: bool = True,
             rng: Optional[np.random.Generator] = None,
             ) -> List[Transition]:
    """ Run Robust MPC

    Args:
        env_length (int, optional): Length of the environment. Defaults to 60.
        horizon (int, optional): Noise horizon of MPC. Defaults to 20.
        use_foresight (bool, optional): If true, feed the agent with future noise signal.
            Defaults to True.
        rho (float, optional): Robustness radius. Defaults to 0.1.
        disturbance_bias (Optional[np.ndarray], optional): Bias for the state disturbance.
            Defaults to None.
        bias_aware (bool, optional): If true, feed the agent with actual noise (biased).
            Defaults to True.
        rng (Optional[np.random.Generator], optional): Generator that seeds the plant
            and, afterwards, the simulation. A freshly seeded generator is used if None.
            Defaults to None.

    Returns:
        List[Transition]: Trajectory of transitions
    """
    if rng is None:
        # The default used to crash on "rng.integers", fall back to OS entropy
        rng = np.random.default_rng()
    plant = FighterEnv(
        max_length=env_length,
        disturbance_bias=disturbance_bias,
        rng=np.random.default_rng(rng.integers(0, 2**30)))
    agent = RobustMPC(action_size=plant.action_size,
                      state_size=plant.state_size,
                      noise_size=plant.noise_size,
                      output_size=plant.output_size,
                      horizon=horizon,
                      rho=rho,
                      state_constraints_flag=True,
                      input_constraints_flag=True)
    agent.optimizer = agent.prepare_optimizer(plant.env_params)
    return run_agent(
        agent=agent,
        plant=plant,
        use_foresight=use_foresight,
        disturbance_bias=disturbance_bias,
        bias_aware=bias_aware,
        rng=rng,
    )


def prepare_io(dataset: List[Transition],
               rng: np.random.Generator,
               expert_class: Union[Type[MPC], Type[RobustMPC]],
               expert_kwargs: Dict[str, Any],
               env_length: int = 60,
               n_past: int = 1,
               disturbance_bias: Optional[np.ndarray] = None,
               ) -> Tuple[Plant, FeatureHandler, List[AugmentedTransition]]:
    """ Build the plant and feature handler and augment the dataset with an expert

    Args:
        dataset (List[Transition]): Data handed to the dataset augmenter
        rng (np.random.Generator): Generator that seeds the plant
        expert_class (Union[Type[MPC], Type[RobustMPC]]): Class of the expert agent
        expert_kwargs (Dict[str, Any]): Additional keyword arguments of the expert,
            passed on top of the action, state, noise and output sizes of the plant
        env_length (int, optional): Length of the environment. Defaults to 60.
        n_past (int, optional): ``n_past`` argument of the feature handler. Defaults to 1.
        disturbance_bias (Optional[np.ndarray], optional): Bias for the state disturbance.
            Defaults to None.

    Returns:
        Tuple[Plant, FeatureHandler, List[AugmentedTransition]]: The plant, the
            feature handler and the augmented dataset
    """
    plant = FighterEnv(
        max_length=env_length,
        disturbance_bias=disturbance_bias,
        rng=np.random.default_rng(rng.integers(0, 2**30)))
    feature_handler = FeatureHandler(
        env_params=plant.env_params,
        n_past=n_past,
        add_bias=True,
        use_action_regressor=False,
        use_noise_regressor=True,
        use_state_regressor=False)
    expert_agent = expert_class(
        action_size=plant.action_size,
        state_size=plant.state_size,
        noise_size=plant.noise_size,
        output_size=plant.output_size,
        **expert_kwargs)
    expert_agent.optimizer = expert_agent.prepare_optimizer(plant.env_params)
    augmenter = AugmentDataset(
        expert_agent=expert_agent,
        feature_handler=feature_handler
    )
    augmented_dataset = augmenter(dataset)
    return plant, feature_handler, augmented_dataset


@try_solve(patience=2)
def run_io(plant: Plant,
           feature_handler: FeatureHandler,
           augmented_dataset: List[AugmentedTransition],
           rng: np.random.Generator,
           dataset_length: int = 300,
           disturbance_bias: Optional[np.ndarray] = None,
           bias_aware: bool = True,
           ) -> Callable[[Any], Any]:
    """ Train an IO controller and return a runner that simulates it

    Args:
        plant (Plant): Plant on which the trained agent is simulated
        feature_handler (FeatureHandler): Feature handler of the IO controller
        augmented_dataset (List[AugmentedTransition]): Training data
        rng (np.random.Generator): Generator that seeds the training
        dataset_length (int, optional): ``dataset_length`` of the IO controller.
            Defaults to 300.
        disturbance_bias (Optional[np.ndarray], optional): Bias for the state disturbance.
            Defaults to None.
        bias_aware (bool, optional): If true, feed the agent with actual noise (biased).
            Defaults to True.

    Returns:
        Callable[[Any], Any]: ``run_agent`` with everything but ``rng`` bound
    """
    controller_options = dict(
        env_params=feature_handler.env_params,
        include_constraints=True,
        soften_state_constraints=True,
        state_constraints_flag=True,
        action_constraints_flag=True,
        dataset_length=dataset_length,
        feature_handler=feature_handler,
    )
    controller = IOController(**controller_options)

    # Draw the training seed only after the controller has been built
    training_seed = rng.integers(0, 2**30)
    controller.train(augmented_dataset, rng=np.random.default_rng(training_seed))
    controller.action_optimizer = controller.prepare_action_optimizer()

    simulate = partial(
        run_agent,
        plant=plant,
        agent=controller,
        disturbance_bias=disturbance_bias,
        bias_aware=bias_aware,
        use_foresight=False,   # IO agent does not look into the future
    )
    return simulate


def run_io_mpc(dataset: List[Transition],
               rng: np.random.Generator,
               env_length: int = 60,
               n_past: int = 1,
               dataset_length: int = 300,
               disturbance_bias: Optional[np.ndarray] = None,
               bias_aware: bool = True,
               expert_horizon: int = 20,
               ) -> Callable[[Any], Any]:
    """ Train an IO agent with MPC as the expert

    Args:
        dataset (List[Transition]): List of transitions to be used
            as the training data
        rng (np.random.Generator): Generator shared by the preparation and the training
        env_length (int, optional): Length of the environment. Defaults to 60.
        n_past (int, optional): ``n_past`` argument of the feature handler. Defaults to 1.
        dataset_length (int, optional): ``dataset_length`` of the IO controller.
            Defaults to 300.
        disturbance_bias (Optional[np.ndarray], optional): Bias for the state disturbance.
            Defaults to None.
        bias_aware (bool, optional): If true, feed the agent with actual noise (biased).
            Defaults to True.
        expert_horizon (int, optional): Horizon of the expert agent. Defaults to 20.

    Returns:
        Callable[[Any], Any]: Runner returned by ``run_io``; calling it simulates
            the trained agent
    """
    expert_kwargs = {"horizon": expert_horizon}
    plant, feature_handler, augmented_dataset = prepare_io(
        dataset=dataset,
        rng=rng,
        expert_class=MPC,
        expert_kwargs=expert_kwargs,
        env_length=env_length,
        n_past=n_past,
        disturbance_bias=disturbance_bias,
    )
    training_kwargs = dict(
        plant=plant,
        feature_handler=feature_handler,
        augmented_dataset=augmented_dataset,
        rng=rng,
        dataset_length=dataset_length,
        disturbance_bias=disturbance_bias,
        bias_aware=bias_aware,
    )
    return run_io(**training_kwargs)


def run_io_rmpc(dataset: List[Transition],
                rng: np.random.Generator,
                expert_rho: float,
                n_past: int = 1,
                env_length: int = 60,
                dataset_length: int = 300,
                disturbance_bias: Optional[np.ndarray] = None,
                bias_aware: bool = True,
                expert_horizon: int = 20,
                ) -> Callable[[Any], Any]:
    """ Train an IO agent with Robust MPC as the expert

    Args:
        dataset (List[Transition]): List of transitions to be used
            as the training data
        rng (np.random.Generator): Generator shared by the preparation and the training
        expert_rho (float): ``rho`` argument of the Robust MPC expert
        n_past (int, optional): ``n_past`` argument of the feature handler. Defaults to 1.
        env_length (int, optional): Length of the environment. Defaults to 60.
        dataset_length (int, optional): ``dataset_length`` of the IO controller.
            Defaults to 300.
        disturbance_bias (Optional[np.ndarray], optional): Bias for the state disturbance.
            Defaults to None.
        bias_aware (bool, optional): If true, feed the agent with actual noise (biased).
            Defaults to True.
        expert_horizon (int, optional): Horizon of the expert agent. Defaults to 20.

    Returns:
        Callable[[Any], Any]: Runner returned by ``run_io``; calling it simulates
            the trained agent
    """
    expert_kwargs = {
        "horizon": expert_horizon,
        "rho": expert_rho,
        "state_constraints_flag": True,
        "input_constraints_flag": True,
    }
    plant, feature_handler, augmented_dataset = prepare_io(
        dataset=dataset,
        rng=rng,
        expert_class=RobustMPC,
        expert_kwargs=expert_kwargs,
        env_length=env_length,
        n_past=n_past,
        disturbance_bias=disturbance_bias,
    )
    training_kwargs = dict(
        plant=plant,
        feature_handler=feature_handler,
        augmented_dataset=augmented_dataset,
        rng=rng,
        dataset_length=dataset_length,
        disturbance_bias=disturbance_bias,
        bias_aware=bias_aware,
    )
    return run_io(**training_kwargs)


def make_figure(cost_data: Dict[str, List[float]],
                title: str,
                color_list: List[str] = px.colors.qualitative.T10
                ) -> go.FigureWidget:
    """ Draw the cost distribution of every agent

    A dashed vertical line marks the median cost of each agent.

    Args:
        cost_data (Dict[str, List[float]]): Mapping of agent labels to cost lists
        title (str): Title of the plot
        color_list (List[str], optional): Colors, reused cyclically if there are more
            agents than colors. Defaults to px.colors.qualitative.T10.

    Returns:
        go.FigureWidget: Figure created by ``ff.create_distplot``
    """
    labels = list(cost_data.keys())
    costs = list(cost_data.values())
    n_colors = len(color_list)
    colors = [color_list[position % n_colors] for position, _ in enumerate(labels)]

    fig = ff.create_distplot(
        costs,
        group_labels=labels,
        colors=colors,
        bin_size=4,
        show_rug=False)

    # Median marker per agent, drawn in the color of the agent
    for cost_list, color in zip(costs, colors):
        fig.add_vline(
            x=np.median(cost_list),
            line_width=3,
            line_dash="dash",
            line_color=color
        )

    def _axis_layout(axis_title: str) -> Dict[str, Any]:
        # Both axes share the same line and grid styling
        return dict(
            showline=True,
            linecolor="#a2a2a2",
            linewidth=1,
            showgrid=True,
            gridcolor="#a2a2a2",
            title=dict(text=axis_title),
        )

    fig.update_layout(
        template="plotly_white",
        width=700,
        height=400,
        title=dict(text=f"{title}", x=0.5),
        yaxis=_axis_layout("density"),
        xaxis=_axis_layout("cost"),
        bargap=0.1,
        font=dict(size=12, color="Black"),
    )
    return fig

## Trajectory Cost Distributions

- Experiment in Figure 1 Left


In [30]:
# Configuration of the "Figure 1 Left" experiment
n_proc = 20  # Choose this based on your CPU
n_trials = 100
env_length = 51
horizon = 20
n_past = 1
# Every per-trial seed below is drawn from this generator, so the order of the
# statements in this cell determines the seeds
seed_rng = np.random.default_rng(42)

# MPC that is not fed with the future noise signal, one trajectory per trial
mpc_obl_trajectories = parallelize(
    n_proc, run_mpc, [dict(use_foresight=False, # Without hindsight
                           horizon=horizon,
                           env_length=env_length,
                           rng=np.random.default_rng(_seed))
                      for _seed in seed_rng.integers(0, 2**30, size=n_trials)],
                      loading_bar_kwargs={"desc": "MPC-obl trials"})
# MPC that is fed with the future noise signal, one trajectory per trial
mpc_dst_trajectories = parallelize(
    n_proc, run_mpc, [dict(use_foresight=True, # With hindsight
                           horizon=horizon,
                           env_length=env_length,
                           rng=np.random.default_rng(_seed))
                      for _seed in seed_rng.integers(0, 2**30, size=n_trials)],
                      loading_bar_kwargs={"desc": "MPC-dst trials"})

# run_io_mpc trains a single IO agent on the first 10 MPC-obl trajectories in this
# process and returns a runner; only the simulations of that runner are parallelized
io_mpc_dst_trajectories = parallelize(
        n_proc, run_io_mpc(mpc_obl_trajectories[:10],
                            dataset_length=200,
                            n_past=n_past,
                            rng=np.random.default_rng(seed_rng.integers(0, 2**30))),
        [dict(rng=np.random.default_rng(_seed))
            for _seed in seed_rng.integers(0, 2**30, size=n_trials)],
            loading_bar_kwargs={"desc": "IO-MPC trials"})

MPC-obl trials:   0%|          | 0/100 [00:00<?, ?it/s]

MPC-dst trials:   0%|          | 0/100 [00:00<?, ?it/s]


You are solving a parameterized problem that is not DPP. Because the problem is not DPP, subsequent solves will not be faster than the first one. For more information, see the documentation on Discplined Parametrized Programming, at
	https://www.cvxpy.org/tutorial/advanced/index.html#disciplined-parametrized-programming



IO-MPC trials:   0%|          | 0/100 [00:00<?, ?it/s]

Plot the cost distributions.

In [31]:
# Per-step costs from index int(60 * 0.6) = 36 onwards of every trajectory.
# NOTE(review): the cut-off is hard-coded and not derived from env_length - confirm
mpc_obl_costs = [transition.cost
                 for traj in mpc_obl_trajectories
                 for transition in traj[int(60 * 0.6):]]
mpc_dst_costs = [transition.cost
                 for traj in mpc_dst_trajectories
                 for transition in traj[int(60 * 0.6):]]
io_mpc_dst_costs = [transition.cost
                    for traj in io_mpc_dst_trajectories
                    for transition in traj[int(60 * 0.6):]]

fig = make_figure(
    cost_data={
        "MPC (obl)": mpc_obl_costs,
        "MPC (dst)": mpc_dst_costs,
        "IO-MPC (dst)": io_mpc_dst_costs,
    },
    title=f"Figure 1 left with {len(mpc_obl_trajectories)} trials",
)
fig

- Experiment in Figure 1 Middle

In [32]:
# Configuration of the "Figure 1 Middle" experiment
n_trials = 50
# Bias for the state disturbance, reshaped to a column vector
disturbance_bias = np.array([0.0, 0.005]).reshape(-1, 1)
# "rho" argument of the Robust MPC expert
rho=0.01
# Restart the seed sequence; the order of the draws below determines the seeds
seed_rng = np.random.default_rng(42)


# IO agent with MPC as the expert, trained on the first 10 trajectories of
# mpc_obl_trajectories (defined by an earlier cell) and simulated with the bias
io_mpc_dst_trajectories = parallelize(
        n_proc, run_io_mpc(mpc_obl_trajectories[:10],
                            dataset_length=200,
                            n_past=n_past,
                            disturbance_bias=disturbance_bias,
                            rng=np.random.default_rng(seed_rng.integers(0, 2**30))),
        [dict(rng=np.random.default_rng(_seed))
            for _seed in seed_rng.integers(0, 2**30, size=n_trials)],
            loading_bar_kwargs={"desc": "IO-MPC trials"})
# Same setup with Robust MPC as the expert
io_rmpc_dst_trajectories = parallelize(
        n_proc, run_io_rmpc(mpc_obl_trajectories[:10],
                            dataset_length=200,
                            n_past=n_past,
                            expert_rho=rho,
                            disturbance_bias=disturbance_bias,
                            rng=np.random.default_rng(seed_rng.integers(0, 2**30))),
        [dict(rng=np.random.default_rng(_seed))
            for _seed in seed_rng.integers(0, 2**30, size=n_trials)],
            loading_bar_kwargs={"desc": "IO-RMPC trials"})

IO-MPC trials:   0%|          | 0/50 [00:00<?, ?it/s]

IO-RMPC trials:   0%|          | 0/50 [00:00<?, ?it/s]

In [33]:
# Per-step costs from index int(60 * 0.6) = 36 onwards of every trajectory
io_mpc_dst_costs = [transition.cost
                    for traj in io_mpc_dst_trajectories
                    for transition in traj[int(60 * 0.6):]]
io_rmpc_dst_costs = [transition.cost
                     for traj in io_rmpc_dst_trajectories
                     for transition in traj[int(60 * 0.6):]]

fig = make_figure(
    cost_data={
        "IO-MPC": io_mpc_dst_costs,
        "IO-RMPC": io_rmpc_dst_costs,
    },
    title=f"Figure 1 middle with {len(io_rmpc_dst_trajectories)} trials",
)
fig


- Experiment in Figure 1 Right

In [34]:
# Configuration of the "Figure 1 Right" experiment
n_trials = 50
# Bias for the state disturbance, reshaped to a column vector
disturbance_bias = np.array([0.0, 0.005]).reshape(-1, 1)
# Restart the seed sequence; the order of the draws below determines the seeds
seed_rng = np.random.default_rng(42)

# MPC that is not fed with the future noise signal
mpc_obl_trajectories = parallelize(
    n_proc, run_mpc, [dict(use_foresight=False,
                           horizon=horizon,
                           env_length=env_length,
                           disturbance_bias=disturbance_bias,
                           rng=np.random.default_rng(_seed))
                      for _seed in seed_rng.integers(0, 2**30, size=n_trials)],
                      loading_bar_kwargs={"desc": "MPC-obl trials"})
# MPC with foresight that is fed with the biased noise
mpc_fdst_trajectories = parallelize(
    n_proc, run_mpc, [dict(use_foresight=True,
                           horizon=horizon,
                           env_length=env_length,
                           disturbance_bias=disturbance_bias,
                           bias_aware=True, # Bias in the noise is known
                           rng=np.random.default_rng(_seed))
                      for _seed in seed_rng.integers(0, 2**30, size=n_trials)],
                      loading_bar_kwargs={"desc": "MPC-fdst trials"})
# MPC with foresight whose noise signal has the bias subtracted (see run_agent)
mpc_pdst_trajectories = parallelize(
    n_proc, run_mpc, [dict(use_foresight=True,
                           horizon=horizon,
                           env_length=env_length,
                           disturbance_bias=disturbance_bias,
                           bias_aware=False, # Bias in the noise is not known
                           rng=np.random.default_rng(_seed))
                      for _seed in seed_rng.integers(0, 2**30, size=n_trials)],
                      loading_bar_kwargs={"desc": "MPC-pdst trials"})


MPC-obl trials:   0%|          | 0/50 [00:00<?, ?it/s]

MPC-fdst trials:   0%|          | 0/50 [00:00<?, ?it/s]

MPC-pdst trials:   0%|          | 0/50 [00:00<?, ?it/s]

In [35]:
# Per-step costs from index int(60 * 0.6) = 36 onwards of every trajectory.
# io_rmpc_dst_trajectories is not computed here, it comes from an earlier cell.
mpc_obl_costs = [transition.cost
                 for traj in mpc_obl_trajectories
                 for transition in traj[int(60 * 0.6):]]
mpc_fdst_costs = [transition.cost
                  for traj in mpc_fdst_trajectories
                  for transition in traj[int(60 * 0.6):]]
mpc_pdst_costs = [transition.cost
                  for traj in mpc_pdst_trajectories
                  for transition in traj[int(60 * 0.6):]]
io_rmpc_dst_costs = [transition.cost
                     for traj in io_rmpc_dst_trajectories
                     for transition in traj[int(60 * 0.6):]]

fig = make_figure(
    cost_data={
        "MPC (obl)": mpc_obl_costs,
        "MPC (f-dst)": mpc_fdst_costs,
        "MPC (p-dst)": mpc_pdst_costs,
        "IO-RMPC": io_rmpc_dst_costs,
    },
    title=f"Figure 1 Right with {len(io_rmpc_dst_trajectories)} trials",
)
# fig.update_layout(width=1200, height=600)
fig

## Uncertainty Radius

In [36]:
def make_rho_figure(cost_data: Dict[str, Dict[int, List[float]]],
                    title: str,
                    color_list: List[str] = px.colors.qualitative.T10,
                    percentiles: Tuple[int] = (20, 80),
                    ) -> go.FigureWidget:
    """ Plot the median cost and a percentile band against the uncertainty radius

    Args:
        cost_data (Dict[str, Dict[int, List[float]]]): Dictionary of costs per rho
            for every label
        title (str): Title of the plot
        color_list (List[str], optional): Colors, reused cyclically if there are more
            labels than colors. Defaults to px.colors.qualitative.T10.
        percentiles (Tuple[int]): Lower and Upper percentiles.

    Returns:
        go.FigureWidget: Plot widget
    """
    figure = go.FigureWidget()
    labels = list(cost_data.keys())
    costs_per_label = list(cost_data.values())
    n_colors = len(color_list)
    colors = [color_list[position % n_colors] for position, _ in enumerate(labels)]

    lower_percentile, upper_percentile = percentiles
    for color, cost_dict, label in zip(colors, costs_per_label, labels):
        # Lower percentile, median and upper percentile of the costs per rho
        summary = {
            rho: np.percentile(cost_list, [lower_percentile, 50, upper_percentile])
            for rho, cost_list in cost_dict.items()
        }
        rho_axis = list(summary.keys())
        lower_curve = [stats[0] for stats in summary.values()]
        median_curve = [stats[1] for stats in summary.values()]
        upper_curve = [stats[2] for stats in summary.values()]

        median_trace = go.Scatter(
            x=rho_axis,
            y=median_curve,
            line=dict(color=color),
            mode="lines",
            name=label,
            legendgroup=label
        )
        upper_trace = go.Scatter(
            name="Upper Bound",
            x=rho_axis,
            y=upper_curve,
            mode="lines",
            marker=dict(color=color),
            line=dict(width=0),
            showlegend=False,
            legendgroup=label
        )
        # Added directly after the upper bound so that "tonexty" fills the band
        lower_trace = go.Scatter(
            name="Lower Bound",
            x=rho_axis,
            y=lower_curve,
            marker=dict(color=color),
            line=dict(width=0),
            mode="lines",
            # fillcolor=color,
            opacity=0.5,
            fill="tonexty",
            showlegend=False,
            legendgroup=label
        )
        for trace in (median_trace, upper_trace, lower_trace):
            figure.add_trace(trace)

    axis_style = dict(
        showline=True,
        linecolor="#a2a2a2",
        linewidth=1,
        showgrid=True,
        gridcolor="#a2a2a2",
    )
    figure.update_layout(
        template="plotly_white",
        width=700,
        height=400,
        title=dict(text=f"{title}", x=0.5),
        yaxis=dict(title=dict(text="costs"), **axis_style),
        xaxis=dict(title=dict(text="uncertainty radius"), type="log", **axis_style),
        font=dict(size=12, color="Black"),
    )
    return figure

- Experiment in Figure 2 Left

In [23]:
# Configuration of the "Figure 2 Left" experiment
train_dataset_length = 200  # "dataset_length" of every trained IO agent
n_obl_trajectories = 20  # Number of MPC-obl trajectories that form the dataset
n_io_agents = 50  # Number of IO agents trained per rho
n_io_trials = 50  # Number of simulations per trained IO agent
n_rho = 12  # Number of rho values
env_length = 60
expert_horizon = 20
n_proc = 9  # Choose this based on your CPU cores
# Every seed of this experiment is drawn from this generator
seed_rng = np.random.default_rng(42)
# Bias for the state disturbance, reshaped to a column vector
disturbance_bias = np.array([0.0, 0.005]).reshape(-1, 1)


def async_execute_run_io(*args, **kwargs):
    """ Forward every argument to ``run_io`` and return its result

    Used as the ``fn`` of ``parallelize`` when the IO agents are trained.
    """
    runner = run_io(*args, **kwargs)
    return runner


def run_rho_experiment(rho: float,
                       rng: np.random.Generator,
                       mpc_obl_trajectories: List[List[Transition]]):
    """ Train and evaluate several IO agents with a Robust MPC expert of radius rho

    Args:
        rho (float): ``rho`` argument of the Robust MPC expert
        rng (np.random.Generator): Generator from which every seed is drawn
        mpc_obl_trajectories (List[List[Transition]]): Dataset that is augmented
            with the expert

    Returns:
        List[float]: One mean cost per trained IO agent, computed over the
            transitions from index int(60 * 0.6) onwards of all of its trajectories
    """
    # Augment the obl trajectories(dataset) with the RMPC actions
    preparation_rng = np.random.default_rng(rng.integers(1, 2**30))
    rmpc_kwargs = {
        "horizon": expert_horizon,
        "rho": rho,
        "state_constraints_flag": True,
        "input_constraints_flag": True
    }
    plant, feature_handler, augmented_dataset = prepare_io(
        dataset=mpc_obl_trajectories,
        rng=preparation_rng,
        expert_class=RobustMPC,
        expert_kwargs=rmpc_kwargs,
        env_length=env_length,
        n_past=n_past,
        disturbance_bias=disturbance_bias,
    )

    # Train ```n_io_agents``` many agents
    training_kwargs = [
        dict(
            plant=plant,
            feature_handler=feature_handler,
            augmented_dataset=augmented_dataset,
            rng=np.random.default_rng(_seed),
            dataset_length=train_dataset_length,
            disturbance_bias=disturbance_bias,
            bias_aware=False,
        )
        for _seed in rng.integers(0, 2**30, size=n_io_agents)
    ]
    io_agent_runables = parallelize(
        n_proc=n_proc,
        fn=async_execute_run_io,
        kwargs_list=training_kwargs,
    )

    # Evaluate all io_agents
    mean_costs = []
    progress_desc = f"Evaluate IO-(rho={np.round(rho, decimals=4)})"
    for io_agent_runner in tqdm(io_agent_runables, desc=progress_desc):
        trial_kwargs = [dict(rng=np.random.default_rng(_seed))
                        for _seed in rng.integers(0, 2**30, size=n_io_trials)]
        trajectories = parallelize(
            n_proc=n_proc,
            fn=io_agent_runner,
            kwargs_list=trial_kwargs)
        steady_costs = [trans.cost
                        for traj in trajectories
                        for trans in traj[int(60 * 0.6):]]
        mean_costs.append(np.mean(steady_costs))  # Append the average of the steady state cost
    return mean_costs


# Gather ```n_obl_trajectories``` many mpc obl trajectories
# ("horizon" is not set in this cell, it comes from an earlier cell)
mpc_obl_trajectories = parallelize(
    n_proc=n_proc,
    fn=run_mpc,
    kwargs_list=[dict(use_foresight=False,
                      horizon=horizon,
                      env_length=env_length,
                      disturbance_bias=disturbance_bias,
                      rng=np.random.default_rng(_seed))
                 for _seed in seed_rng.integers(0, 2**30, size=n_obl_trajectories)])

# Logarithmically spaced rho values between 10^-3 and 10^-1.6
rho_keys = np.logspace(-3, -1.6, n_rho)
# One run_rho_experiment call per rho value. Every call uses parallelize itself,
# so worker processes start further worker processes.
# NOTE(review): n_proc is hard-coded to 12 here instead of using the n_proc
# variable - presumably to match n_rho; confirm
rho_values = parallelize(
    n_proc=12,
    fn=run_rho_experiment,
    kwargs_list=[dict(rho=_rho,
                      rng=np.random.default_rng(_seed),
                      mpc_obl_trajectories=mpc_obl_trajectories)
                 for _rho, _seed in zip(rho_keys, seed_rng.integers(0, 2**30, size=len(rho_keys)))],
    loading_bar_kwargs=dict(desc="rho values")
)
# Map every rho to the list of mean costs returned by run_rho_experiment
io_rmpc_rho_costs = {key: value for key, value in zip(rho_keys, rho_values)}

# save_experiment(io_rmpc_rho_costs, seed=42, exp_dir="./results", name="figure_2_left")


You are solving a parameterized problem that is not DPP. Because the problem is not DPP, subsequent solves will not be faster than the first one. For more information, see the documentation on Discplined Parametrized Programming, at
	https://www.cvxpy.org/tutorial/advanced/index.html#disciplined-parametrized-programming


You are solving a parameterized problem that is not DPP. Because the problem is not DPP, subsequent solves will not be faster than the first one. For more information, see the documentation on Discplined Parametrized Programming, at
	https://www.cvxpy.org/tutorial/advanced/index.html#disciplined-parametrized-programming


You are solving a parameterized problem that is not DPP. Because the problem is not DPP, subsequent solves will not be faster than the first one. For more information, see the documentation on Discplined Parametrized Programming, at
	https://www.cvxpy.org/tutorial/advanced/index.html#disciplined-parametrized-programming



Evaluate IO-(rho=0.0013):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.001):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.0018):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.0032):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.0043):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.0024):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.0058):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.0078):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.0104):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.0187):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.014):   0%|          | 0/50 [00:00<?, ?it/s]

Evaluate IO-(rho=0.0251):   0%|          | 0/50 [00:00<?, ?it/s]

In [37]:
# The MPC (f-dst) costs do not depend on rho, the same list is used for every rho
make_rho_figure(
    cost_data={
        "IO-RMPC": io_rmpc_rho_costs,
        "MPC (f-dst)": dict.fromkeys(io_rmpc_rho_costs, mpc_fdst_costs),
    },
    title="Figure 2 Left",
)

FigureWidget({
    'data': [{'legendgroup': 'IO-RMPC',
              'line': {'color': '#4C78A8'},
              'mode': 'lines',
              'name': 'IO-RMPC',
              'type': 'scatter',
              'uid': 'a524190a-7e87-4156-a0df-745dbe5f260c',
              'x': [0.001, 0.001340518238791474, 0.001796989148532594,
                    0.002408896728518303, 0.003229169999943897,
                    0.004328761281083057, 0.0058027834486661855,
                    0.007778737048694304, 0.010427538888537676,
                    0.013978306065792125, 0.01873817422860383,
                    0.025118864315095794],
              'y': [67.97742921401436, 65.50168908935765, 66.36510062488568,
                    60.93384857065527, 64.64576898647763, 70.14221066972183,
                    64.7411751873172, 68.57921952976453, 65.79088118253893,
                    62.27300188164426, 50.06833785605966, 63.45952159137413]},
             {'legendgroup': 'IO-RMPC',
              'line': {'

- Experiment in Figure 2 Middle

In [39]:
# Configuration of the "Figure 2 Middle" experiment
n_obl_trajectories = 10
n_trials = 100
# Restart the seed sequence; the order of the draws below determines the seeds
seed_rng = np.random.default_rng(42)
# Bias for the state disturbance, reshaped to a column vector
disturbance_bias = np.array([0.0, 0.005]).reshape(-1, 1)


# Gather ```n_obl_trajectories``` many mpc obl trajectories
mpc_obl_trajectories = parallelize(
    n_proc=n_proc,
    fn=run_mpc,
    kwargs_list=[dict(use_foresight=False,
                      horizon=horizon,
                      env_length=env_length,
                      disturbance_bias=disturbance_bias,
                      rng=np.random.default_rng(_seed))
                 for _seed in seed_rng.integers(0, 2**30, size=n_obl_trajectories)])


# For every rho: train one IO agent with a Robust MPC expert in this process,
# then simulate it ```n_trials``` times in parallel
io_rmpc_trajectories = {}
for rho in np.logspace(-3, -1.6, 12):
    io_rmpc_trajectories[rho] = parallelize(
        n_proc, run_io_rmpc(mpc_obl_trajectories,
                            dataset_length=200,
                            n_past=n_past,
                            expert_rho=rho,
                            disturbance_bias=disturbance_bias,
                            rng=np.random.default_rng(seed_rng.integers(0, 2**30))),
        [dict(rng=np.random.default_rng(_seed))
            for _seed in seed_rng.integers(0, 2**30, size=n_trials)],
            loading_bar_kwargs=dict(desc="rho values"))


rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

rho values:   0%|          | 0/100 [00:00<?, ?it/s]

In [40]:
# Per-step costs from index int(60 * 0.6) = 36 onwards, grouped by rho
io_rmpc_costs = {
    rho: [trans.cost for traj in trajectories for trans in traj[int(60 * 0.6):]]
    for rho, trajectories in io_rmpc_trajectories.items()
}

# The two baselines do not depend on rho, the same list is used for every rho
make_rho_figure(
    cost_data={
        "IO-RMPC": io_rmpc_costs,
        "MPC (f-dst)": dict.fromkeys(io_rmpc_costs, mpc_fdst_costs),
        "IO MPC": dict.fromkeys(io_rmpc_costs, io_mpc_dst_costs),
    },
    title="Figure 2 Middle",
)

FigureWidget({
    'data': [{'legendgroup': 'IO-RMPC',
              'line': {'color': '#4C78A8'},
              'mode': 'lines',
              'name': 'IO-RMPC',
              'type': 'scatter',
              'uid': 'c3106fe3-625d-46cb-aeef-f1e085e4a3e5',
              'x': [0.001, 0.001340518238791474, 0.001796989148532594,
                    0.002408896728518303, 0.003229169999943897,
                    0.004328761281083057, 0.0058027834486661855,
                    0.007778737048694304, 0.010427538888537676,
                    0.013978306065792125, 0.01873817422860383,
                    0.025118864315095794],
              'y': [55.042651561696466, 56.408631700092485, 59.23982224465249,
                    58.10681000242705, 56.74585291365133, 59.86577481394686,
                    60.012200800971726, 62.25102393384512, 70.61932852698811,
                    63.82505312317947, 47.87424683580917, 49.20701999264188]},
             {'legendgroup': 'IO-RMPC',
              'line'

- Experiment in Figure 2 Right

Optimal IO-RMPC

In [44]:
# Pick the rho with the smallest median cost (the first one in case of a tie)
opt_rho = min(io_rmpc_costs, key=lambda candidate: np.median(io_rmpc_costs[candidate]))
opt_costs = io_rmpc_costs[opt_rho]

fig = make_figure(
    cost_data={
        "MPC (f-dst)": mpc_fdst_costs,
        f"IO-RMPC(rho*={np.round(opt_rho, decimals=4)})": opt_costs,
        "IO-MPC": io_mpc_dst_costs,
    },
    title=f"Figure 2 Right with {len(io_rmpc_dst_trajectories)} trials",
)
fig