# Fighter Experiments

### Trajectory Cost Distributions

- The experiment shown in Figure 1 Left


In [None]:
import numpy as np

from io_agent.plant.fighter import FighterEnv

from common import run_mpc, run_rmpc, run_io_mpc, run_io_rmpc
from utils import parallelize, steady_state_cost
from plotter import histogram_figure


# Figure 1 (left): collect MPC trajectories without and with foresight, then
# IO-MPC trajectories built from a subset of the no-foresight runs. All agents
# are evaluated on the same trial seeds.
n_proc = 20  # Choose this based on your CPU
n_trials = 50  # Number of evaluation trial seeds drawn below
env_length = 61  # Passed to FighterEnv as max_length
horizon = 20  # Forwarded to run_mpc as horizon
n_past = 1  # Forwarded to run_io_mpc as n_past
n_train_obl_trajectories = 10  # Number of MPC-obl trajectories handed to run_io_mpc
seed_rng = np.random.default_rng(42)

plant = FighterEnv(max_length=env_length)
# Evaluate each agent with the same set of random disturbances
# (seed_rng is consumed in this order: dataset_permute first, then trials).
experiment_seeds = dict(
    dataset_permute=seed_rng.integers(0, 2**30, size=1),
    trials=seed_rng.integers(0, 2**30, size=n_trials)
)

mpc_obl_trajectories = parallelize(
    n_proc=min(n_proc, n_trials),
    fn=run_mpc,
    kwargs_list=[dict(
        use_foresight=False,  # Without foresight (the "obl" agent)
        horizon=horizon,
        plant=plant,
        env_reset_rng=np.random.default_rng(_seed)) for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "MPC-obl trials"})
mpc_dst_trajectories = parallelize(
    n_proc=min(n_proc, n_trials),
    fn=run_mpc,
    kwargs_list=[dict(
        use_foresight=True,  # With foresight (the "dst" agent)
        horizon=horizon,
        plant=plant,
        env_reset_rng=np.random.default_rng(_seed)) for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "MPC-dst trials"})
# NOTE(review): max_length is hard-coded to 60 here although env_length (61)
# was used for the plant above - confirm this difference is intentional.
plant = FighterEnv(max_length=60)
# NOTE(review): unlike the other run_io_mpc calls in this file, no
# expert_horizon is passed here - confirm this is intended.
io_mpc_dst_trajectories = parallelize(
    n_proc=min(n_proc, n_trials),
    fn=run_io_mpc(dataset=mpc_obl_trajectories[:n_train_obl_trajectories],  # Using the obl trajectories
                  dataset_length=200,
                  n_past=n_past,
                  plant=plant,
                  dataset_permute_rng=np.random.default_rng(experiment_seeds["dataset_permute"])),
    kwargs_list=[dict(env_reset_rng=np.random.default_rng(_seed))
                 for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "IO-MPC trials"})

In [None]:
# Steady-state costs of the three agents compared in Figure 1 (left).
# Insertion order is kept: MPC (obl), MPC (dst), IO-MPC.
figure_1_left_costs = {
    label: steady_state_cost(trajectories, 0.4)
    for label, trajectories in (
        ("MPC (obl)", mpc_obl_trajectories),
        ("MPC (dst)", mpc_dst_trajectories),
        ("IO-MPC", io_mpc_dst_trajectories),
    )
}
histogram_figure(
    figure_1_left_costs,
    title=f"Figure 1 left with {len(mpc_obl_trajectories)} trials",
)

- The experiment shown in Figure 1 Middle

In [None]:
import numpy as np

from io_agent.plant.fighter import FighterEnv

from common import run_mpc, run_io_mpc, run_io_rmpc
from utils import parallelize, steady_state_cost
from plotter import histogram_figure


# Settings of the Figure 1 (middle) experiment: IO-MPC versus IO-RMPC on the
# plant with a biased disturbance.
n_proc = 20  # Choose this based on your CPU
n_trials = 50
horizon = 20
rho = 0.01
n_past = 1
env_length = 61
n_train_obl_trajectories = 10
disturbance_bias = np.array([0.0, 0.005]).reshape(-1, 1)
seed_rng = np.random.default_rng(42)

# seed_rng is consumed in a fixed order: the dataset permutation seed, the
# trial seeds and, further down, the seeds of the MPC-obl dataset runs.
experiment_seeds = {
    "dataset_permute": seed_rng.integers(0, 2**30, size=1),
    "trials": seed_rng.integers(0, 2**30, size=n_trials),
}

# Biased-disturbance plant used for evaluation; unbiased plant used to
# collect the MPC-obl dataset.
plant = FighterEnv(max_length=env_length, disturbance_bias=disturbance_bias)
unbiased_plant = FighterEnv(max_length=env_length)

# One MPC run without foresight per freshly drawn dataset seed.
obl_dataset_kwargs = [
    {
        "use_foresight": False,
        "horizon": horizon,
        "plant": unbiased_plant,
        "env_reset_rng": np.random.default_rng(_seed),
    }
    for _seed in seed_rng.integers(0, 2**30, size=n_train_obl_trajectories)
]
mpc_obl_train_trajectories = parallelize(
    n_proc=min(n_proc, n_train_obl_trajectories),
    fn=run_mpc,
    kwargs_list=obl_dataset_kwargs,
    loading_bar_kwargs={"desc": "MPC-obl-dataset trials"},
)

# IO-MPC: build the callable once, then evaluate it on every trial seed.
io_mpc_worker_count = min(n_proc, n_trials)
io_mpc_runner = run_io_mpc(
    mpc_obl_train_trajectories,
    dataset_length=200,
    n_past=n_past,
    expert_horizon=horizon,
    plant=plant,
    dataset_permute_rng=np.random.default_rng(experiment_seeds["dataset_permute"]),
)
io_mpc_dst_trajectories = parallelize(
    n_proc=io_mpc_worker_count,
    fn=io_mpc_runner,
    kwargs_list=[
        {"env_reset_rng": np.random.default_rng(_seed)}
        for _seed in experiment_seeds["trials"]
    ],
    loading_bar_kwargs={"desc": "IO-MPC trials"},
)

# IO-RMPC: same dataset, permutation seed and trial seeds, with expert_rho=rho.
io_rmpc_worker_count = min(n_proc, n_trials)
io_rmpc_runner = run_io_rmpc(
    mpc_obl_train_trajectories,
    dataset_length=200,
    n_past=n_past,
    plant=plant,
    expert_horizon=horizon,
    expert_rho=rho,
    dataset_permute_rng=np.random.default_rng(experiment_seeds["dataset_permute"]),
)
io_rmpc_dst_trajectories = parallelize(
    n_proc=io_rmpc_worker_count,
    fn=io_rmpc_runner,
    kwargs_list=[
        {"env_reset_rng": np.random.default_rng(_seed)}
        for _seed in experiment_seeds["trials"]
    ],
    loading_bar_kwargs={"desc": "IO-RMPC trials"},
)

In [None]:
# Steady-state costs of the IO-MPC and IO-RMPC agents (Figure 1, middle).
figure_1_center_costs = {
    label: steady_state_cost(trajectories, 0.4)
    for label, trajectories in (
        ("IO-MPC", io_mpc_dst_trajectories),
        ("IO-RMPC", io_rmpc_dst_trajectories),
    )
}
histogram_figure(
    figure_1_center_costs,
    title=f"Figure 1 center with {len(io_mpc_dst_trajectories)} trials",
)

- The experiment shown in Figure 1 Right

In [None]:
import numpy as np

from io_agent.plant.fighter import FighterEnv

from common import run_mpc, run_io_mpc, run_io_rmpc
from utils import parallelize, steady_state_cost
from plotter import histogram_figure


# Figure 1 (right): on the plant with a biased disturbance, compare MPC
# without foresight, MPC with foresight (bias-aware and bias-unaware) and
# IO-RMPC. All agents are evaluated on the same trial seeds.
n_proc = 20  # Choose this based on your CPU
n_trials = 50
horizon = 20
rho = 0.01
n_past = 1
env_length = 61
n_train_obl_trajectories = 10
disturbance_bias = np.array([0.0, 0.005]).reshape(-1, 1)
seed_rng = np.random.default_rng(42)


# Environment with biased disturbance
plant = FighterEnv(max_length=env_length, disturbance_bias=disturbance_bias)
# seed_rng is consumed in a fixed order (dataset_permute, trials, then the
# dataset seeds below); keep that order so the seeds do not change.
experiment_seeds = dict(
    dataset_permute=seed_rng.integers(0, 2**30, size=1),
    trials=seed_rng.integers(0, 2**30, size=n_trials)
)

# Environment without disturbance bias
unbiased_plant = FighterEnv(max_length=env_length)
mpc_obl_train_trajectories = parallelize(
    n_proc=min(n_proc, n_train_obl_trajectories),
    fn=run_mpc,
    kwargs_list=[dict(use_foresight=False,  # Without foresight
                      horizon=horizon,
                      plant=unbiased_plant,
                      env_reset_rng=np.random.default_rng(_seed))
                 for _seed in seed_rng.integers(0, 2**30, size=n_train_obl_trajectories)],
    loading_bar_kwargs={"desc": "MPC-obl-dataset trials"})

# The three evaluation runs below iterate over the n_trials trial seeds, so
# the worker count is capped by n_trials (not by the dataset size).
mpc_obl_trajectories = parallelize(
    n_proc=min(n_proc, n_trials),
    fn=run_mpc,
    kwargs_list=[dict(use_foresight=False,  # Without foresight
                      horizon=horizon,
                      plant=plant,
                      env_reset_rng=np.random.default_rng(_seed))
                 for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "MPC-obl trials"})
mpc_fdst_trajectories = parallelize(
    n_proc=min(n_proc, n_trials),
    fn=run_mpc,
    kwargs_list=[dict(use_foresight=True,  # With foresight
                      horizon=horizon,
                      bias_aware=True,  # Aware of the bias in the disturbance
                      plant=plant,
                      env_reset_rng=np.random.default_rng(_seed))
                 for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "MPC-fdst trials"})
mpc_pdst_trajectories = parallelize(
    n_proc=min(n_proc, n_trials),
    fn=run_mpc,
    kwargs_list=[dict(use_foresight=True,  # With foresight
                      horizon=horizon,
                      bias_aware=False,  # Unaware of the bias in the disturbance
                      plant=plant,
                      env_reset_rng=np.random.default_rng(_seed))
                 for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "MPC-pdst trials"})
io_rmpc_dst_trajectories = parallelize(
    n_proc=min(n_proc, n_trials),
    fn=run_io_rmpc(mpc_obl_train_trajectories,
                   dataset_length=200,
                   n_past=n_past,
                   plant=plant,
                   expert_horizon=horizon,
                   expert_rho=rho,
                   dataset_permute_rng=np.random.default_rng(experiment_seeds["dataset_permute"])),
    kwargs_list=[dict(env_reset_rng=np.random.default_rng(_seed))
                 for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "IO-RMPC trials"})

In [None]:
# Steady-state cost histograms of the four agents compared in Figure 1 (right).
histogram_figure(
    {
        "MPC (obl)": steady_state_cost(mpc_obl_trajectories, 0.4),
        "MPC (f-dst)": steady_state_cost(mpc_fdst_trajectories, 0.4),
        "MPC (p-dst)": steady_state_cost(mpc_pdst_trajectories, 0.4),
        "IO-RMPC": steady_state_cost(io_rmpc_dst_trajectories, 0.4),
    },
    # This cell produces the right panel; the title previously said "center".
    title=f"Figure 1 right with {len(io_rmpc_dst_trajectories)} trials",
)

### Uncertainty Radius $\rho$

- The experiment shown in Figure 2 left

In [None]:
from typing import List
import numpy as np
from tqdm.notebook import tqdm

from io_agent.plant.fighter import FighterEnv
from io_agent.plant.base import Plant
from io_agent.evaluator import Transition
from io_agent.control.rmpc import RobustMPC

from common import run_mpc, prepare_io, run_io
from utils import parallelize, save_experiment


# Settings of the Figure 2 (left) rho sweep.
train_dataset_length = 250  # Passed to run_io as dataset_length
n_rho = 20  # Number of rho values in the sweep
env_length = 61  # max_length of the FighterEnv instances
expert_horizon = 20  # Horizon of the RobustMPC expert and of the run_mpc calls
n_past = 1  # Forwarded to prepare_io as n_past
n_io_agents = 20  # Number of dataset permutation seeds (one IO agent per seed)
n_io_trials = 50  # Number of evaluation trial seeds
n_train_obl_trajectories = 15  # Number of MPC-obl runs collected as the dataset

n_proc = 9  # Choose this based on your CPU cores
disturbance_bias = np.array([0.0, 0.005]).reshape(-1, 1)  # Column vector given to FighterEnv
general_seed = 42  # Seeds seed_rng and is also passed to save_experiment
seed_rng = np.random.default_rng(general_seed)


def async_execute_run_io(*args, **kwargs):
    """Call ``run_io`` with the given arguments and return its result.

    All positional and keyword arguments are forwarded unchanged. This
    wrapper is the ``fn`` handed to ``parallelize`` when the IO agents are
    built (presumably so that a function defined in this module is what gets
    dispatched - TODO confirm).
    """
    io_result = run_io(*args, **kwargs)
    return io_result


def run_rho_experiment(rho: float,
                       trial_seeds: List[int],
                       dataset_permute_seeds: List[int],
                       plant: Plant,
                       mpc_obl_dataset: List[List[Transition]]
                       ) -> List[List[List[Transition]]]:
    """Run the IO-RMPC evaluation for a single uncertainty radius ``rho``.

    The MPC-obl dataset is passed through ``prepare_io`` with a ``RobustMPC``
    expert configured with ``rho``. One IO agent runner is then created per
    entry of ``dataset_permute_seeds`` and every runner is evaluated once per
    entry of ``trial_seeds``.

    Args:
        rho: Value forwarded to the ``RobustMPC`` expert as ``"rho"``.
        trial_seeds: Seeds of the ``env_reset_rng`` generators, one per
            evaluation trial; shared by all runners.
        dataset_permute_seeds: Seeds of the ``dataset_permute_rng``
            generators, one per IO agent runner.
        plant: Plant handed to ``prepare_io``.
        mpc_obl_dataset: Trajectories handed to ``prepare_io`` as ``dataset``.

    Returns:
        One entry per dataset permutation seed; each entry is what
        ``parallelize`` returns for that runner over all trial seeds.
    """
    # Augment the obl trajectories(dataset) with the RMPC actions
    expert_kwargs = {
        "horizon": expert_horizon,
        "rho": rho,
        "state_constraints_flag": True,
        "input_constraints_flag": True
    }
    linearized_plant, feature_handler, augmented_dataset = prepare_io(
        plant=plant,
        dataset=mpc_obl_dataset,
        expert_class=RobustMPC,
        expert_kwargs=expert_kwargs,
        n_past=n_past,
    )

    # Train ```n_io_agents``` many agents, one per dataset permutation seed
    runner_kwargs = [
        {
            "plant": linearized_plant,
            "feature_handler": feature_handler,
            "augmented_dataset": augmented_dataset,
            "dataset_permute_rng": np.random.default_rng(permute_seed),
            "dataset_length": train_dataset_length,
        }
        for permute_seed in dataset_permute_seeds
    ]
    io_agent_runables = parallelize(
        n_proc=n_proc,
        fn=async_execute_run_io,
        kwargs_list=runner_kwargs,
    )

    # Evaluate all io_agents on the same trial seeds (fresh generators per agent)
    progress_label = f"Evaluate IO-(rho={np.round(rho, decimals=4)})"
    return [
        parallelize(
            n_proc=n_proc,
            fn=io_agent_runner,
            kwargs_list=[{"env_reset_rng": np.random.default_rng(trial_seed)}
                         for trial_seed in trial_seeds])
        for io_agent_runner in tqdm(io_agent_runables, desc=progress_label)
    ]


# Environment with biased disturbance. We are using the same trial seeds between
# varying rhos and io agents. But, each io agent is trained with a different
# random partition of the dataset, hence there are ```n_io_agents``` many
# dataset permutation seeds.
plant = FighterEnv(max_length=env_length, disturbance_bias=disturbance_bias)
# seed_rng is consumed in a fixed order (dataset_permute, trials, then the
# dataset seeds below); keep that order so the seeds do not change.
experiment_seeds = dict(
    dataset_permute=seed_rng.integers(0, 2**30, size=n_io_agents),
    trials=seed_rng.integers(0, 2**30, size=n_io_trials)
)

# Environment without disturbance bias
unbiased_plant = FighterEnv(max_length=env_length)
mpc_obl_train_trajectories = parallelize(
    n_proc=min(n_proc, n_train_obl_trajectories),
    fn=run_mpc,
    kwargs_list=[dict(use_foresight=False,  # Without foresight
                      horizon=expert_horizon,
                      plant=unbiased_plant,
                      env_reset_rng=np.random.default_rng(_seed))
                 for _seed in seed_rng.integers(0, 2**30, size=n_train_obl_trajectories)],
    loading_bar_kwargs={"desc": "MPC-obl-dataset trials"})

# Rho values in log scale
rho_keys = np.logspace(-3, -0.5, n_rho)
rho_trajectories = parallelize(
    n_proc=n_rho,
    fn=run_rho_experiment,
    kwargs_list=[dict(rho=_rho,
                      trial_seeds=experiment_seeds["trials"],
                      dataset_permute_seeds=experiment_seeds["dataset_permute"],
                      plant=plant,
                      mpc_obl_dataset=mpc_obl_train_trajectories)
                 for _rho in rho_keys],
    loading_bar_kwargs=dict(desc="rho values")
)

# This run iterates over the n_io_trials trial seeds, so the worker count is
# capped by n_io_trials (not by the dataset size).
mpc_fdst_trajectories = parallelize(
    n_proc=min(n_proc, n_io_trials),
    fn=run_mpc,
    kwargs_list=[dict(use_foresight=True,  # With foresight
                      horizon=expert_horizon,
                      bias_aware=True,  # Aware of the bias in the disturbance
                      plant=plant,
                      env_reset_rng=np.random.default_rng(_seed))
                 for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "MPC-fdst trials"})

# Save the experiment since it takes some time to complete.
# rho_trajectories is keyed by the rho value it was produced with.
save_experiment(
    values={"rho_trajectories": dict(zip(rho_keys, rho_trajectories)),
            "mpc_fdst_trajectories": mpc_fdst_trajectories},
    seed=general_seed,
    exp_dir="./fighter_data/rho_ablation",
    name="figure_2_left_data_test")

In [None]:
import numpy as np
from utils import steady_state_cost, load_experiment
from plotter import tube_figure

# Load the saved rho sweep of the Figure 2 (left) experiment.
exp_trajectories = load_experiment("./fighter_data/rho_ablation/figure_2_left_data_test-42")
io_rho_trajectories = exp_trajectories["rho_trajectories"]
mpc_fdst_trajectories = exp_trajectories["mpc_fdst_trajectories"]

# For every rho: one mean steady-state cost per IO agent. Rho entries stored
# as None are skipped.
io_rmpc_mean_rho_costs = {}
for _rho_key, _agent_trajectories in io_rho_trajectories.items():
    if _agent_trajectories is None:
        continue
    io_rmpc_mean_rho_costs[_rho_key] = [
        np.mean(steady_state_cost(_trajectories, 0.4))
        for _trajectories in _agent_trajectories
    ]

# The MPC (f-dst) reference does not depend on rho; it is repeated under every
# rho key that has IO-RMPC data.
# NOTE(review): IO-RMPC aggregates with np.mean while this reference uses
# np.median - confirm the mismatch is intended.
mpc_fdst_reference_costs = {
    _rho_key: np.median(steady_state_cost(mpc_fdst_trajectories, 0.4))
    for _rho_key in io_rmpc_mean_rho_costs
}

tube_figure(
    cost_data={
        "IO-RMPC": io_rmpc_mean_rho_costs,
        "MPC (f-dst)": mpc_fdst_reference_costs,
    },
    title="Figure 2 Left",
    log_xaxis=True,
    log_yaxis=True
)

- The experiment shown in Figure 2 center and right

In [None]:
from typing import List
import numpy as np

from io_agent.plant.fighter import FighterEnv
from io_agent.plant.base import Plant
from io_agent.evaluator import Transition

from common import run_mpc, run_io_mpc, run_io_rmpc
from utils import parallelize, save_experiment


# Settings of the experiment whose saved data feeds the "Figure 2 center" and
# "Figure 2 right" cells.
train_dataset_length = 200
n_trials = 50
n_rho = 20
env_length = 61
expert_horizon = 20
n_train_obl_trajectories = 20
n_past = 1
n_proc = 9  # Choose this based on your CPU cores
disturbance_bias = np.array([0.0, 0.005]).reshape(-1, 1)
general_seed = 42
seed_rng = np.random.default_rng(general_seed)

# Environment with disturbance bias
plant = FighterEnv(max_length=env_length, disturbance_bias=disturbance_bias)

# seed_rng is consumed in a fixed order: the dataset permutation seed, the
# trial seeds and then the seeds of the MPC-obl dataset runs.
experiment_seeds = {
    "dataset_permute": seed_rng.integers(0, 2**30, size=1),
    "trials": seed_rng.integers(0, 2**30, size=n_trials),
}

# Environment without disturbance bias, used to collect the MPC-obl dataset
unbiased_plant = FighterEnv(max_length=env_length)
obl_dataset_kwargs = [
    {
        "use_foresight": False,
        "horizon": expert_horizon,
        "plant": unbiased_plant,
        "env_reset_rng": np.random.default_rng(_seed),
    }
    for _seed in seed_rng.integers(0, 2**30, size=n_train_obl_trajectories)
]
mpc_obl_train_trajectories = parallelize(
    n_proc=min(n_proc, n_train_obl_trajectories),
    fn=run_mpc,
    kwargs_list=obl_dataset_kwargs,
    loading_bar_kwargs={"desc": "MPC-obl-dataset trials"},
)


def run_io_trials(mpc_obl_dataset: List[List[Transition]],
                  rho: float,
                  plant: Plant,
                  ) -> List[List[Transition]]:
    """Evaluate one IO-RMPC agent, built with ``rho``, on every trial seed.

    The same dataset permutation seed and trial seeds (taken from the
    module-level ``experiment_seeds``) are used for every ``rho``.

    Args:
        mpc_obl_dataset: Trajectories handed to ``run_io_rmpc`` as the dataset.
        rho: Value forwarded to ``run_io_rmpc`` as ``expert_rho``.
        plant: Plant handed to ``run_io_rmpc``.

    Returns:
        What ``parallelize`` returns for the trial runs, one entry per seed in
        ``experiment_seeds["trials"]``.
    """
    return parallelize(
        n_proc=min(n_proc, n_trials),
        fn=run_io_rmpc(mpc_obl_dataset,
                       # Use the configured constant rather than a repeated literal
                       dataset_length=train_dataset_length,
                       n_past=n_past,
                       plant=plant,
                       expert_horizon=expert_horizon,
                       expert_rho=rho,
                       dataset_permute_rng=np.random.default_rng(experiment_seeds["dataset_permute"])),
        kwargs_list=[dict(env_reset_rng=np.random.default_rng(_seed))
                     for _seed in experiment_seeds["trials"]]
    )


# Rho values in log scale
rho_keys = np.logspace(-3, -0.5, n_rho)
rho_trajectories = parallelize(
    n_proc=n_rho,
    fn=run_io_trials,
    kwargs_list=[dict(rho=_rho,
                      plant=plant,
                      mpc_obl_dataset=mpc_obl_train_trajectories)
                 for _rho in rho_keys],
    loading_bar_kwargs=dict(desc="IO-RMPC rho values")
)

io_mpc_dst_trajectories = parallelize(
    n_proc=min(n_proc, n_trials),
    fn=run_io_mpc(mpc_obl_train_trajectories,
                  # Use the configured constant rather than a repeated literal
                  dataset_length=train_dataset_length,
                  n_past=n_past,
                  expert_horizon=expert_horizon,
                  plant=plant,
                  dataset_permute_rng=np.random.default_rng(experiment_seeds["dataset_permute"])),
    kwargs_list=[dict(env_reset_rng=np.random.default_rng(_seed))
                 for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "IO-MPC trials"})

# This run iterates over the n_trials trial seeds, so the worker count is
# capped by n_trials (not by the dataset size).
mpc_fdst_trajectories = parallelize(
    n_proc=min(n_proc, n_trials),
    fn=run_mpc,
    kwargs_list=[dict(use_foresight=True,  # With foresight
                      horizon=expert_horizon,
                      bias_aware=True,  # Aware of the bias in the disturbance
                      plant=plant,
                      env_reset_rng=np.random.default_rng(_seed))
                 for _seed in experiment_seeds["trials"]],
    loading_bar_kwargs={"desc": "MPC-fdst trials"})

# Save the experiment since it takes some time to complete.
# rho_trajectories is keyed by the rho value it was produced with.
save_experiment(
    values={"rho_trajectories": dict(zip(rho_keys, rho_trajectories)),
            "io_mpc_dst_trajectories": io_mpc_dst_trajectories,
            "mpc_fdst_trajectories": mpc_fdst_trajectories},
    seed=general_seed,
    exp_dir="./fighter_data/rho_ablation",
    name="figure_2_center_data")

- Figure 2 center

In [None]:
import numpy as np
from utils import steady_state_cost, load_experiment
from plotter import tube_figure

# Load the saved rho sweep used by the Figure 2 center plot.
exp_trajectories = load_experiment("./fighter_data/rho_ablation/figure_2_center_data-42")
io_rho_trajectories = exp_trajectories["rho_trajectories"]
mpc_fdst_trajectories = exp_trajectories["mpc_fdst_trajectories"]


# Steady-state costs per rho; rho entries stored as None are skipped.
io_rmpc_ss_rho_costs = {
    key: steady_state_cost(trajectories, 0.4)
    for key, trajectories in io_rho_trajectories.items()
    if trajectories is not None
}

tube_figure(
    cost_data={
        "IO-RMPC": io_rmpc_ss_rho_costs,
        # The MPC (f-dst) reference does not depend on rho; it is repeated
        # under every rho key that has IO-RMPC data.
        "MPC (f-dst)": {key: steady_state_cost(mpc_fdst_trajectories, 0.4) for key in io_rmpc_ss_rho_costs}
    },
    # This cell produces the center panel; the title previously read
    # "Figure 2 Left center".
    title="Figure 2 center"
)

- Figure 2 right

In [None]:
import numpy as np
from utils import steady_state_cost, load_experiment
from plotter import histogram_figure

# Load the saved rho sweep together with the IO-MPC and MPC (f-dst) runs.
exp_trajectories = load_experiment("./fighter_data/rho_ablation/figure_2_center_data-42")
io_rho_trajectories = exp_trajectories["rho_trajectories"]
mpc_fdst_trajectories = exp_trajectories["mpc_fdst_trajectories"]
io_dst_trajectories = exp_trajectories["io_mpc_dst_trajectories"]

# Steady-state costs per rho; rho entries stored as None are skipped.
io_rmpc_ss_rho_costs = {}
for _rho_key, _trajectories in io_rho_trajectories.items():
    if _trajectories is not None:
        io_rmpc_ss_rho_costs[_rho_key] = steady_state_cost(_trajectories, 0.4)

# Pick the rho whose costs have the smallest median (first one wins on ties).
opt_rho = min(
    io_rmpc_ss_rho_costs,
    key=lambda rho_key: np.median(io_rmpc_ss_rho_costs[rho_key]),
)
opt_costs = io_rmpc_ss_rho_costs[opt_rho]

histogram_figure(
    cost_data={
        "MPC (f-dst)": steady_state_cost(mpc_fdst_trajectories, 0.4),
        f"IO-RMPC(rho*={np.round(opt_rho, decimals=4)})": opt_costs,
        "IO-MPC": steady_state_cost(io_dst_trajectories, 0.4)
    },
    title="Figure 2 right"
)