## Quadrotor Environment

We use one of the [safe-control-gym](https://github.com/utiasDSL/safe-control-gym) environments.

### MPC and RL agent comparison

- Train PPO agent
- MPC Ablation on horizon and hindsight
- Compare RL agent and MPC variants
> Note that we use the [Stable-Baselines3](https://stable-baselines3.readthedocs.io/en/master/) repository as the source of the RL agent implementation.

In [None]:
from typing import List
from itertools import product
from functools import partial
import os
import multiprocessing
import numpy as np
from tqdm.notebook import tqdm_notebook as tqdm

from io_agent.evaluator import Transition
from io_agent.plant.quadrotor import QuadrotorEnv
from io_agent.control.ppo import PPoController
from io_agent.evaluator import ControlLoop
from io_agent.plant.base import Plant

from common import run_mpc, run_io_mpc
from utils import parallelize, steady_state_cost, save_experiment


# --- Experiment configuration -------------------------------------------
general_seed = 42
n_trials = 20
mpc_horizons = [15, 20, 25, 30, 35, 40]
n_cpu = multiprocessing.cpu_count()

# All per-trial seeds are drawn from one generator seeded by `general_seed`.
seed_rng = np.random.default_rng(general_seed)

plant = QuadrotorEnv()
trial_seeds = seed_rng.integers(low=0, high=2**30, size=n_trials)

# --- PPO agent ------------------------------------------------------------
# Training is skipped when "<ppo_path>.zip" is already on disk
# (presumably the checkpoint written by a previous `PPoController.train`).
ppo_path = f"./quadrotor_data/ppo-1m-{general_seed}"
if not os.path.exists(f"{ppo_path}.zip"):
    PPoController.train(
        lambda: QuadrotorEnv(use_exp_reward=True),
        n_envs=10,
        seed=general_seed,
        path=ppo_path,
        total_timesteps=1_000_000,
    )
ppo_agent = PPoController(ppo_path)

# Every (horizon, hindsight flag) pair evaluated in the MPC ablation.
ablation_keys = list(product(mpc_horizons, (True, False)))


def run_mpc_experiment(plant: Plant, horizon: int, use_hindsight: bool):
    """Run one MPC configuration once per trial seed via ``parallelize``.

    Args:
        plant: Plant bound to ``run_mpc`` as its ``plant`` argument.
        horizon: Passed to ``run_mpc`` as ``horizon``.
        use_hindsight: Passed to ``run_mpc`` as ``use_foresight``.

    Returns:
        The value returned by ``parallelize`` for the per-seed ``run_mpc`` calls.
    """
    # Settings shared by all trials; only the reset generator differs per seed.
    shared_kwargs = dict(horizon=horizon,
                         use_foresight=use_hindsight,
                         bias_aware=False)
    per_trial_kwargs = [
        {**shared_kwargs, "env_reset_rng": np.random.default_rng(trial_seed)}
        for trial_seed in trial_seeds
    ]
    progress_label = f"MPC with horizon:{horizon} and hindsight: {use_hindsight}"
    return parallelize(
        n_proc=min(n_cpu, n_trials),
        fn=partial(run_mpc, plant=plant),
        kwargs_list=per_trial_kwargs,
        loading_bar_kwargs={"desc": progress_label},
    )


def evaluate_ppo(seed: int):
    """Simulate the loaded PPO agent on the module-level plant for one seed.

    Args:
        seed: Seed of the NumPy generator handed to the control loop as ``rng``.

    Returns:
        The value returned by ``ControlLoop.simulate(False, False)``.
    """
    rollout_rng = np.random.default_rng(seed)
    control_loop = ControlLoop(plant=plant, controller=ppo_agent, rng=rollout_rng)
    return control_loop.simulate(False, False)


# Outer worker count: the CPUs divided by the number of workers that each
# configuration requests for itself (min(n_cpu, n_trials) inside
# `run_mpc_experiment`).
mpc_ablation_trajectories = parallelize(
    n_proc=n_cpu // min(n_cpu, n_trials),
    fn=partial(run_mpc_experiment, plant=plant),
    kwargs_list=[
        {"horizon": horizon, "use_hindsight": hindsight}
        for horizon, hindsight in ablation_keys
    ],
    loading_bar_kwargs={"desc": "MPC ablation study"},
)

# The PPO agent is evaluated sequentially on the same trial seeds.
ppo_trajectories = [
    evaluate_ppo(seed=trial_seed)
    for trial_seed in tqdm(trial_seeds, desc="PPO agent evaluation")
]

# Persist the results since the experiment takes some time to complete.
# MPC results are keyed by their (horizon, hindsight) ablation key.
save_experiment(
    values={
        "mpc_ablation_trajectories": dict(zip(ablation_keys, mpc_ablation_trajectories)),
        "ppo_trajectories": ppo_trajectories,
    },
    seed=general_seed,
    exp_dir="./quadrotor_data/ablation",
    name="mpc_vs_ppo",
)

In [None]:
import numpy as np
from utils import steady_state_cost, load_experiment
from plotter import tube_figure

# Load the saved MPC-vs-PPO experiment and plot steady-state cost per MPC horizon.
exp_trajectories = load_experiment("./quadrotor_data/ablation/mpc_vs_ppo-42")
mpc_trajectories = exp_trajectories["mpc_ablation_trajectories"]
ppo_trajectories = exp_trajectories["ppo_trajectories"]

# The PPO cost does not depend on the MPC horizon, so it is computed once and
# reused for every horizon instead of once per (horizon, hindsight) key.
ppo_cost = steady_state_cost(ppo_trajectories, 0.4)

tube_figure(
    cost_data={
        # Keys of `mpc_trajectories` are (horizon, hindsight flag) pairs.
        "MPC (w/ Hindsight)": {
            key[0]: steady_state_cost(traj, 0.4)
            for key, traj in mpc_trajectories.items()
            if key[1]
        },
        "MPC (w/o Hindsight)": {
            key[0]: steady_state_cost(traj, 0.4)
            for key, traj in mpc_trajectories.items()
            if not key[1]
        },
        # Same PPO cost at every horizon so that it spans the whole x-axis.
        "PPO": {key[0]: ppo_cost for key in mpc_trajectories},
    },
    title="Quadrotor 2D - MPC and PPO comparison",
    log_xaxis=False,
    log_yaxis=True,
    xaxis_name="MPC horizon",
    percentiles=(20, 80),
)

- IO-MPC lookback horizon ablation

In [None]:
from typing import List
from itertools import product
from functools import partial
import os
import multiprocessing
import numpy as np
from tqdm.notebook import tqdm_notebook as tqdm


from io_agent.evaluator import Transition
from io_agent.plant.quadrotor import QuadrotorEnv
from io_agent.control.ppo import PPoController
from io_agent.evaluator import ControlLoop
from io_agent.plant.base import Plant

from common import run_mpc, run_io_mpc
from utils import parallelize, steady_state_cost, save_experiment


# --- Experiment configuration -------------------------------------------
# 12 cores are held back (presumably for other work on the machine). Both
# values are clamped to at least 1: without the clamp, hosts with fewer than
# 16 cores end up with a non-positive worker or trial count, which breaks the
# seed unpacking below and the `n_cpu // min(n_trials, n_cpu)` worker split
# used later in this cell.
n_cpu = max(multiprocessing.cpu_count() - 12, 1)
horizon = 25
n_trials = max(n_cpu // 4, 1)
n_dataset_trials = 20
general_seed = 42
lookbacks = [0, 2, 4, 8, 12, 16]
seed_rng = np.random.default_rng(general_seed)

plant = QuadrotorEnv()
# Draw order is part of the reproducibility contract: first the permutation
# seed together with the evaluation seeds, then the dataset seeds.
permute_seed, *trial_seeds = seed_rng.integers(0, 2**30, n_trials + 1)
dataset_trial_seeds = seed_rng.integers(0, 2**30, n_dataset_trials)

# Trajectories of the MPC runs that are later handed to `run_io_mpc`.
dataset_trajectories = parallelize(
    n_proc=min(n_cpu, n_dataset_trials),
    fn=partial(run_mpc, plant=plant),
    kwargs_list=[
        dict(
            horizon=horizon,
            use_foresight=False,  # Without hindsight data
            bias_aware=False,
            env_reset_rng=np.random.default_rng(_seed)
        ) for _seed in dataset_trial_seeds
    ],
    loading_bar_kwargs=dict(desc="MPC dataset trials")
)


def lookback_horizon_experiment(n_past: int) -> List[List[Transition]]:
    """Evaluate IO-MPC with the given lookback over all trial seeds.

    Args:
        n_past: Passed to ``run_io_mpc`` as ``n_past``.

    Returns:
        The value returned by ``parallelize`` for the per-seed trials.
    """
    # `run_io_mpc` is called once here; its return value is the callable that
    # `parallelize` invokes for every trial.
    io_mpc_trial = run_io_mpc(
        dataset_trajectories,
        dataset_length=300,
        n_past=n_past,
        add_bias=False,
        expert_horizon=horizon,
        plant=plant,
        dataset_permute_rng=np.random.default_rng(permute_seed),
    )
    # One freshly seeded reset generator per trial.
    per_trial_kwargs = [
        {"env_reset_rng": np.random.default_rng(trial_seed)}
        for trial_seed in trial_seeds
    ]
    return parallelize(
        n_proc=min(n_trials, n_cpu),
        fn=io_mpc_trial,
        kwargs_list=per_trial_kwargs,
        loading_bar_kwargs={"desc": f"IO-MPC, H:{n_past}"},
    )


# Outer worker count: the CPUs divided by the number of workers that each
# lookback experiment requests for itself (min(n_trials, n_cpu) inside
# `lookback_horizon_experiment`).
ablation_trajectories = parallelize(
    n_proc=n_cpu // min(n_trials, n_cpu),
    fn=lookback_horizon_experiment,
    kwargs_list=[{"n_past": lookback} for lookback in lookbacks],
    loading_bar_kwargs={"desc": "IO-MPC H-ablation"},
)

# Persist the results since the experiment takes some time to complete.
# Results are keyed by the lookback value they were produced with.
save_experiment(
    values={"h_ablation": dict(zip(lookbacks, ablation_trajectories))},
    seed=general_seed,
    exp_dir="./quadrotor_data/ablation",
    name="io_mpc_lookbacks",
)

In [None]:
import numpy as np
from utils import steady_state_cost, load_experiment
from plotter import tube_figure

# Load the IO-MPC lookback ablation and the MPC baselines from the
# MPC-vs-PPO experiment, then plot steady-state cost per lookback horizon.
exp_trajectories = load_experiment("./quadrotor_data/ablation/io_mpc_lookbacks-42")
io_mpc_trajectories = exp_trajectories["h_ablation"]
exp_trajectories = load_experiment("./quadrotor_data/ablation/mpc_vs_ppo-42")
mpc_trajectories = exp_trajectories["mpc_ablation_trajectories"]

# The MPC baselines do not depend on the lookback, so each cost is computed
# once and reused for every lookback instead of once per key.
mpc_hindsight_cost = steady_state_cost(mpc_trajectories[(25, True)], 0.4)
mpc_no_hindsight_cost = steady_state_cost(mpc_trajectories[(25, False)], 0.4)

tube_figure(
    cost_data={
        "IO-MPC (25)": {
            lookback: steady_state_cost(traj, 0.4)
            for lookback, traj in io_mpc_trajectories.items()
        },
        # Same baseline cost at every lookback so that it spans the whole x-axis.
        "MPC (25, w/ Hindsight)": {
            lookback: mpc_hindsight_cost for lookback in io_mpc_trajectories
        },
        "MPC (25, w/o Hindsight)": {
            lookback: mpc_no_hindsight_cost for lookback in io_mpc_trajectories
        },
    },
    title="Quadrotor 2D - IO-MPC Lookback Ablation",
    log_xaxis=False,
    log_yaxis=False,
    xaxis_name="lookback horizon",  # was misspelled "loockback"
    percentiles=(40, 60),
)

- IO-RMPC $\rho$ Ablation

In [1]:
from typing import List
from itertools import product
from functools import partial
import numpy as np
import multiprocessing

from io_agent.evaluator import Transition
from io_agent.plant.quadrotor import QuadrotorEnv

from common import run_mpc, run_io_rmpc
from utils import parallelize, steady_state_cost, save_experiment


# --- Experiment configuration -------------------------------------------
general_seed = 42
horizon = 25
n_past = 2
add_bias = False
n_trials = 20
n_dataset_trials = 20
n_rhos = 1
n_cpu = multiprocessing.cpu_count()

seed_rng = np.random.default_rng(general_seed)
plant = QuadrotorEnv()

# Seeds are drawn in a fixed order: first the permutation seed together with
# the evaluation seeds, then the dataset seeds.
permute_seed, *trial_seeds = seed_rng.integers(low=0, high=2**30, size=n_trials + 1)
dataset_trial_seeds = seed_rng.integers(low=0, high=2**30, size=n_dataset_trials)

# Trajectories of the MPC runs that are later handed to `run_io_rmpc`.
dataset_trajectories = parallelize(
    n_proc=min(n_cpu, n_dataset_trials),
    fn=partial(run_mpc, plant=plant),
    kwargs_list=[
        {
            "horizon": horizon,
            "use_foresight": False,  # Without hindsight data
            "bias_aware": False,
            "env_reset_rng": np.random.default_rng(dataset_seed),
        }
        for dataset_seed in dataset_trial_seeds
    ],
    loading_bar_kwargs={"desc": "MPC dataset trials"},
)


def rho_experiment(rho: float) -> List[List[Transition]]:
    """Evaluate IO-RMPC with the given ``rho`` over all trial seeds.

    Args:
        rho: Passed to ``run_io_rmpc`` as ``expert_rho``.

    Returns:
        The value returned by ``parallelize`` for the per-seed trials.
    """
    # `run_io_rmpc` is called once here; its return value is the callable that
    # `parallelize` invokes for every trial.
    io_rmpc_trial = run_io_rmpc(
        dataset_trajectories,
        dataset_length=300,
        n_past=n_past,
        add_bias=add_bias,
        expert_horizon=horizon,
        expert_rho=rho,
        plant=plant,
        dataset_permute_rng=np.random.default_rng(permute_seed),
    )
    # One freshly seeded reset generator per trial.
    per_trial_kwargs = [
        {"env_reset_rng": np.random.default_rng(trial_seed)}
        for trial_seed in trial_seeds
    ]
    return parallelize(
        n_proc=min(n_cpu, n_trials),
        fn=io_rmpc_trial,
        kwargs_list=per_trial_kwargs,
        loading_bar_kwargs={"desc": f"MPC rho:{rho} trials"},
    )


# `n_rhos` values spaced evenly on a log scale between 10**-1 and 10**1.
rho_values = np.logspace(-1, 1, n_rhos)

# Outer worker count: the CPUs divided by the number of workers that each
# rho experiment requests for itself (min(n_cpu, n_trials) inside
# `rho_experiment`).
ablation_trajectories = parallelize(
    n_proc=n_cpu // min(n_cpu, n_trials),
    fn=rho_experiment,
    kwargs_list=[{"rho": rho} for rho in rho_values],
    loading_bar_kwargs={"desc": "IO-MPC rho ablation"},
)

# Persist the results since the experiment takes some time to complete.
# Results are keyed by the rho value they were produced with.
save_experiment(
    values={"rho_ablation": dict(zip(rho_values, ablation_trajectories))},
    seed=general_seed,
    exp_dir="./quadrotor_data/ablation",
    name="io_rmpc_rho",
)

pybullet build time: Aug  3 2023 18:40:36
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
The same functionality is provided by providing additional input arguments to the 'integrator' function, in particular:
 * Call integrator(..., t0, tf, options) for a single output time, or
 * Call integrator(..., t0, grid, options) for multiple grid points.
The legacy 'output_t0' option can be emulated by including or excluding 't0' in 'grid'.
Backwards compatibility is provided in this release only.") [.../casadi/core/integrator.cpp:515]


MPC dataset trials:   0%|          | 0/20 [00:00<?, ?it/s]

IO-MPC rho ablation:   0%|          | 0/1 [00:00<?, ?it/s]

	https://www.cvxpy.org/tutorial/advanced/index.html#disciplined-parametrized-programming
