- - -
## Iterative IO algorithm
- - -
#### Quadrotor Environment

The Q minimization:
$$
\begin{aligned}
\min_u \quad & 2s^T\theta_{su}u + u^T\theta_{uu}u \\
\text{s.t.} \quad & G_u u \leq h_u
\end{aligned}
$$

- Create dataset

In [None]:
from typing import List
from itertools import product
from functools import partial
import numpy as np
import multiprocessing

from io_agent.evaluator import Transition
from io_agent.plant.quadrotor import QuadrotorEnv

from common import run_mpc, run_io_rmpc
from utils import parallelize, steady_state_cost, save_experiment


# ---- Experiment configuration ------------------------------------------
n_cpu = multiprocessing.cpu_count()
horizon = 25
n_trials = 20
n_past = 2
add_bias = False
n_dataset_trials = 20
n_rhos = 12
general_seed = 42
seed_rng = np.random.default_rng(general_seed)

plant = QuadrotorEnv()
# NOTE: the order of these two draws is significant. The first draw advances
# ``seed_rng`` before the dataset seeds are sampled, so it must stay even
# though ``permute_seed`` / ``trial_seeds`` are not used in this cell.
permute_seed, *trial_seeds = seed_rng.integers(0, 2**30, n_trials + 1)
dataset_trial_seeds = seed_rng.integers(0, 2**30, n_dataset_trials)

# ---- Roll out one MPC trajectory per dataset seed, in parallel ----------
dataset_trajectories = parallelize(
    n_proc=min(n_cpu, n_dataset_trials),
    fn=partial(run_mpc, plant=plant),
    kwargs_list=[
        {
            "horizon": horizon,
            "use_foresight": False,  # foresight disabled for the dataset
            "bias_aware": False,
            "env_reset_rng": np.random.default_rng(trial_seed),
        }
        for trial_seed in dataset_trial_seeds
    ],
    loading_bar_kwargs={"desc": "MPC dataset trials"},
)

# ---- Persist the trajectories for the later cells -----------------------
save_experiment(
    values={"mpc_dataset": dataset_trajectories},
    seed=general_seed,
    exp_dir="./quadrotor_data/dataset",
    name="mpc_obl",
)

- Construct the Q function

In [None]:
from typing import Callable, List, Tuple, Optional, Any, Union, Dict
from dataclasses import dataclass
from functools import partial
from tqdm.notebook import tqdm
import numpy as np
import torch
import geotorch
import cvxpy as cp

from io_agent.plant.quadrotor import QuadrotorEnv
from io_agent.plant.base import (NominalLinearEnvParams,
                                 LinearizationWrapper,
                                 LinearConstraints,
                                 LinearConstraint,
                                 Plant)
from io_agent.control.mpc import Optimizer
from io_agent.utils import AugmentedTransition, FeatureHandler
from io_agent.control.io import AugmentDataset
from io_agent.control.mpc import MPC
from io_agent.control.deep_io import IterativeIOController
from io_agent.evaluator import Transition

from utils import load_experiment, parallelize
from common import run_agent


@dataclass
class DeepIOArgs:
    """Hyper-parameters for building and training the iterative IO agent."""

    # Feature construction (forwarded to ``FeatureHandler``).
    n_past: int = 2
    add_bias: bool = False
    use_action_regressor: bool = False
    use_noise_regressor: bool = True
    use_state_regressor: bool = False
    # Horizon of the optional MPC expert.
    horizon: int = 25
    # When True an MPC expert is built and handed to ``AugmentDataset``.
    use_expert: bool = False
    # Optimisation settings (forwarded to ``IterativeIOController``).
    learning_rate: float = 1e-2
    lr_exp_decay: float = 0.98
    # ``deep_io_trials`` requires this to be 0; it drives the epoch schedule
    # itself from ``evaluation_epochs``.
    n_epoch: int = 1000
    # Batch size passed to the agent's ``train`` call.
    n_batch: int = 128


def prepare_deep_io(dataset: List[List[Transition]],
                    env: Plant,
                    rng: np.random.Generator,
                    args: DeepIOArgs,
                    verbose: bool = True
                    ) -> Tuple[IterativeIOController, Any]:
    """Build an untrained iterative IO controller and its augmented dataset.

    Args:
        dataset: Trajectories (lists of transitions) to augment.
        env: Plant providing the nominal model and, when an expert is
            requested, the action/state/noise/output sizes.
        rng: Generator used to draw the torch seed.
        args: Feature, expert and optimisation hyper-parameters.
        verbose: Currently unused; kept for interface compatibility.

    Returns:
        A ``(controller, augmented_dataset)`` pair, where
        ``augmented_dataset`` is whatever ``AugmentDataset`` returns for
        ``dataset``.
    """
    feature_handler = FeatureHandler(
        params=env.nominal_model(),
        n_past=args.n_past,
        add_bias=args.add_bias,
        use_action_regressor=args.use_action_regressor,
        use_noise_regressor=args.use_noise_regressor,
        use_state_regressor=args.use_state_regressor,
    )
    expert_agent = None
    if args.use_expert:
        # Size the expert from the ``env`` argument rather than from a
        # notebook-level global, so the function honours the plant it is given.
        expert_agent = MPC(
            action_size=env.action_size,
            state_size=env.state_size,
            noise_size=env.noise_size,
            output_size=env.output_size,
            horizon=args.horizon)
        expert_agent.optimizer = expert_agent.prepare_optimizer(feature_handler.params)
    augmenter = AugmentDataset(
        expert_agent=expert_agent,
        feature_handler=feature_handler,
    )
    augmented_dataset = augmenter(dataset)
    # Seed torch from ``rng`` right before the controller is constructed.
    torch.manual_seed(rng.integers(0, 2**30).item())
    iterative_io_agent = IterativeIOController(
        constraints=feature_handler.params.constraints,
        feature_handler=feature_handler,
        learning_rate=args.learning_rate,
        include_constraints=True,
        action_constraints_flag=True,
        state_constraints_flag=False,
        lr_exp_decay=args.lr_exp_decay,
    )
    return iterative_io_agent, augmented_dataset

In [None]:
import multiprocessing

from common import run_agent
from utils import save_experiment


# Linearised quadrotor plant and the MPC dataset saved by the first cell.
lin_env = LinearizationWrapper(QuadrotorEnv())
dataset = load_experiment("./quadrotor_data/dataset/mpc_obl-42")["mpc_dataset"]

# Evaluation configuration.
n_cpu = multiprocessing.cpu_count()
n_trials = 50
general_seed = 43
seed_rng = np.random.default_rng(general_seed)
trial_seeds = seed_rng.integers(0, 2**30, n_trials)
# 15 log-spaced training checkpoints from 10 to 1000 epochs, floored to ints.
evaluation_epochs = np.floor(np.logspace(1, 3, 15)).astype(int).tolist()


def deep_io_trials(args: DeepIOArgs, rng: np.random.Generator, trial_seeds: List[int]
                   ) -> Tuple[Dict[int, list], list]:
    """Train a deep IO agent in stages and evaluate it after every stage.

    The agent is trained up to each checkpoint in the module-level
    ``evaluation_epochs``; after each checkpoint it is rolled out once per
    entry of ``trial_seeds`` on the module-level ``lin_env``.

    Args:
        args: Agent hyper-parameters. ``args.n_epoch`` must be 0 because the
            epoch schedule is taken from ``evaluation_epochs``.
        rng: Generator used for agent preparation and training.
        trial_seeds: One environment-reset seed per evaluation rollout.

    Returns:
        ``(costs, losses)``: ``costs`` maps each checkpoint epoch to the list
        of per-trajectory costs, ``losses`` is the concatenation of the values
        returned by every ``train`` call.

    Raises:
        ValueError: If ``args.n_epoch`` is not 0.
    """
    if args.n_epoch != 0:
        raise ValueError("```n_epoch``` must be set to 0")
    deep_io_agent, augmented_dataset = prepare_deep_io(
        dataset, lin_env, rng, args, verbose=False)
    # Size the worker pool from the actual number of rollouts instead of a
    # hard-coded 50; keep at least one worker for an empty seed list.
    n_proc = max(1, min(n_cpu, len(trial_seeds)))
    losses = []
    costs = {}
    with tqdm(total=evaluation_epochs[-1]) as pbar:
        for eval_break_epoch in evaluation_epochs:
            # Train only the epochs missing to reach this checkpoint.
            # NOTE(review): assumes ``train`` returns one loss per epoch — confirm.
            n_epoch = eval_break_epoch - len(losses)
            _losses = deep_io_agent.train(
                augmented_dataset,
                epochs=n_epoch,
                batch_size=args.n_batch,
                rng=rng,
                verbose=False)
            losses.extend(_losses)
            # Rebuild the action optimizer after training, before evaluation.
            deep_io_agent.action_optimizer = deep_io_agent.prepare_action_optimizer()
            deep_io_trajectories = parallelize(
                n_proc=n_proc,
                fn=partial(run_agent, agent=deep_io_agent, plant=lin_env),
                kwargs_list=[
                    dict(
                        use_foresight=False,
                        bias_aware=False,
                        env_reset_rng=np.random.default_rng(_seed)
                    ) for _seed in trial_seeds
                ],
            )
            # NOTE(review): 300 is presumably the episode length, making this
            # "steps minus summed exp(-cost)" — TODO confirm.
            deep_io_costs = [300 - np.sum([np.exp(-tran.cost) for tran in trajectory])
                             for trajectory in deep_io_trajectories]
            costs[eval_break_epoch] = deep_io_costs
            pbar.set_postfix({"Median cost": np.median(deep_io_costs)})
            pbar.update(n_epoch)
    return costs, losses


# Ablation: the same agent trained without and with an MPC expert.
# ``n_epoch=0`` is required by ``deep_io_trials``, which schedules the epochs.
experiment_args = {
    "DeepIO-No-expert": DeepIOArgs(use_expert=False, n_epoch=0),
    "DeepIO-MPC-expert": DeepIOArgs(use_expert=True, n_epoch=0),
}

results = {}
for key, args in experiment_args.items():
    results[key] = deep_io_trials(
        args,
        # Draw the per-experiment seed from the full [0, 2**30) range used
        # elsewhere in this file (previously ``2*30`` == 60, a typo).
        np.random.default_rng(seed_rng.integers(0, 2**30)),
        trial_seeds)


save_experiment(
    values={"deep-io": results},
    seed=general_seed,
    exp_dir="./quadrotor_data/ablation",
    name="deep-io"
)

In [None]:
import numpy as np
import matplotlib.ticker as tck

from utils import steady_state_cost, load_experiment
from collections import defaultdict
from plotter import histogram_figure, histogram_figure_plt, tube_figure_plt


# Each ``results`` entry is a ``(costs, losses)`` pair: index 0 holds the
# per-checkpoint costs, index 1 the training losses.

# Cost distribution at the 1000-epoch checkpoint.
fig = histogram_figure_plt(
    cost_data={name: outcome[0][1000] for name, outcome in results.items()},
    title="",
    bw_method="scott",
    bw_adjust=0.30,
    log_yaxis=True,
    y_label="log density",
    low_y=1e-3,
    figsize=(6, 3),
)

# Episodic cost against training epoch (20th-80th percentile tube).
fig, axes = tube_figure_plt(
    cost_data={name: outcome[0] for name, outcome in results.items()},
    title="",
    log_xaxis=True,
    log_yaxis=False,
    x_label="epoch",
    y_label="episodic cost",
    percentiles=(20, 80),
)

# Training loss against epoch, with the losses re-keyed by 1-based epoch.
fig, axes = tube_figure_plt(
    cost_data={
        name: {epoch: loss for epoch, loss in enumerate(outcome[1], start=1)}
        for name, outcome in results.items()
    },
    title="",
    log_xaxis=True,
    log_yaxis=True,
    x_label="epoch",
    y_label="sub loss",
    percentiles=(20, 80),
)
