# Fighter Environment

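Experiments and figures comparing MPC, robust MPC, and inverse-optimization (IO) controllers on the fighter environment, based on the cost of each simulated transition.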

In [123]:
from typing import List, Optional, Dict, Union
import numpy as np
from tqdm.notebook import tqdm
from itertools import chain
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px

from io_agent.plant.fighter import FighterEnv, fighter_env_params
from io_agent.trainer import ControlLoop, Transition
from io_agent.control.mpc import MPC
from io_agent.control.rmpc import RobustMPC
from io_agent.control.io import IOController
from io_agent.utils import FeatureHandler


def run_agent(agent: MPC,
              env_length: int,
              use_foresight: bool,
              disturbance_bias: Optional[np.ndarray] = None
              ) -> List[Transition]:
    """Simulate ``agent`` on a freshly constructed ``FighterEnv``.

    Args:
        agent: Controller handed to the ``ControlLoop``.
        env_length: Forwarded to ``FighterEnv`` as ``max_length``.
        use_foresight: Forwarded unchanged to ``ControlLoop.simulate``.
        disturbance_bias: Optional offset added to the plant's state
            disturbance before it is given to the control loop. ``None``
            is replaced by zeros shaped like the plant's state disturbance.

    Returns:
        The result of ``ControlLoop.simulate`` started from
        ``initial_state=None``.
    """
    env = FighterEnv(max_length=env_length, env_params=fighter_env_params)
    # A missing bias is an all-zero offset, so the sum below is always valid.
    bias = np.zeros_like(env.state_disturbance) if disturbance_bias is None else disturbance_bias
    # The control loop receives the (possibly biased) state disturbance, while
    # the plant itself is passed along untouched.
    loop = ControlLoop(
        state_disturbance=env.state_disturbance + bias,
        output_disturbance=env.output_disturbance,
        plant=env,
        controller=agent,
    )
    return loop.simulate(initial_state=None, use_foresight=use_foresight)


def run_mpc(env_length: int = 60,
            horizon: int = 20,
            use_foresight: bool = True,
            disturbance_bias: Optional[np.ndarray] = None
            ) -> List[Transition]:
    """Roll out an ``MPC`` controller on the fighter environment.

    Args:
        env_length: Environment length forwarded to ``run_agent``.
        horizon: Horizon given to the ``MPC`` constructor.
        use_foresight: Forwarded to ``run_agent``.
        disturbance_bias: Optional disturbance offset forwarded to
            ``run_agent``.

    Returns:
        The rollout produced by ``run_agent``.
    """
    controller = MPC(env_params=fighter_env_params, horizon=horizon)
    return run_agent(agent=controller,
                     env_length=env_length,
                     use_foresight=use_foresight,
                     disturbance_bias=disturbance_bias)


def run_rmpc(env_length: int = 60,
             horizon: int = 20,
             use_foresight: bool = True,
             rho: float = 0.1,
             disturbance_bias: Optional[np.ndarray] = None
             ) -> List[Transition]:
    """Roll out a ``RobustMPC`` controller on the fighter environment.

    Args:
        env_length: Environment length forwarded to ``run_agent``.
        horizon: Horizon given to the ``RobustMPC`` constructor.
        use_foresight: Forwarded to ``run_agent``.
        rho: Forwarded to the ``RobustMPC`` constructor as ``rho``.
        disturbance_bias: Optional disturbance offset forwarded to
            ``run_agent``.

    Returns:
        The rollout produced by ``run_agent``.
    """
    # Both state and input constraints are switched on for the robust controller.
    controller = RobustMPC(
        env_params=fighter_env_params,
        horizon=horizon,
        rho=rho,
        state_constraints_flag=True,
        input_constraints_flag=True,
    )
    return run_agent(agent=controller,
                     env_length=env_length,
                     use_foresight=use_foresight,
                     disturbance_bias=disturbance_bias)


def run_io(dataset: List[Transition],
           env_length: int = 60,
           ) -> List[Transition]:
    """Train an ``IOController`` on ``dataset`` and roll it out once.

    Args:
        dataset: Trajectories passed to ``IOController.train``; its length
            also scales the controller's ``horizon``.
        env_length: Environment length used for the rollout and for sizing
            the controller's ``horizon``.

    Returns:
        The rollout produced by ``run_agent`` for the trained controller.
    """
    features = FeatureHandler(
        env_params=fighter_env_params,
        n_past=1,
        add_bias=True,
        use_action_regressor=False,
        use_noise_regressor=True,
        use_state_regressor=False,
    )
    # Horizon = (steps per trajectory minus the history window) x trajectories.
    # NOTE(review): presumably this is the number of usable training samples - confirm.
    steps_per_trajectory = env_length - features.n_past
    controller = IOController(
        env_params=fighter_env_params,
        include_constraints=True,
        soften_state_constraints=True,
        state_constraints_flag=True,
        action_constraints_flag=True,
        horizon=steps_per_trajectory * len(dataset),
        feature_handler=features,
    )
    controller.train(dataset)
    # The action optimizer is rebuilt after training and attached to the controller.
    controller.action_optimizer = controller.prepare_action_optimizer()
    return run_agent(
        agent=controller,
        env_length=env_length,
        use_foresight=False,    # the IO agent does not look into the future
        disturbance_bias=None,  # the IO agent does not require a disturbance bias
    )


def make_figure(cost_data: Dict[str, List[float]],
                title: str,
                color_list: List[str] = px.colors.qualitative.T10
    ) -> go.Figure:
    """Plot the cost distribution of each group with its median marked.

    One distribution (histogram plus density curve, no rug) is drawn per
    entry of ``cost_data`` using ``ff.create_distplot`` with a bin size of 4.
    A dashed vertical line in the group's colour marks its median. The y axis
    is logarithmic.

    Args:
        cost_data: Mapping from group label to that group's cost samples.
        title: Figure title, centred above the plot.
        color_list: Colours assigned to the groups in order; reused
            cyclically when there are more groups than colours.

    Returns:
        The figure created by ``ff.create_distplot`` (a ``go.Figure``), with
        median lines and layout applied.
    """
    # The figure comes from ``create_distplot``; no separate (widget) figure
    # needs to be created beforehand.
    labels = list(cost_data.keys())
    costs = list(cost_data.values())
    colors = [color_list[index % len(color_list)] for index in range(len(labels))]

    fig = ff.create_distplot(
        costs,
        group_labels=labels,
        colors=colors,
        bin_size=4,
        show_rug=False)
    for color, cost_list in zip(colors, costs):
        fig.add_vline(
            x=np.median(cost_list),
            line_width=3,
            line_dash="dash",
            line_color=color
        )

    # Axis styling shared by the x and y axes.
    common_axis_layout = dict(
        showline=True,
        linecolor="#a2a2a2",
        linewidth=1,
        showgrid=True,
        gridcolor="#a2a2a2",
    )
    fig.update_layout(
        template="plotly_white",
        width=700,
        height=400,
        title=dict(text=f"{title}", x=0.5),
        yaxis=dict(
            **common_axis_layout,
            title=dict(text="density"),
            type="log"
        ),
        xaxis=dict(
            **common_axis_layout,
            title=dict(text="cost")
        ),
        bargap=0.1,
        font=dict(
            size=12,
            color="Black"
        )
    )
    return fig
    

## Trajectory Cost Distributions

- Experiment in Figure 4.1


In [33]:
n_trials = 5

# Expert rollouts: MPC without ("obl") and with ("dst") disturbance foresight.
mpc_obl_trajectories = [run_mpc(use_foresight=False) for _ in tqdm(range(n_trials))]
mpc_dst_trajectories = [run_mpc(use_foresight=True) for _ in tqdm(range(n_trials))]
# The IO controller plotted as "IO-MPC (dst)" is trained on the foresighted
# MPC rollouts it is named after, not on the oblivious ones.
io_mpc_dst_trajectories = [run_io(dataset=mpc_dst_trajectories) for _ in tqdm(range(n_trials))]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]


You are solving a parameterized problem that is not DPP. Because the problem is not DPP, subsequent solves will not be faster than the first one. For more information, see the documentation on Discplined Parametrized Programming, at
	https://www.cvxpy.org/tutorial/advanced/index.html#disciplined-parametrized-programming



Plot the cost distributions.

In [130]:
# Flatten the per-trial rollouts into one list of transition costs per controller.
mpc_obl_costs = [transition.cost for transition in chain(*mpc_obl_trajectories)]
mpc_dst_costs = [transition.cost for transition in chain(*mpc_dst_trajectories)]
io_mpc_dst_costs = [transition.cost for transition in chain(*io_mpc_dst_trajectories)]

fig = make_figure({
    "MPC (obl)": mpc_obl_costs,
    "MPC (dst)": mpc_dst_costs,
    "IO-MPC (dst)": io_mpc_dst_costs,
    },
    title=f"Figure 4.1 with {len(mpc_dst_trajectories)} trials",
)
# End the cell with the figure so it is rendered as the cell output.
fig

- Experiment in Figure 4.2

In [10]:
n_trials = 5
# Column-vector offset added to the state disturbance passed to the control loop.
disturbance_bias = np.array([[0.1], [0.05]])
rho = 1e-2

# Expert rollouts under the biased disturbance: plain MPC and robust MPC.
mpc_pdst_trajectories = [
    run_mpc(use_foresight=True, disturbance_bias=disturbance_bias)
    for _ in tqdm(range(n_trials))
]
rmpc_pdst_trajectories = [
    run_rmpc(use_foresight=True, disturbance_bias=disturbance_bias, rho=rho)
    for _ in tqdm(range(n_trials))
]

# One IO controller is trained and rolled out per trial, for each expert dataset.
io_mpc_pdst_trajectories = [
    run_io(dataset=mpc_pdst_trajectories)
    for _ in tqdm(range(n_trials))
]
io_rmpc_pdst_trajectories = [
    run_io(dataset=rmpc_pdst_trajectories)
    for _ in tqdm(range(n_trials))
]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

In [132]:
# Flatten the per-trial rollouts into one list of transition costs per controller.
io_mpc_pdst_costs = [step.cost for step in chain.from_iterable(io_mpc_pdst_trajectories)]
io_rmpc_pdst_costs = [step.cost for step in chain.from_iterable(io_rmpc_pdst_trajectories)]

fig = make_figure(
    cost_data={
        "IO-MPC": io_mpc_pdst_costs,
        "IO-RMPC": io_rmpc_pdst_costs,
    },
    title=f"Figure 4.2 with {len(io_rmpc_pdst_trajectories)} trials",
)
# Stack the histogram bars; the returned figure is rendered as the cell output.
fig.update_layout(barmode="stack")


- Experiment in Figure 4.3

In [12]:
n_trials = 5
disturbance_bias = np.array([[0.1], [0.05]])
rho = 1e-2

# Only the full-foresight, unbiased MPC rollouts are generated in this cell.
# The other rollouts used for Figure 4.3 (mpc_obl_trajectories,
# mpc_pdst_trajectories, rmpc_pdst_trajectories, io_rmpc_pdst_trajectories)
# are not regenerated here and must already exist in the kernel.
mpc_fdst_trajectories = [
    run_mpc(use_foresight=True, disturbance_bias=None)
    for _ in tqdm(range(n_trials))
]

  0%|          | 0/5 [00:00<?, ?it/s]

In [141]:
# Flatten the per-trial rollouts into one list of transition costs per controller.
mpc_obl_costs = [step.cost for step in chain.from_iterable(mpc_obl_trajectories)]
mpc_fdst_costs = [step.cost for step in chain.from_iterable(mpc_fdst_trajectories)]
mpc_pdst_costs = [step.cost for step in chain.from_iterable(mpc_pdst_trajectories)]
io_rmpc_pdst_costs = [step.cost for step in chain.from_iterable(io_rmpc_pdst_trajectories)]

fig = make_figure(
    cost_data={
        "MPC (obl)": mpc_obl_costs,
        "MPC (f-dst)": mpc_fdst_costs,
        "MPC (p-dst)": mpc_pdst_costs,
        "IO-RMPC": io_rmpc_pdst_costs,
    },
    title=f"Figure 4.3 with {len(io_rmpc_pdst_trajectories)} trials",
)
# End the cell with the figure so it is rendered as the cell output.
fig
