---

This Notebook was developed by [Haimin Hu](https://haiminhu.org/) and [Zixu Zhang](https://zzx9636.github.io/) for the CoRL'23 paper [_Deception game: Closing the safety-learning loop in interactive robot autonomy_](https://saferoboticslab.github.io/Belief-Game/).

Instructions:

1. Select the planner by setting the `method` variable: 'bel' (Deception Game) | 'map' (MAP baseline) | 'con' (contingency baseline) | 'rbs' (robust baseline)
2. Run the cell to produce the simulated trajectory.

Note that the code loads pre-trained models by default. Feel free to train your own model and test it with this Notebook.

---

In [None]:
import numpy as np
from os.path import join
from copy import deepcopy
from functools import partial
import matplotlib.pyplot as plt
from omegaconf import OmegaConf
from types import SimpleNamespace

from agent.isaacs import ISAACS
from utils.dstb import adv_dstb
from simulators import BeliefGameEnv, UberNoBeliefEnv
from utils.visualization import plot_traj, get_values

# Planner to simulate: 'bel' (Deception Game), 'map' (MAP baseline),
# 'con' (contingency baseline) or 'rbs' (robust baseline).
method = 'bel'  # 'bel', 'map', 'con', 'rbs'
# Copied into `env.agent.dyn.clip_dstb_based_on_belief` when the environments are built.
is_inference_hypothesis = True

# region: loads configs
# `cfg` drives the belief-game solver/environment, `cfg_rbs` the robust baseline.
cfg = OmegaConf.load("config/intent_isaacs.yaml")
cfg_rbs = OmegaConf.load("config/robust_isaacs.yaml")

# In both configs the training device is overridden with the solver's device.
for _cfg in (cfg, cfg_rbs):
  _cfg.train.device = _cfg.solver.device
# endregion

# region: constructs the environment.
# Builds the two base environments: the belief-game one and a temporary
# no-belief one whose agent is later reused for the robust baseline.
env_class = BeliefGameEnv
env = env_class(cfg.environment, cfg.agent, cfg.cost)
env_rbs_tmp = UberNoBeliefEnv(cfg_rbs.environment, cfg_rbs.agent, cfg_rbs.cost)

# Both environments share the same stepping flag and track length.
for _base_env in (env, env_rbs_tmp):
  _base_env.step_keep_constraints = False
env.track_len = 120
env_rbs_tmp.track_len = env.track_len

# Builds one ISAACS solver per configuration and keeps a handle on its policy.
solver = ISAACS(cfg.solver, cfg.train, cfg.arch, cfg.environment, verbose=False)
solver_rbs = ISAACS(
    cfg_rbs.solver, cfg_rbs.train, cfg_rbs.arch, cfg_rbs.environment, verbose=False
)
policy = solver.policy
policy_rbs = solver_rbs.policy

# Loads the pre-trained ISAACS networks.
_file_prefix = "experiments/bgame_intent_isaacs/v1"
_file_prefix_robust = "experiments/robust_isaacs/v1"
# (actor key, checkpoint path relative to the experiment folder)
_actor_checkpoints = (
    ('ctrl', 'model/ctrl/ctrl.pth'),
    ('dstb', 'model/dstb/dstb.pth'),
)

# Belief-game policy: central critic first, then the control and disturbance actors.
policy.critics['central'].load_network(
    join(_file_prefix, 'model/central/central.pth'), verbose=True
)
for _actor_key, _rel_path in _actor_checkpoints:
  policy.actors[_actor_key].load_network(join(_file_prefix, _rel_path), verbose=True)

# Robust policy: only the two actors are loaded here (no critic).
for _actor_key, _rel_path in _actor_checkpoints:
  policy_rbs.actors[_actor_key].load_network(
      join(_file_prefix_robust, _rel_path), verbose=True
  )

# Sets up the adversary.
# `adv_dstb` is bound to the belief-game disturbance network, so every planner
# below is rolled out against the same adversary callable.
dstb_policy = policy.dstb.net
adversary = partial(adv_dstb, dstb_policy=dstb_policy, use_ctrl=policy.dstb_use_ctrl)

# Constructs environments.
# NOTE: statement order matters in this section. Each environment is a deep copy
# of an earlier one, so it only inherits what was configured before the copy.
# -> Belief Game
# The flag is set on the base `env` before copying, so every copy carries it.
env.agent.dyn.clip_dstb_based_on_belief = is_inference_hypothesis
env_bel = deepcopy(env)
# Gives the copied agent the belief-game control network as its policy.
env_bel.agent.init_policy(
    policy_type="NNCS", cfg=SimpleNamespace(device=policy.device), actor=policy.ctrl.net
)

# -> MAP Baseline
# Copied after `init_policy`, so it includes the belief-game control policy.
env_map = deepcopy(env_bel)

# -> Robust Baseline
# The temporary no-belief environment's agent gets the robust control network ...
env_rbs_tmp.agent.init_policy(
    policy_type="NNCS", cfg=SimpleNamespace(device=policy_rbs.device), actor=policy_rbs.ctrl.net
)
env_rbs = deepcopy(env_bel)
# ... and is attached by reference (not copied) as `agent_robust`.
env_rbs.agent_robust = env_rbs_tmp.agent

# -> Contingency Baseline
# Deep copy of the robust environment, so it holds its own copy of `agent_robust`.
env_con = deepcopy(env_rbs)
# endregion

# region: problem setup
Ni = env.agent.dyn.num_intent

# Initial state layout (index: entry):
#   0: x, 1: y, 2: v, 3: psi, 4: delta, 5: xH, 6: yH, 7: bP, 8: bC, 9...: bIntents
_state_without_intents = [24.0, 4.0, 12.0, 0., 0., 82.5, 19.0, 0.5, 0.5]
# Uniform belief over the Ni intent hypotheses.
_intent_beliefs = Ni * [1. / Ni]
state_init = _state_without_intents + _intent_beliefs

right_lane = 4.0
hum_cruise_spd = -2.5
# The tracked cruise speed is the initial speed entry of the state.
av_cruise_spd = state_init[2]
assert av_cruise_spd <= cfg.cost.v_max
# endregion

# region: Simulates the scenario with both the optimal control and disturbance.
# Maps each planner name to the environment it runs on and the extra keyword
# flag(s) that request that planner; 'bel' needs no flag.
_planner_setups = {
    'bel': (env_bel, {}),
    'map': (env_map, {'map_plan': True}),
    'rbs': (env_rbs, {'rbs_plan': True}),
    'con': (env_con, {'con_plan': True}),
}
# Fail fast on a typo in `method`; otherwise no rollout would run and the
# first symptom would be a NameError on `result` further down.
if method not in _planner_setups:
  raise ValueError(
      f"Unknown method {method!r}; expected one of {sorted(_planner_setups)}."
  )

_sim_env, _plan_flags = _planner_setups[method]
# All planners share the same initial state, tracking targets, horizon and adversary.
traj, result, info = _sim_env.get_shielded_trajectory_and_cap_zone(
    np.asarray(state_init), py_track=right_lane, v_track=av_cruise_spd, T_rollout=160,
    adversary=adversary, **_plan_flags
)
print("result: ", result)
# endregion

# region: plots
# Three stacked panels: trajectory over a value-function slice, the type
# belief b(P), and the intent beliefs b(g).
fontsize = 12
fig, axes = plt.subplots(3, 1, figsize=(8, 10))
ax = axes[0]

# Plots the trajectory.
plot_traj(ax, traj, result, c='g', lw=2., vel_scatter=False, zorder=1, s=40, plot_human=True)

# Plots the level set (slicing the terminal state).
# The non-positional state entries are fixed to their values at the last
# trajectory step, using the indices of the state layout (2: v ... 8: bC).
xs, ys = env.get_samples(100, 100)
values = get_values(
    env, policy.value, xs, ys, batch_size=512, fail_value=cfg.cost.v_max, v=traj[-1, 2],
    yaw=traj[-1, 3], delta=traj[-1, 4], xH=traj[-1, 5], yH=traj[-1, 6], bP=traj[-1, 7],
    bC=traj[-1, 8]
)
im = ax.imshow(
    values.T, interpolation='none', extent=env.visual_extent, origin="lower", cmap='seismic',
    vmin=cfg.cost.v_min, vmax=cfg.cost.v_max, zorder=-1, alpha=0.5
)

ax.set_xlabel("x (m)", fontsize=fontsize)
ax.set_ylabel("y (m)", fontsize=fontsize)
ax.axis(env.visual_extent)
ax.set_xticks(np.around(env.visual_bounds[0], 1))
ax.set_yticks(np.around(env.visual_bounds[1], 1))
ax.tick_params(axis='both', which='major', labelsize=fontsize)

# Plots the type hypotheses belief.
bel_thres = cfg.agent.bel_thres
bel_thres_intent = cfg.agent.bel_thres_intent
ax = axes[1]
bP_traj = traj[:, 7]
# Integer step indices 0..N-1, so sample k is drawn exactly at time step k.
# (linspace(0, N, N) would stretch the axis by a factor N / (N - 1).)
time_steps = np.arange(len(bP_traj))
ax.plot(time_steps, bP_traj, c='b')
# Dashed red lines mark the belief threshold and its complement.
for _level in (bel_thres, 1 - bel_thres):
  ax.axhline(_level, color='r', linestyle='--')
ax.set_ylim(0, 1)
# ax.set_xlabel("time steps", fontsize=fontsize)
ax.set_ylabel("b(P)", fontsize=fontsize)

# Plots the intent hypotheses belief.
ax = axes[2]
# Intent beliefs occupy state columns 9 .. 9 + Ni - 1.
for i in range(9, 9 + Ni):
  bI_traj = traj[:, i]
  ax.plot(time_steps, bI_traj)
# The threshold and axis decoration do not depend on the intent index, so they
# are applied once (and still applied if there are no intent columns).
ax.axhline(bel_thres_intent, color='r', linestyle='--')
ax.set_ylim(0, 1.)
ax.set_xlabel("time steps", fontsize=fontsize)
ax.set_ylabel("b(g)", fontsize=fontsize)
# endregion
