In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

from utils.config import load_config_nb

# Configuration for plotting and warnings
sns.set(context='notebook', font_scale=1.1, 
        style='ticks', rc={'figure.figsize': (8, 3), 'figure.facecolor': 'none', 'axes.facecolor': 'none'})
plt.set_loglevel('WARNING')
plt.rcParams.update({'lines.markeredgewidth': 1})
warnings.filterwarnings("ignore")
%config InlineBackend.figure_format = 'svg'

In [2]:
from algorithms.ppo.sb3.data_reg_ppo import DataRegularizedPPO
from algorithms.ppo.sb3.reg_ppo import RegularizedPPO
from utils.policy_evaluation import evaluate_policy

### Load the KL-regularized and data-regularized policies

In [3]:
# Checkpoint locations of the two trained policies compared in this notebook.
KL_REG_CHECKPOINT = '/home/emerge/daphne/nocturne_lab/models_tmp/policy_KL-reg-L0.06_S10_I786.zip'
DATA_REG_CHECKPOINT = '/home/emerge/daphne/nocturne_lab/models_tmp/policy_Data-reg-L0.03_S10_I700.zip'

# Restore both policies from disk.
# NOTE(review): the data-regularized checkpoint is loaded through RegularizedPPO
# as well, while DataRegularizedPPO is imported but never used — confirm this is
# intentional.
kl_reg_policy = RegularizedPPO.load(KL_REG_CHECKPOINT)
data_reg_policy = RegularizedPPO.load(DATA_REG_CHECKPOINT)

INFO:root:No regularization weight specified, using default PPO.
INFO:root:No regularization weight specified, using default PPO.


### Configurations

In [4]:
# Evaluation budget: number of scenes to select from and number of episodes
# to run per (policy, setting) combination.
SELECT_FROM_K_SCENES = 10
K_EPISODES = 200

# Environment configuration, with the data path pointed at the NEW scenes
# (with is_av flag).
env_config = load_config_nb("env_config")
env_config.data_path = "../data/train_no_tl"

In [5]:
# Evaluate every policy in every setting and stack the results into one frame.
#
# Each call to evaluate_policy returns a frame of results; it is tagged with the
# evaluation mode and policy type so the rows can be grouped afterwards.
policies = {
    'kl_reg': kl_reg_policy,
    'data_reg': data_reg_policy,
}

# Collect the per-run frames and concatenate once at the end: growing a
# DataFrame with pd.concat inside the loop re-copies all previous rows on every
# iteration, and concatenating onto an empty pd.DataFrame() is deprecated
# behaviour in recent pandas versions.
eval_frames = []

for controlled_agents in [1, 200]:
    # Runs with more than one controlled agent are labelled "self-play",
    # runs with a single controlled agent "log-replay".
    eval_mode = "self-play" if controlled_agents > 1 else "log-replay"

    for policy_type, policy in policies.items():
        df_res = evaluate_policy(
            env_config=env_config,
            controlled_agents=controlled_agents,
            data_path=env_config.data_path,
            mode="policy",
            policy=policy,
            select_from_k_scenes=SELECT_FROM_K_SCENES,
            num_episodes=K_EPISODES,
            use_av_only=False,
            deterministic=False,
        )

        df_res["eval_mode"] = eval_mode
        df_res["policy_type"] = policy_type

        eval_frames.append(df_res)

df = pd.concat(eval_frames, ignore_index=True)

  0%|          | 0/200 [00:00<?, ?it/s]

100%|██████████| 200/200 [00:14<00:00, 13.93it/s]
100%|██████████| 200/200 [00:14<00:00, 14.09it/s]
  9%|▉         | 18/200 [00:05<01:45,  1.73it/s]

In [None]:
# Mean of each metric for every (policy_type, eval_mode) combination.
metric_columns = ["goal_rate", "off_road", "veh_veh_collision"]
group_keys = ["policy_type", "eval_mode"]

tab_res = df.pivot_table(
    values=metric_columns,
    index=group_keys,
    aggfunc="mean",
)

In [None]:
# Display the metrics scaled by 100 (i.e. as percentages, assuming the raw
# values are fractions — TODO confirm) and rounded to two decimal places.
#
# The scaled table gets its own name instead of overwriting `tab_res`, so this
# cell can be re-run without multiplying the values by 100 again.
tab_res_pct = (tab_res * 100).round(2)

tab_res_pct

Unnamed: 0_level_0,Unnamed: 1_level_0,goal_rate,off_road,veh_veh_collision
policy_type,eval_mode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
data_reg,log-replay,86.5,7.0,8.0
data_reg,self-play,96.6,3.4,0.43
kl_reg,log-replay,85.5,5.5,9.5
kl_reg,self-play,94.26,4.15,1.81
