In [1]:
# Dependencies
import glob
import logging
import os
import warnings
from pathlib import Path

import imageio
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import HTML, Image
from matplotlib import pyplot as plt, patches
from pyvirtualdisplay import Display

from evaluation.policy_evaluation import evaluate_policy
from nocturne.envs.base_env import BaseEnv
from utils.config import load_config_nb
from utils.policies import load_policy

# Seaborn defaults: "notebook" context, fonts scaled by 1.1, 10x5 default figure size.
sns.set('notebook', font_scale=1.1, rc={'figure.figsize': (10, 5)})
# "ticks" style with the figure and axes face colors set to 'none'.
sns.set_style('ticks', rc={'figure.facecolor': 'none', 'axes.facecolor': 'none'})
# Ask the inline backend to render figures as SVG.
%config InlineBackend.figure_format = 'svg'
# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")
# Limit matplotlib's own logging to WARNING and above.
plt.set_loglevel('WARNING')

# Written after the seaborn calls above, which also update rcParams.
mpl.rcParams['lines.markersize'] = 9

### Helper functions

In [2]:
def create_video(image_array, filename, *, fps=4, loop=500):
    """Write a sequence of frames to ``filename`` through an imageio writer.

    Args:
        image_array: Iterable of frame arrays. Each frame has its axes
            permuted so that the original axis 0 ends up last
            (presumably channel-first -> channel-last; confirm with callers).
        filename: Output target handed to ``imageio.get_writer``.
        fps: Frames per second; passed to the writer as a per-frame
            ``duration`` of ``1_000 / fps``.
        loop: Forwarded unchanged to the writer's ``loop`` argument.
    """
    frame_duration = 1_000 / fps
    with imageio.get_writer(filename, duration=frame_duration, loop=loop) as video_writer:
        for frame in image_array:
            # Axis 0 -> 2, axis 1 -> 0, axis 2 -> 1.
            reordered = np.moveaxis(frame, [0, 1, 2], [2, 0, 1])
            video_writer.append_data(reordered)


def display_gif(filename, width=500, height=500):
    """Show the GIF stored at ``filename`` inline in the notebook.

    Args:
        filename: Path of the GIF file to read.
        width: Display width forwarded to ``Image``.
        height: Display height forwarded to ``Image``.
    """
    # Read inside a context manager so the file handle is closed right away
    # instead of being left open until garbage collection.
    with open(filename, "rb") as gif_file:
        gif_bytes = gif_file.read()

    display(Image(data=gif_bytes, format="gif", width=width, height=height))

### Settings

In [3]:
# Cap on the number of scene files: written to env_config.num_files and used
# to truncate the train/test file lists in the configuration cell.
MAX_FILES = 100
# Not referenced in the visible cells.
DETERMINISTIC = True
# When True, the configuration cell sets env_config.max_num_vehicles to 1.
SINGLE_AGENT_MODE = True
# Result columns that the summary cells average.
METRICS = [
    "goal_rate",
    "off_road",
    "veh_veh_collision",
]

# Not referenced in the visible cells.
VIDEO_PATH = "../evaluation/videos"

### Configurations

In [4]:
# Load config files
env_config = load_config_nb("env_config")
exp_config = load_config_nb("exp_config")
video_config = load_config_nb("video_config")
model_config = load_config_nb("model_config")

# Set data path
env_config.data_path = "../data_full/train/"
env_config.val_data_path = "../data_full/valid/"
env_config.num_files = MAX_FILES

# Convert to single agent env
if SINGLE_AGENT_MODE:
    env_config.max_num_vehicles = 1

# Root logger level: only CRITICAL messages are emitted
LOGGING_LEVEL = "CRITICAL"

# Scenes on which to evaluate the models
# File names are sorted so the order is fixed and the first MAX_FILES entries
# are the same on every run (same files as used for training)
train_file_paths = glob.glob(os.path.join(env_config.data_path, "tfrecord*"))
train_eval_files = sorted(os.path.basename(path) for path in train_file_paths)[:MAX_FILES]

# Test
test_file_paths = glob.glob(os.path.join(env_config.val_data_path, "tfrecord*"))
test_eval_files = sorted(os.path.basename(path) for path in test_file_paths)[:MAX_FILES]

# Behavioral cloning model path
BC_BASE_PATH = "../models/il/"

# Logging
logger = logging.getLogger()
logging.basicConfig(format="%(message)s")
logger.setLevel(LOGGING_LEVEL)

## 1. Establish baseline performance

### 1.1 What is the performance of expert replay? | Step env **using positions**

In [6]:
# Expert replay baseline. The training directory is taken from
# env_config.data_path (set in the configuration cell) instead of repeating
# the literal path here.
# NOTE(review): controlled_agents=20 is requested while SINGLE_AGENT_MODE sets
# env_config.max_num_vehicles to 1, and select_from_k_scenes=1000 exceeds
# MAX_FILES (100) -- confirm which limits evaluate_policy actually applies.
df_expert_replay = evaluate_policy(
    env_config=env_config,
    data_path=env_config.data_path,
    mode='expert_replay',
    controlled_agents=20,
    num_episodes=200,
    select_from_k_scenes=1000,
)

100%|██████████| 200/200 [00:22<00:00,  8.83it/s]


In [7]:
# Column-wise mean of the METRICS columns, scaled by 100 (shown as percentages).
df_expert_replay[METRICS].mean() * 100

goal_rate            96.512408
off_road              0.737760
veh_veh_collision     2.749832
dtype: float64

### 1.2 What is the performance of expert replay? | Step env using **continuous actions**

In [None]:
# Expert replay using mode 'cont_expert_act_replay'. The training directory is
# taken from env_config.data_path (set in the configuration cell) instead of
# repeating the literal path here.
# NOTE(review): controlled_agents=20 is requested while SINGLE_AGENT_MODE sets
# env_config.max_num_vehicles to 1 -- confirm which one takes effect.
df_cont_act_expert_replay = evaluate_policy(
    env_config=env_config,
    data_path=env_config.data_path,
    mode='cont_expert_act_replay',
    controlled_agents=20,
    num_episodes=200,
    select_from_k_scenes=1000,
)

In [None]:
# Column-wise mean of the METRICS columns, scaled by 100 (shown as percentages).
df_cont_act_expert_replay[METRICS].mean() * 100

goal_rate            36.4
off_road             19.6
veh_veh_collision    12.7
dtype: float64

### 1.3 What is the performance of the discretized expert replay? | Step env using **discrete actions**

In [12]:
# Uncomment this to change the action space
# env_config.accel_discretization = 1001
# env_config.accel_lower_bound = -200
# env_config.accel_upper_bound = 200
# env_config.steering_lower_bound = -np.pi # steer right
# env_config.steering_upper_bound = np.pi # steer left
# env_config.steering_discretization = 1001

In [11]:
# Expert replay using mode 'disc_expert_act_replay'.
# NOTE(review): this call passes num_scenes/max_iters and no data_path, whereas
# the other evaluate_policy calls in this notebook pass data_path,
# controlled_agents, num_episodes and select_from_k_scenes -- confirm that it
# matches the current evaluate_policy signature.
df_disc_expert_replay = evaluate_policy(
    env_config=env_config,
    mode='disc_expert_act_replay',
    num_scenes=100,
    max_iters=1000,
)

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:49<00:00, 20.29it/s]


In [13]:
# Column-wise mean of the METRICS columns, scaled by 100 (shown as percentages).
df_disc_expert_replay[METRICS].mean() * 100

goal_rate            48.1
off_road             18.4
veh_veh_collision     8.5
dtype: object

### 1.4 What is the performance of the BC policy?

- BC model trained to an accuracy of 0.983 (98.3 %)
- No correction

In [7]:
# Name of the behavioral-cloning policy file to evaluate.
# Alternative checkpoint: 'human_policy_D99_S13_FILTERED_01_29_14_46'
pol_name = 'human_policy_D99_S104_FILTERED_01_29_14_02'

In [9]:
# BEHAVIORAL CLONING
# Load the policy file selected by `pol_name` (load_policy is imported in the
# dependencies cell at the top of the notebook).
# NOTE(review): BC_BASE_PATH from the configuration cell points at the same
# directory with a trailing slash; confirm whether load_policy accepts it
# before replacing this literal.
human_policy = load_policy(
    data_path="../models/il",
    file_name=pol_name,
)

# Evaluate the loaded policy in mode "policy"; the scene directory is taken
# from env_config.data_path instead of repeating the literal train path.
df_bc = evaluate_policy(
    env_config=env_config,
    controlled_agents=1,
    data_path=env_config.data_path,
    mode="policy",
    policy=human_policy,
    select_from_k_scenes=100,
    num_episodes=100,
)

100%|██████████| 100/100 [00:05<00:00, 18.56it/s]


In [10]:
# Column-wise mean of the shared METRICS columns. Unlike the other summary
# cells, the result is left as a fraction (not multiplied by 100).
df_bc[METRICS].mean()

goal_rate            0.47
off_road             0.21
veh_veh_collision    0.12
dtype: float64