In [None]:
from stable_baselines3 import TD3,SAC
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env

import gymnasium as gym
from gymnasium.wrappers import RescaleAction
import torch
import nav2d        # Have to import the nav2d Python script, else we can't make env
import numpy as np
import os, re, json, time
from datetime import datetime
from tqdm import tqdm

import pyautogui

Select the result and run to simulate

In [None]:
# Pick which training result / run checkpoint to evaluate.
result_num, run_num = "result_00011", "run_100"

# The checkpoint path is resolved relative to the current working directory:
#   <cwd>/results/Nav2D_TD3_SB3_results/<result_num>/<run_num>
result_dir = os.getcwd()
checkpoint_parts = ("results", "Nav2D_TD3_SB3_results", result_num, run_num)
run_path = os.path.join(result_dir, *checkpoint_parts)

# Load the trained TD3 policy stored at that path.
model_load = TD3.load(run_path)

Simulation parameters

In [None]:
# --- Test-run settings ---------------------------------------------------
n_test = 10_000          # number of evaluation episodes
success_count = 0        # tally of successful episodes, starts at zero

# --- Viewer toggles ------------------------------------------------------
DEFAULT_CAMERA = "overhead_camera"
ENABLE_FRAME = True            # enable the body frames
RENDER_EVERY_FRAME = True      # similar sim speed as MuJoCo rendering when set to False, else slower

# --- Environment / rendering options -------------------------------------
width, height = 1920, 1080
camera_id = 2
default_camera_config = dict(
    azimuth=90.0,
    elevation=-90.0,
    distance=3,
    lookat=[0.0, 0.0, 0.0],
)

# Only open the interactive viewer for short runs (10 episodes or fewer);
# longer runs use the "rgb_array" render mode instead.
if n_test <= 10:
    render_mode = "human"
else:
    render_mode = "rgb_array"

Simulation

In [None]:
# Build the evaluation environment from the options set in the parameter cell.
test_env = gym.make(
    "Nav2D-v0",
    render_mode=render_mode,
    width=width,
    height=height,
    default_camera_config=default_camera_config,
    camera_id=camera_id,
    max_episode_steps=1_000,
    is_eval=False,
)

# (Re)initialise every tally inside this cell so that re-running it does not
# accumulate results left over from a previous execution.
success_count = 0
agent_init_list = []
rew_head_list = []

try:
    obs, info = test_env.reset()

    # Reach through the wrappers to read attributes of the underlying env.
    core_env = test_env.unwrapped
    rew_goal = core_env.rew_goal_scale

    for eps in tqdm(range(n_test), ncols=100, colour="#33FF00", desc="Evaluating..."):
        done = False
        while not done:
            # Deterministic policy action for the current observation.
            action, _ = model_load.predict(obs, deterministic=True)
            nobs, rew, term, trunc, info = test_env.step(action)
            done = term or trunc

            # A step counts as a success when its reward equals the goal reward.
            # NOTE(review): assumes the env emits exactly `rew_goal_scale` on
            # reaching the goal — confirm against the nav2d reward function.
            if rew == rew_goal:
                success_count += 1

            if not done:
                obs = nobs
            else:
                # NOTE(review): the values recorded here come from the reset that
                # starts the NEXT episode, so the very first spawn is not recorded
                # and the last recorded spawn is never simulated — confirm intent.
                obs, info = test_env.reset()
                agent_init_list.append(info["agent_init"])
                rew_head_list.append(info["rew_head"])
finally:
    # Always release the environment, even if the loop raises or is interrupted.
    test_env.close()

if n_test > 0:
    print(f"Success rate out of {n_test} runs is {success_count / n_test * 100:.2f}%")

Evaluating...:   2%|[38;2;51;255;0m█                                             [0m| 23/1000 [00:03<02:30,  6.51it/s][0m

Inspect the heading reward over the runs

In [None]:
from statistics import mean, stdev

# Summarise the recorded heading rewards: arithmetic mean and sample standard deviation.
head_mu = mean(rew_head_list)
head_sigma = stdev(rew_head_list)
print(f"Heading reward           μ = {head_mu: 6.5f}, σ={head_sigma: 6.5f}")

Explore the reward received in each (discretized) region of the state space

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


def _bin_index(coord, half_extent, n_bins):
    """Return the bin index in [0, n_bins - 1] for a coordinate.

    The interval [-half_extent, +half_extent] is split into ``n_bins`` equal
    bins. Coordinates on or beyond the edges are clamped into the first/last
    bin, so the result is always a valid, non-negative array index.
    """
    idx = int((coord + half_extent) / (2 * half_extent) * n_bins)
    return min(max(idx, 0), n_bins - 1)


# The binning below treats spawn coordinates as lying in [-size, +size].
# NOTE(review): assumes `core_env.size` is the half-width of the arena — confirm.
size = core_env.size
n_bins = 50
grid_count = np.zeros((n_bins, n_bins), dtype=int)
grid_reward_sum = np.zeros((n_bins, n_bins), dtype=np.float64)

# Accumulate, per grid cell, the number of spawns and the summed heading reward.
for (x, y, _theta), rew_head in zip(agent_init_list, rew_head_list):
    ix = _bin_index(x, size, n_bins)
    iy = _bin_index(y, size, n_bins)
    grid_count[ix, iy] += 1
    grid_reward_sum[ix, iy] += rew_head

# Per-cell mean reward; cells that received no sample stay at 0.
grid_reward = np.divide(
    grid_reward_sum,
    grid_count,
    out=np.zeros_like(grid_reward_sum),
    where=grid_count > 0,
)

fig, axes = plt.subplots(1, 2, figsize=(20, 8))
panels = (
    (grid_count, f'Agent spawn frequency in {result_num}'),
    (grid_reward, f'Average reward in {result_num}'),
)
for ax, (grid, title) in zip(axes, panels):
    sns.heatmap(grid, ax=ax, cmap='plasma')
    ax.invert_yaxis()
    ax.set_aspect('equal')
    ax.set_title(title)
    # Rows of the grid are indexed by ix and columns by iy, so the x bins run
    # along the vertical axis and the y bins along the horizontal axis.
    ax.set_xlabel('y bin')
    ax.set_ylabel('x bin')

plt.show()

In [None]:
# Preview only the first few entries; dumping the full list would flood the cell output.
rew_head_list[:10]