In [7]:
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize
from stable_baselines3.common.env_util import make_vec_env

import gymnasium as gym
from gymnasium.wrappers import RescaleAction, NormalizeObservation, NormalizeReward
import nav2d       
import numpy as np
import os
from tqdm import tqdm

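Test a given model: select the saved run, build the rendering environment, and load the trained SAC policy (with its normalization statistics when `normalize` is enabled):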

In [8]:
import pyautogui
    
# --- which saved run to test --------------------------------------------------
# the model is looked up under results_path/desired_result_path/desired_run
results_path = "results/Nav2D_SAC_SB3_results"
desired_result_path = "result_060"
desired_run = "run_100"

# when True, the env is wrapped in a VecNormalize restored from the saved stats
normalize = True

# --- reward shaping weights handed to the environment -------------------------
reward_scale = {
    "rew_dist_scale": 0.0,
    "rew_dist_approach_scale": 375.0,
    "rew_head_scale": 0.25,
    "rew_head_approach_scale": 0.0,
    "rew_goal_scale": 2_500.0,
    "rew_obst_scale": -500.0,
    "rew_time": -0.1,
}

# --- randomization frequencies handed to the environment ----------------------
# NOTE(review): presumably "every N episodes" for agent/goal/obstacle -- confirm in nav2d
randomization_options = {
    "agent_freq": 1,
    "goal_freq": 1,
    "obstacle_freq": 1,
}

# --- number of test episodes to roll out --------------------------------------
testing_length = 25

# --- render window and camera -------------------------------------------------
width = 1280
height = 1280
default_camera_config = {
    "azimuth": 90.0,
    "elevation": -90.0,
    "distance": 3,
    "lookat": [0.0, 0.0, 0.0],
}
camera_id = 2

# render flags; not referenced by the cells shown in this notebook section
DEFAULT_CAMERA = "overhead_camera"
ENABLE_FRAME = True
RENDER_EVERY_FRAME = True

# keyword arguments for the single on-screen ("human") test environment:
env_kwargs = {
    "render_mode": "human",
    "width": width,
    "height": height,
    "default_camera_config": default_camera_config,
    "camera_id": camera_id,
    "max_episode_steps": 1000,
    "is_eval": False,
    "reward_scale_options": reward_scale,
    "randomization_options": randomization_options,
}
env = gym.make("Nav2D-v0", **env_kwargs)

# the saved model and the normalization statistics are both read from the run's directory:
run_dir = os.path.join(results_path, desired_result_path, desired_run)
model_path = os.path.join(run_dir, desired_run)

if not normalize:
    # no normalization: load the policy on its own and step the raw gymnasium env:
    model = SAC.load(model_path)
    eval_env = env
    print("created a base environment!")
else:
    # VecNormalize wraps a VecEnv, so put the single env into a one-element DummyVecEnv
    # and restore the statistics saved alongside the model:
    stats_path = os.path.join(run_dir, "vec_norm_env_stats.pkl")
    eval_env = VecNormalize.load(stats_path, DummyVecEnv([lambda: env]))

    # test mode: do not update the running statistics, and leave rewards unnormalized:
    eval_env.training = False
    eval_env.norm_reward = False
    print("created a normalized environment!")

    # attach the normalized env to the loaded policy:
    model = SAC.load(model_path, env = eval_env)

created a normalized environment!


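Visualize the performance of the model: roll out `testing_length` rendered episodes with the deterministic policy and report the success rate: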

In [9]:
# Roll out `testing_length` episodes with the deterministic policy and report the
# fraction of episodes whose final `info` carries a truthy "is_success".
#
# Two step/reset APIs are handled:
#   * normalize = True  -> SB3 VecEnv: reset() returns only the observation, step()
#     returns (obs, rewards, dones, infos), and the env resets itself when an episode
#     ends, so the returned observation is already the first one of the next episode.
#   * normalize = False -> gymnasium env: reset() returns (obs, info), step() returns
#     (obs, reward, terminated, truncated, info), and the reset must be called manually.

# one success flag per finished episode:
successes = []

# reset before running:
if normalize:
    obs = eval_env.reset()
else:
    obs, _ = eval_env.reset()

try:
    # for every test episode:
    for eps in range(testing_length):
        done = False

        # step until the episode ends:
        while not done:
            action, _ = model.predict(obs, deterministic = True)
            if normalize:
                # always take the returned observation: on the final step it is the
                # auto-reset observation, which must seed the next episode's first action
                obs, reward, dones, infos = eval_env.step(action)
                done = bool(dones[0])
                info = infos[0]
            else:
                obs, reward, term, trunc, info = eval_env.step(action)
                done = term or trunc

        # record the outcome for both environment types:
        successes.append(bool(info.get("is_success", False)))

        # a plain gymnasium env does not reset itself:
        if not normalize:
            obs, _ = eval_env.reset()
finally:
    # close the render window even if the rollout is interrupted or raises:
    eval_env.close()

# avoid np.mean([]) (nan + RuntimeWarning) when no episode was run:
if successes:
    success_rate = np.mean(successes)
    print(f"\nsuccess rate is: {success_rate*100}%")
else:
    success_rate = float("nan")
    print("\nno episodes were run, success rate is undefined.")

 @ episode 25 | abs_diff: 0.496 | rew_dist_approach: 1.2985 | rew_head: 0.0605 | total: 1.25899                               
success rate is: 96.0%
