Test Stage-based RL

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO
from gym_chrono.envs.wheeled.off_road_artACL import off_road_art
from gymnasium.utils.env_checker import check_env
import torch as th
import numpy as np
import os
import re
import matplotlib.pyplot as plt
import csv
from torch.utils.tensorboard import SummaryWriter

def evaluate_model(env, model, num_trials=50, max_steps=1000, render=False, success_step_limit=200):
    """Roll out ``model`` on ``env`` for several episodes and summarise them.

    Every trial resets the environment with ``seed=trial`` and then steps it
    with the deterministic action from ``model.predict`` until the episode
    ends or ``max_steps`` steps have been taken.  After each step the vehicle
    orientation is read from ``env.m_vehicle`` and ``np.degrees`` of the
    absolute Euler ``x`` / ``y`` components are recorded as roll / pitch.

    A trial is counted as successful when the environment reports
    ``terminated`` before ``success_step_limit`` steps have elapsed.  For
    successful trials the value of ``env.m_system.GetChTime()`` is stored as
    the traversal time (presumably the simulation clock -- confirm against the
    environment implementation).

    Args:
        env: Environment exposing ``reset(seed=...)``, ``step(action)``,
            ``render(mode)``, ``m_vehicle`` and ``m_system``.
        model: Policy object exposing ``predict(obs, deterministic=True)``.
        num_trials: Number of episodes to run; trial ``i`` is seeded with ``i``.
        max_steps: Upper bound on the number of steps per episode.
        render: When true, ``env.render('follow')`` is called after the reset
            and after every step.
        success_step_limit: A terminated episode only counts as a success if
            it ended in fewer than this many steps (default 200, the value
            that used to be hard-coded).

    Returns:
        Tuple ``(success_count, mean_traversal_time, var_traversal_time,
        avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle)``.
        Roll / pitch statistics are taken over the per-episode mean angles.
        Any statistic that has no samples is reported as ``0`` instead of NaN.
    """

    def _mean_and_var(values):
        # An empty sample yields 0 rather than NaN plus a NumPy RuntimeWarning.
        if not values:
            return 0, 0
        return np.mean(values), np.var(values)

    success_count = 0
    traversal_times = []
    episode_roll_means = []
    episode_pitch_means = []

    for trial in range(num_trials):
        obs, _ = env.reset(seed=trial)
        if render:
            env.render('follow')

        roll_angles = []
        pitch_angles = []

        for step_count in range(1, max_steps + 1):
            action, _states = model.predict(obs, deterministic=True)
            obs, _reward, terminated, truncated, _info = env.step(action)
            if render:
                env.render('follow')

            # Collect roll and pitch angles at each step
            euler_angles = env.m_vehicle.GetVehicle().GetRot().Q_to_Euler123()
            roll_angles.append(np.degrees(abs(euler_angles.x)))
            pitch_angles.append(np.degrees(abs(euler_angles.y)))

            if terminated or truncated:
                # Only an early termination counts as reaching the goal.
                if terminated and step_count < success_step_limit:
                    success_count += 1
                    traversal_times.append(env.m_system.GetChTime())
                break

        # Episodes that never stepped (max_steps <= 0) contribute no samples.
        if roll_angles:
            episode_roll_means.append(np.mean(roll_angles))
            episode_pitch_means.append(np.mean(pitch_angles))

    mean_traversal_time, var_traversal_time = _mean_and_var(traversal_times)
    avg_roll_angle, var_roll_angle = _mean_and_var(episode_roll_means)
    avg_pitch_angle, var_pitch_angle = _mean_and_var(episode_pitch_means)

    return success_count, mean_traversal_time, var_traversal_time, avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle

def evaluate_on_test_levels(env, model, test_level_dir, num_trials=50, max_steps=1000, render=False, writer=None, iteration=None):
    """Evaluate ``model`` on test level 5 and report its metrics.

    The environment is pointed at ``<test_level_dir>/level_5.bmp`` by
    assigning ``env.terrain_file`` and calling
    ``env.update_terrain_stage(level_index=0)``; the model is then evaluated
    with ``evaluate_model``.  The success rate is ``success_count / num_trials``
    (``0.0`` when ``num_trials`` is not positive).

    Args:
        env: Environment to evaluate in; its ``terrain_file`` attribute is
            overwritten.
        model: Policy passed through to ``evaluate_model``.
        test_level_dir: Directory containing ``level_5.bmp``.
        num_trials: Number of evaluation episodes.
        max_steps: Maximum number of steps per episode.
        render: Forwarded to ``evaluate_model``.
        writer: Optional object with an ``add_scalar(tag, value, step)``
            method; used only when ``iteration`` is also given.
        iteration: Step index under which the success rate is logged.

    Returns:
        Tuple ``(success_rate, mean_traversal_time, var_traversal_time,
        avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle)``.
    """
    level_file = "level_5.bmp"
    level_path = os.path.join(test_level_dir, level_file)
    env.terrain_file = [level_path]
    env.update_terrain_stage(level_index=0)

    # Evaluate a single model on a given terrain
    (success_count,
     mean_traversal_time, var_traversal_time,
     avg_roll_angle, var_roll_angle,
     avg_pitch_angle, var_pitch_angle) = evaluate_model(
        env, model, num_trials=num_trials, max_steps=max_steps, render=render)

    # Guard against num_trials == 0, which would otherwise divide by zero.
    success_rate = float(success_count) / num_trials if num_trials > 0 else 0.0

    # Log the success rate for the test level
    if writer is not None and iteration is not None:
        writer.add_scalar('Success Rate/Test Level 5', success_rate, iteration)

    print(f"Test Level {level_file}: Success rate = {success_rate}\n")
    print(f"Mean Traversal Time: {mean_traversal_time:.2f} s")
    print(f"Variance Traversal Time: {var_traversal_time:.2f} s^2")
    print(f"Average Roll/Pitch Angle: {avg_roll_angle:.2f}/{avg_pitch_angle:.2f} deg")
    print(f"Variance Roll/Pitch Angle: {var_roll_angle:.2f}/{var_pitch_angle:.2f} deg^2\n")

    return success_rate, mean_traversal_time, var_traversal_time, avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle


# Loops through all the checkpoint files and evaluates each selected model on test level 5
def evaluate_checkpoints_on_test_levels(checkpoint_dir, test_level_dir, env, writer, csv_file, eval_interval=2, last_evaluated_iter=0):
    """Evaluate saved PPO checkpoints and record their metrics in a CSV file.

    ``checkpoint_dir`` is scanned for ``.zip`` files whose name contains
    ``iter<N>``.  They are visited in ascending ``N`` and every checkpoint
    with ``N % eval_interval == 0`` is loaded with ``PPO.load`` and evaluated
    through ``evaluate_on_test_levels`` (50 trials, at most 1000 steps, no
    rendering).  One CSV row is written, and flushed, per evaluated checkpoint
    so partial results survive an interrupted run.

    Args:
        checkpoint_dir: Directory holding the checkpoint ``.zip`` files.
        test_level_dir: Directory with the test terrain, forwarded to
            ``evaluate_on_test_levels``.
        env: Environment used both to load and to evaluate the models.
        writer: Scalar logger forwarded to ``evaluate_on_test_levels``.
        csv_file: Path of the CSV file to (over)write.
        eval_interval: Only iterations divisible by this value are evaluated.
        last_evaluated_iter: Accepted for interface compatibility; it is
            currently not used by this function.

    Returns:
        Tuple ``(iteration_numbers, overall_success_rates, mean_success_rate)``.
        ``mean_success_rate`` is NaN when no checkpoint was evaluated.
    """
    iter_pattern = re.compile(r'iter(\d+)')

    # Parse the iteration number once per file; skip archives whose name does
    # not carry one instead of failing on ``None.group``.
    checkpoints = []
    for file_name in os.listdir(checkpoint_dir):
        if not file_name.endswith('.zip'):
            continue
        match = iter_pattern.search(file_name)
        if match is None:
            print(f"Skipping {file_name}: no iteration number in file name")
            continue
        checkpoints.append((int(match.group(1)), file_name))
    checkpoints.sort(key=lambda item: item[0])

    overall_success_rates = []
    iteration_numbers = []

    with open(csv_file, mode='w', newline='') as file:
        writer_csv = csv.writer(file)
        writer_csv.writerow(["Iteration", "Success Rate", "Mean Traversal Time", "Var Traversal Time", "Average Roll Angle", "Var Roll Angle", "Average Pitch Angle", "Var Pitch Angle"])

        for iteration, model_file in checkpoints:
            if iteration % eval_interval != 0:
                continue

            print(f"Evaluating {model_file}")
            model = PPO.load(os.path.join(checkpoint_dir, model_file), env)

            metrics = evaluate_on_test_levels(
                env, model, test_level_dir, num_trials=50, max_steps=1000, render=False, writer=writer, iteration=iteration)
            success_rate = metrics[0]
            writer_csv.writerow([iteration, *metrics])
            # Evaluations are slow; push each row to disk as soon as it exists.
            file.flush()

            overall_success_rates.append(success_rate)
            iteration_numbers.append(iteration)

            print(f"Checkpoint {model_file}: Success rate on test level 5 = {success_rate}\n")

    # Same NaN result as np.mean([]) but without the RuntimeWarning.
    mean_success_rate = np.mean(overall_success_rates) if overall_success_rates else float('nan')
    print(f"Mean success rate across all evaluated checkpoints = {mean_success_rate}")

    return iteration_numbers, overall_success_rates, mean_success_rate

# Evaluation configuration: where the checkpoints and the test terrain live
checkpoint_dir = '../train/logs/vws_ppo_checkpoints_BaseRL/'
test_level_dir = '../TestLevels'
last_evaluated_iter = 0

for run_index in range(1, 4):  # Run 3 evaluations
    log_dir = f'./BaseRLTest_logs/run{run_index}'
    csv_file = f'./BaseRLTest_logs/run{run_index}.csv'

    # Set up TensorBoard writer for this run
    writer = SummaryWriter(log_dir)
    env = None
    try:
        # A fresh environment is created for every run
        env = off_road_art()

        iteration_numbers, overall_success_rates, mean_success_rate = evaluate_checkpoints_on_test_levels(
            checkpoint_dir, test_level_dir, env, writer, csv_file, eval_interval=2, last_evaluated_iter=last_evaluated_iter)

        print(f"Overall success rates for all checkpoints in run {run_index}:", overall_success_rates)
        print(f"Mean success rate across all evaluated checkpoints in run {run_index}: {mean_success_rate}")
    finally:
        # Release the writer and the environment even if the evaluation fails,
        # so an interrupted run does not leave them open.
        writer.close()
        if env is not None:
            env.close()

Evaluating ppo_checkpoint_iter0_level49.zip
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Terrain loaded from /home/tong/Documents/gym_chrono/envs/wheeled/../data/terrain_bitmaps/Automatic-CL/TrainLevels/../TestLevels/level_5.bmp
--------------------------------------------------------------
Time out
Initial position:  [ 9.87322, 6.94711, 3 ]
Final position of art:  [ 1.92636, -0.407142, 2.84628 ]
Goal position:  [ -10.4803, 0.642897, 2.5 ]
Distance to goal:  12.455834706903207
Reward:  -234.5575997519418
Accumulated Reward:  -1273.9303447608781
--------------------------------------------------------------
--------------------------------------------------------------
Time out
Initial position:  [ 5.55539, -9.87974, 3 ]
Final position of art:  [ 5.45946, -9.32216, 2.38377 ]
Goal position:  [ 0.803428, 10.4692, 2.5 ]
Distance to goal:  20.332015090216164
Reward:  -303.9328563728093
Accumulated Reward:  -827.883289824935
-----------------------------------