Test Stage-based RL

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO
from gym_chrono.envs.wheeled.off_road_artACL import off_road_art
from gymnasium.utils.env_checker import check_env
import torch as th
import numpy as np
import os
import re
import matplotlib.pyplot as plt
import csv
from torch.utils.tensorboard import SummaryWriter

def evaluate_model(env, model, num_trials=50, max_steps=1000, render=False,
                   success_step_limit=200):
    """Roll out ``model`` on ``env`` and summarise success and attitude statistics.

    Each trial resets the environment with the trial index as seed and then
    steps it with deterministic actions until the episode reports
    ``terminated``/``truncated`` or ``max_steps`` steps have been taken.

    Args:
        env: Environment exposing ``reset(seed=...)``, ``step(action)``,
            ``render(mode)``, ``m_vehicle`` and ``m_system``.
        model: Policy exposing ``predict(obs, deterministic=True)``.
        num_trials: Number of episodes to run.
        max_steps: Upper bound on the steps taken in one episode.
        render: When true, call ``env.render('follow')`` after the reset and
            after every step.
        success_step_limit: An episode counts as a success only when it
            reports ``terminated`` after fewer than this many steps.
            Defaults to 200, the value that used to be hard-coded.

    Returns:
        Tuple ``(success_count, mean_traversal_time, var_traversal_time,
        avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle)``.
        Traversal statistics cover successful episodes only; roll/pitch
        statistics are the mean and variance of the per-episode mean absolute
        angles.  Any statistic with no underlying samples is reported as 0.
    """
    success_count = 0
    traversal_times = []
    episode_roll_means = []
    episode_pitch_means = []

    for trial in range(num_trials):
        # The trial index doubles as the reset seed.
        obs, _ = env.reset(seed=trial)
        if render:
            env.render('follow')

        step_count = 0
        roll_angles = []
        pitch_angles = []

        while step_count < max_steps:
            action, _ = model.predict(obs, deterministic=True)
            obs, _, terminated, truncated, _ = env.step(action)
            if render:
                env.render('follow')

            step_count += 1

            # Record the absolute roll (x) and pitch (y) Euler components.
            # np.degrees assumes the vehicle reports them in radians - TODO confirm.
            euler_angles = env.m_vehicle.GetVehicle().GetRot().Q_to_Euler123()
            roll_angles.append(np.degrees(abs(euler_angles.x)))
            pitch_angles.append(np.degrees(abs(euler_angles.y)))

            if terminated or truncated:
                # NOTE(review): an early ``terminated`` is presumably "goal
                # reached"; confirm against the environment's termination rules.
                if terminated and step_count < success_step_limit:
                    success_count += 1
                    traversal_times.append(env.m_system.GetChTime())
                break

        # An episode that took no step (max_steps <= 0) has nothing to average;
        # skipping it keeps NaN out of the summary statistics.
        if roll_angles:
            episode_roll_means.append(np.mean(roll_angles))
            episode_pitch_means.append(np.mean(pitch_angles))

    # np.mean/np.var of an empty list return NaN with a RuntimeWarning, so every
    # statistic falls back to 0 when it has no samples.
    mean_traversal_time = np.mean(traversal_times) if traversal_times else 0
    var_traversal_time = np.var(traversal_times) if traversal_times else 0
    avg_roll_angle = np.mean(episode_roll_means) if episode_roll_means else 0
    var_roll_angle = np.var(episode_roll_means) if episode_roll_means else 0
    avg_pitch_angle = np.mean(episode_pitch_means) if episode_pitch_means else 0
    var_pitch_angle = np.var(episode_pitch_means) if episode_pitch_means else 0

    return (success_count, mean_traversal_time, var_traversal_time,
            avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle)

def evaluate_on_test_levels(env, model, test_level_dir, num_trials=50, max_steps=1000, render=False, writer=None, iteration=None):
    """Evaluate ``model`` on the ``level_5.bmp`` test terrain and report the results.

    Sets ``env.terrain_file`` to ``level_5.bmp`` inside ``test_level_dir``,
    calls ``env.update_terrain_stage(level_index=0)``, runs ``evaluate_model``
    and prints a summary of the returned statistics.

    Args:
        env: Environment to evaluate in; its ``terrain_file`` attribute is
            overwritten.
        model: Policy passed through to ``evaluate_model``.
        test_level_dir: Directory joined with ``level_5.bmp`` to build the
            terrain path.
        num_trials: Number of evaluation episodes; also the denominator of
            the success rate.
        max_steps: Per-episode step limit passed to ``evaluate_model``.
        render: Passed through to ``evaluate_model``.
        writer: Optional object with ``add_scalar(tag, value, step)``; the
            success rate is logged only when both ``writer`` and
            ``iteration`` are given.
        iteration: Step value used for the logged scalar.

    Returns:
        Tuple ``(success_rate, mean_traversal_time, var_traversal_time,
        avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle)``.

    Raises:
        ZeroDivisionError: If ``num_trials`` is 0.
    """
    level_file = "level_5.bmp"
    level_path = os.path.join(test_level_dir, level_file)
    env.terrain_file = [level_path]
    env.update_terrain_stage(level_index=0)

    # Evaluate a single model on a given terrain
    (success_count, mean_traversal_time, var_traversal_time,
     avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle) = evaluate_model(
        env, model, num_trials=num_trials, max_steps=max_steps, render=render)

    success_rate = float(success_count) / num_trials

    # Log the success rate for the test level
    if writer is not None and iteration is not None:
        writer.add_scalar('Success Rate/Test Level 5', success_rate, iteration)

    print(f"Test Level {level_file}: Success rate = {success_rate}\n")
    # The roll/pitch averages used to be printed on this line as well as on the
    # next one; each metric is now reported exactly once.
    print(f"Mean Traversal Time: {mean_traversal_time:.2f} s")
    print(f"Average Roll/Pitch Angle: {avg_roll_angle:.2f}/{avg_pitch_angle:.2f} deg")
    # A variance of angles in degrees carries squared units.
    print(f"Variance Roll/Pitch Angle: {var_roll_angle:.2f}/{var_pitch_angle:.2f} deg^2\n")

    return (success_rate, mean_traversal_time, var_traversal_time,
            avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle)

def extract_stage_and_iter(filename):
    """Parse the ``stage<N>_iter<M>`` numbers embedded in ``filename``.

    Returns ``(stage, iteration)`` as ints, or ``(None, None)`` when the
    pattern does not occur anywhere in ``filename``.
    """
    found = re.search(r'stage(\d+)_iter(\d+)', filename)
    if found is None:
        return None, None
    stage_text, iter_text = found.groups()
    return int(stage_text), int(iter_text)

# Loops through the checkpoint files and evaluates every selected model on the test level
def evaluate_checkpoints_on_test_levels(checkpoint_dir, test_level_dir, env, writer, csv_file, eval_interval=6, last_evaluated_iter=0):
    """Evaluate the checkpoints in ``checkpoint_dir`` and write the results to CSV.

    Every ``.zip`` file whose name contains ``stage<N>_iter<M>`` is a
    candidate; candidates are processed in ``(stage, iteration)`` order and
    only those with ``(iteration - 1) % eval_interval == 0`` are loaded with
    ``PPO.load`` and passed to ``evaluate_on_test_levels``.  ``.zip`` files
    without a parsable stage/iteration are skipped with a message.

    Args:
        checkpoint_dir: Directory that is listed for ``.zip`` checkpoints.
        test_level_dir: Forwarded to ``evaluate_on_test_levels``.
        env: Environment used both for loading and for evaluating each model.
        writer: Scalar logger forwarded to ``evaluate_on_test_levels``.
        csv_file: Path of the CSV file to (over)write; one row per evaluated
            checkpoint, keyed by the iteration number from the file name.
        eval_interval: Spacing of the iteration numbers that get evaluated.
        last_evaluated_iter: Currently unused; kept so existing callers
            remain valid.

    Returns:
        Tuple ``(iteration_numbers, overall_success_rates, mean_success_rate)``
        where ``iteration_numbers`` holds the step value each result was
        logged under.  ``mean_success_rate`` is NaN when no checkpoint was
        evaluated.
    """
    checkpoints = []
    for file_name in os.listdir(checkpoint_dir):
        if not file_name.endswith('.zip'):
            continue
        stage, iteration = extract_stage_and_iter(file_name)
        if stage is None:
            # A (None, None) key cannot be ordered against integer keys and
            # ``iteration - 1`` below would raise TypeError, so skip the file.
            print(f"Skipping {file_name}: no stage/iteration found in file name")
            continue
        checkpoints.append((stage, iteration, file_name))
    # Order by (stage, iteration) only; ties keep their directory-listing order.
    checkpoints.sort(key=lambda entry: entry[:2])

    overall_success_rates = []
    iteration_numbers = []

    # Step counter used for the logged scalars; starts at 0
    current_iteration = 0

    with open(csv_file, mode='w', newline='') as file:
        writer_csv = csv.writer(file)
        writer_csv.writerow(["Iteration", "Success Rate", "Mean Traversal Time", "Var Traversal Time",
                             "Average Roll Angle", "Var Roll Angle", "Average Pitch Angle", "Var Pitch Angle"])

        for _stage, iteration, model_file in checkpoints:
            if (iteration - 1) % eval_interval != 0:
                continue

            print(f"Evaluating {model_file}")
            model = PPO.load(os.path.join(checkpoint_dir, model_file), env)

            (success_rate, mean_traversal_time, var_traversal_time,
             avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle) = evaluate_on_test_levels(
                env, model, test_level_dir, num_trials=50, max_steps=1000, render=False, writer=writer, iteration=current_iteration)

            writer_csv.writerow([iteration, success_rate, mean_traversal_time, var_traversal_time,
                                 avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle])

            overall_success_rates.append(success_rate)
            iteration_numbers.append(current_iteration)

            print(f"Checkpoint {model_file}: Success rate on last test level = {success_rate}\n")

            # NOTE(review): the logged step advances by a fixed 5 per evaluated
            # checkpoint, independent of eval_interval - confirm this is intended.
            current_iteration += 5

    if overall_success_rates:
        mean_success_rate = np.mean(overall_success_rates)
    else:
        # np.mean([]) would also give NaN, but with a RuntimeWarning.
        print("No checkpoints were evaluated")
        mean_success_rate = float('nan')
    print(f"Mean success rate across all evaluated checkpoints = {mean_success_rate}")

    return iteration_numbers, overall_success_rates, mean_success_rate

# Checkpoint and test-level locations shared by every evaluation run
checkpoint_dir = '../train/logs/vws_ppo_checkpoints_HopperNorm/'
test_level_dir = '../TestLevels'
last_evaluated_iter = 0

for run_index in range(1, 4):  # Run 3 evaluations
    # Set up the TensorBoard writer for this run
    log_dir = f'./StageTest_logs/run{run_index}'
    writer = SummaryWriter(log_dir)

    # try/finally guarantees the writer is closed even if an evaluation fails
    try:
        env = off_road_art()
        csv_file = f'./StageTest_logs/run{run_index}.csv'

        iteration_numbers, overall_success_rates, mean_success_rate = evaluate_checkpoints_on_test_levels(
            checkpoint_dir, test_level_dir, env, writer, csv_file, eval_interval=6, last_evaluated_iter=last_evaluated_iter)

        print(f"Overall success rates for all checkpoints in run {run_index}:", overall_success_rates)
        print(f"Mean success rate across all evaluated checkpoints in run {run_index}: {mean_success_rate}")
    finally:
        writer.close()

Evaluating ppo_checkpoint_stage0_iter1.zip
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Terrain loaded from /home/tong/Documents/gym_chrono/envs/wheeled/../data/terrain_bitmaps/Automatic-CL/TrainLevels/../TestLevels/level_5.bmp
--------------------------------------------------------------
Time out
Initial position:  [ 9.89368, -6.92386, 3 ]
Final position of art:  [ 4.51501, -1.98017, 2.60062 ]
Goal position:  [ -10.0442, 10.4854, 3 ]
Distance to goal:  19.170817550701596
Reward:  -301.7225196256779
Accumulated Reward:  -1259.5771599877905
--------------------------------------------------------------
--------------------------------------------------------------
Time out
Initial position:  [ 12.0744, 2.95918, 3 ]
Final position of art:  [ 5.0211, -0.545161, 2.44286 ]
Goal position:  [ -10.3566, 10.0123, 3 ]
Distance to goal:  18.661307025979106
Reward:  -296.52510902702136
Accumulated Reward:  -1256.1804572594642
---------------------------------------

Test Specified Model

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO
from gym_chrono.envs.wheeled.off_road_artACL import off_road_art
from gymnasium.utils.env_checker import check_env
import torch as th
import numpy as np
import os
import re
import matplotlib.pyplot as plt
import csv
from torch.utils.tensorboard import SummaryWriter

def evaluate_model(env, model, num_trials=50, max_steps=1000, render=False,
                   success_step_limit=200):
    """Run ``num_trials`` deterministic episodes and aggregate the outcome.

    Every trial resets ``env`` with the trial index as seed, then steps it
    with ``model.predict(..., deterministic=True)`` until the episode reports
    ``terminated``/``truncated`` or ``max_steps`` steps have been taken.

    Args:
        env: Environment exposing ``reset(seed=...)``, ``step(action)``,
            ``render(mode)``, ``m_vehicle`` and ``m_system``.
        model: Policy exposing ``predict(obs, deterministic=True)``.
        num_trials: Number of episodes to run.
        max_steps: Upper bound on the steps taken in one episode.
        render: When true, call ``env.render('follow')`` after the reset and
            after every step.
        success_step_limit: An episode counts as a success only when it
            reports ``terminated`` after fewer than this many steps.
            Defaults to 200, the value that used to be hard-coded.

    Returns:
        Tuple ``(success_count, mean_traversal_time, var_traversal_time,
        avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle)``.
        Traversal statistics cover successful episodes only; roll/pitch
        statistics are the mean and variance of the per-episode mean absolute
        angles.  Any statistic with no underlying samples is reported as 0.
    """
    success_count = 0
    traversal_times = []
    episode_roll_means = []
    episode_pitch_means = []

    for trial in range(num_trials):
        # The trial index doubles as the reset seed.
        obs, _ = env.reset(seed=trial)
        if render:
            env.render('follow')

        step_count = 0
        roll_angles = []
        pitch_angles = []

        while step_count < max_steps:
            action, _ = model.predict(obs, deterministic=True)
            obs, _, terminated, truncated, _ = env.step(action)
            if render:
                env.render('follow')

            step_count += 1

            # Record the absolute roll (x) and pitch (y) Euler components.
            # np.degrees assumes the vehicle reports them in radians - TODO confirm.
            euler_angles = env.m_vehicle.GetVehicle().GetRot().Q_to_Euler123()
            roll_angles.append(np.degrees(abs(euler_angles.x)))
            pitch_angles.append(np.degrees(abs(euler_angles.y)))

            if terminated or truncated:
                # NOTE(review): an early ``terminated`` is presumably "goal
                # reached"; confirm against the environment's termination rules.
                if terminated and step_count < success_step_limit:
                    success_count += 1
                    traversal_times.append(env.m_system.GetChTime())
                break

        # An episode that took no step (max_steps <= 0) has nothing to average;
        # skipping it keeps NaN out of the summary statistics.
        if roll_angles:
            episode_roll_means.append(np.mean(roll_angles))
            episode_pitch_means.append(np.mean(pitch_angles))

    # np.mean/np.var of an empty list return NaN with a RuntimeWarning, so every
    # statistic falls back to 0 when it has no samples.
    mean_traversal_time = np.mean(traversal_times) if traversal_times else 0
    var_traversal_time = np.var(traversal_times) if traversal_times else 0
    avg_roll_angle = np.mean(episode_roll_means) if episode_roll_means else 0
    var_roll_angle = np.var(episode_roll_means) if episode_roll_means else 0
    avg_pitch_angle = np.mean(episode_pitch_means) if episode_pitch_means else 0
    var_pitch_angle = np.var(episode_pitch_means) if episode_pitch_means else 0

    return success_count, mean_traversal_time, var_traversal_time, avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle

def evaluate_on_test_levels(env, model, test_level_dir, num_trials=50, max_steps=1000, render=False, writer=None, iteration=None):
    """Evaluate ``model`` on the ``level_5.bmp`` test terrain and print a summary.

    Sets ``env.terrain_file`` to ``level_5.bmp`` inside ``test_level_dir``,
    calls ``env.update_terrain_stage(level_index=0)`` and then runs
    ``evaluate_model`` with the given trial settings.

    Args:
        env: Environment to evaluate in; its ``terrain_file`` attribute is
            overwritten.
        model: Policy passed through to ``evaluate_model``.
        test_level_dir: Directory joined with ``level_5.bmp`` to build the
            terrain path.
        num_trials: Number of evaluation episodes; also the denominator of
            the success rate.
        max_steps: Per-episode step limit passed to ``evaluate_model``.
        render: Passed through to ``evaluate_model``.
        writer: Optional object with ``add_scalar(tag, value, step)``; the
            success rate is logged only when both ``writer`` and
            ``iteration`` are given.
        iteration: Step value used for the logged scalar.

    Returns:
        Tuple ``(success_rate, mean_traversal_time, var_traversal_time,
        avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle)``.

    Raises:
        ZeroDivisionError: If ``num_trials`` is 0.
    """
    level_file = "level_5.bmp"
    level_path = os.path.join(test_level_dir, level_file)
    env.terrain_file = [level_path]
    env.update_terrain_stage(level_index=0)

    # Evaluate a single model on a given terrain
    (success_count, mean_traversal_time, var_traversal_time,
     avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle) = evaluate_model(
        env, model, num_trials=num_trials, max_steps=max_steps, render=render)
    success_rate = float(success_count) / num_trials

    # Log the success rate for the test level
    if writer is not None and iteration is not None:
        writer.add_scalar('Success Rate/Test Level 5', success_rate, iteration)

    print(f"Test Level {level_file}: Success rate = {success_rate}\n")
    # The roll/pitch averages used to be printed on this line as well as on the
    # next one; each metric is now reported exactly once.
    print(f"Mean Traversal Time: {mean_traversal_time:.2f} s")
    print(f"Average Roll/Pitch Angle: {avg_roll_angle:.2f}/{avg_pitch_angle:.2f} deg")
    # A variance of angles in degrees carries squared units.
    print(f"Variance Roll/Pitch Angle: {var_roll_angle:.2f}/{var_pitch_angle:.2f} deg^2\n")

    return success_rate, mean_traversal_time, var_traversal_time, avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle

# Checkpoint to evaluate and the directory holding the test terrain
checkpoint_file = './TestModel/ppo_checkpoint_iter24_level64'
test_level_dir = '../TestLevels'

# Ensure environment is correctly initialized
env = off_road_art()

for run_index in range(1, 4):  # Run 3 evaluations
    # Set up the TensorBoard writer for this run
    log_dir = f'./TestModel/run{run_index}_logs-24'
    writer = SummaryWriter(log_dir)

    # try/finally guarantees the writer is closed even if loading or evaluation fails
    try:
        csv_file = f'./TestModel/run{run_index}_results-24.csv'
        model = PPO.load(checkpoint_file, env)

        with open(csv_file, mode='w', newline='') as file:
            writer_csv = csv.writer(file)
            writer_csv.writerow(["Iteration", "Success Rate", "Mean Traversal Time", "Var Traversal Time", "Average Roll Angle", "Var Roll Angle", "Average Pitch Angle", "Var Pitch Angle"])

            # Evaluate the model on the test level; 24 is the step the scalar is logged under
            success_rate, mean_traversal_time, var_traversal_time, avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle = evaluate_on_test_levels(
                env, model, test_level_dir, num_trials=50, max_steps=1000, render=False, writer=writer, iteration=24)

            writer_csv.writerow([24, success_rate, mean_traversal_time, var_traversal_time, avg_roll_angle, var_roll_angle, avg_pitch_angle, var_pitch_angle])

        print(f"Run {run_index}: Success rate = {success_rate}, Mean Traversal Time = {mean_traversal_time}")
    finally:
        writer.close()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Terrain loaded from /home/tong/Documents/gym_chrono/envs/wheeled/../data/terrain_bitmaps/Automatic-CL/TrainLevels/../TestLevels/level_5.bmp
--------------------------------------------------------------
Time out
Initial position:  [ -7.34469, 4.78818, 3 ]
Final position of art:  [ 0.205283, 0.397682, 2.65827 ]
Goal position:  [ 7.67097, -9.70266, 3 ]
Distance to goal:  12.564633832471388
Reward:  -235.64768315479586
Accumulated Reward:  -883.127030027535
--------------------------------------------------------------
--------------------------------------------------------------
Goal Reached
Initial position:  [ 11.8648, -3.59666, 3 ]
Goal position:  [ -5.18005, 9.13329, 2.5 ]
--------------------------------------------------------------
--------------------------------------------------------------
Time out
Initial position:  [ 11.8887, 3.53037, 3 ]
Final position of art:  [ 5.54191, 0.362017, 2.20413 ]
Goal 