In [18]:
from stable_baselines3 import PPO
import numpy as np
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from ot2_env_wrapper import OT2Env  # Custom environment wrapper
from clearml import Task  # Import ClearML's Task
import typing_extensions


# Checkpoints of the trained models to compare. Every run is stored as
# <models dir>\<run id>\model.zip, so the list is derived from the run IDs.
model_paths = [
    "\\".join((r"C:\Users\Edopi\Desktop\VirtualMLP\models", run_id, "model.zip"))
    for run_id in (
        "ksnfka60",
        "06lqnlgo",
        "8nwi2fpk",
        "8102npc5",
        "rg8hfw19",
        "z4sevf4e",
    )
]


def evaluate_model_fixed_goal(model_path, env, fixed_goal, num_episodes=10, deterministic=True):
    """
    Evaluate a trained PPO model against a single, fixed goal position.

    For every episode the environment is reset, its ``goal_position``
    attribute is overwritten with ``fixed_goal`` and the policy is rolled out
    until the environment flags the episode as terminated *or* truncated.
    The Euclidean distance between the first three entries of the final
    observation and the goal is recorded as that episode's "accuracy"
    (lower is better), together with the number of steps taken.

    Args:
        model_path (str): Path to the trained model.
        env (gym.Env): The environment to test on. Must follow the
            five-tuple ``step`` API and expose a writable ``goal_position``.
        fixed_goal (np.array): Fixed goal position for evaluation.
        num_episodes (int): Number of episodes to test.
        deterministic (bool): Passed to ``model.predict``. Defaults to True
            so the metric reflects the learned policy rather than sampling
            noise; pass False to sample actions stochastically.

    Returns:
        tuple: (average accuracy, standard deviation of accuracy, average steps)

    Raises:
        ValueError: If ``num_episodes`` is smaller than 1.
    """
    if num_episodes < 1:
        raise ValueError("num_episodes must be at least 1")

    model = PPO.load(model_path)  # Load the model
    goal = np.asarray(fixed_goal)
    accuracies, steps_taken = [], []

    for _ in range(num_episodes):
        obs, _info = env.reset()
        # Override the goal chosen by reset(). A copy is handed over so the
        # environment cannot mutate the caller's array between episodes.
        # NOTE(review): the observation returned by reset() was built before
        # this override, so if it embeds the goal the first action sees the
        # old one - confirm against OT2Env.
        env.goal_position = goal.copy()

        step_count = 0
        terminated = truncated = False

        # An episode ends on either flag; checking only the first one never
        # stops when the environment ends the episode by truncation.
        while not (terminated or truncated):
            action, _state = model.predict(obs, deterministic=deterministic)
            obs, _reward, terminated, truncated, _info = env.step(action)
            step_count += 1

        # Only the final distance is reported, so compute it once per episode.
        # Assumes obs[:3] is the pipette xyz position - TODO confirm.
        accuracies.append(np.linalg.norm(obs[:3] - goal))
        steps_taken.append(step_count)

    # Compute average and standard deviation of results
    avg_accuracy = np.mean(accuracies)
    std_accuracy = np.std(accuracies)
    avg_steps = np.mean(steps_taken)

    return avg_accuracy, std_accuracy, avg_steps


def main():
    """
    Evaluate every model in ``model_paths`` against one fixed goal position.

    A single ``OT2Env`` instance is created and shared across all models.
    Per-model metrics are printed as they are produced, the model with the
    lowest average final distance is reported as the best one, and the
    environment is closed even if an evaluation fails or is interrupted.

    Returns:
        dict: Maps each model path to a dict with the keys ``"avg_accuracy"``,
        ``"std_accuracy"`` and ``"avg_steps"``. Empty when ``model_paths``
        is empty.
    """
    fixed_goal_position = np.array([0.1, 0.1, 0.2], dtype=np.float32)  # Define fixed goal

    # Create the environment
    env = OT2Env(render=True, max_steps=1000)

    # Dictionary to store results
    results = {}

    try:
        # Evaluate each model
        for model_path in model_paths:
            print(f"Evaluating model: {model_path} with fixed goal {fixed_goal_position}")
            avg_accuracy, std_accuracy, avg_steps = evaluate_model_fixed_goal(
                model_path, env, fixed_goal_position
            )
            results[model_path] = {
                "avg_accuracy": avg_accuracy,
                "std_accuracy": std_accuracy,
                "avg_steps": avg_steps,
            }
            print(
                f"Model: {model_path} | Avg Accuracy: {avg_accuracy:.4f} m | "
                f"Std Dev: {std_accuracy:.4f} m | Avg Steps: {avg_steps:.2f}"
            )
    finally:
        # Always release the environment, including on errors or a
        # KeyboardInterrupt raised in the middle of a long evaluation.
        env.close()

    # Nothing to rank when no model was evaluated; min() would raise here.
    if not results:
        return results

    # Identify the best model based on accuracy (smallest distance wins)
    best_model = min(results, key=lambda x: results[x]["avg_accuracy"])
    print(f"\nBest Model: {best_model}")
    print(
        f"Performance: Avg Accuracy = {results[best_model]['avg_accuracy']:.4f} m, "
        f"Std Dev = {results[best_model]['std_accuracy']:.4f} m, "
        f"Avg Steps = {results[best_model]['avg_steps']:.2f}"
    )

    return results


if __name__ == "__main__":
    # Keep the metrics at notebook scope so later cells can read ``results``.
    results = main()


Evaluating model: C:\Users\Edopi\Desktop\VirtualMLP\models\ksnfka60\model.zip with fixed goal [0.1 0.1 0.2]
Reset: Pipette Position [0.073  0.0895 0.1195], Goal Position [0.16218033 0.20281601 0.18676804]
Step 1 called with action: [ 0.23224907 -1.         -0.16563052]
Step 2 called with action: [1.         0.43658206 0.50706035]
Step 3 called with action: [ 0.9630056  -0.03610066 -1.        ]
Step 4 called with action: [1.         0.55832815 1.        ]
Step 5 called with action: [-0.8701046  0.7266273 -0.6414531]
Step 6 called with action: [-1.          0.04590809 -0.4074437 ]
Step 7 called with action: [-1.          0.22810203  1.        ]
Step 8 called with action: [-0.7271892  -0.18299888 -1.        ]
Step 9 called with action: [-0.4944447  -0.2990655   0.12138129]
Step 10 called with action: [-1.         -0.6512987   0.05645168]
Step 11 called with action: [-0.0389949   0.18436463  0.90160954]
Step 12 called with action: [ 1.        -0.5575657  0.991462 ]
Step 13 called with acti

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt
from pathlib import PureWindowsPath

# Bar chart comparing the average final distance to the target per model.
# Expects ``results`` - the per-model metrics mapping assembled in main() -
# to be available at notebook scope.
model_names = list(results.keys())

# The metric is stored under "avg_accuracy"; there is no "avg_distance" key.
avg_distances = [results[model]["avg_accuracy"] for model in model_names]

# Label bars with the run directory name instead of the full checkpoint path
# so the axis stays readable; fall back to the full path if there is none.
run_labels = [PureWindowsPath(model).parent.name or model for model in model_names]

fig, ax = plt.subplots()
ax.barh(run_labels, avg_distances)
ax.set_xlabel("Average Distance to Target")
ax.set_ylabel("Model")
ax.set_title("Model Performance Comparison")
plt.show()
