# FastTD3 Model Evaluation with PlayIsaacLabEnv

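This notebook demonstrates how to use the `PlayIsaacLabEnv` class to evaluate and visualize trained FastTD3 models: it loads a checkpoint, creates the play environment, runs single- and multi-episode evaluations, plots the results, and compares the trained policy against a random baseline.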

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

# Add the project root to the Python path
sys.path.append('..')

from fast_td3.fast_td3 import Actor, MultiTaskActor
from fast_td3.environments.play_isaaclab_env import PlayIsaacLabEnv
from fast_td3.fast_td3_utils import EmpiricalNormalization

## Configuration

In [None]:
# Evaluation settings -- adjust these to match your checkpoint and task.
MODEL_PATH = "../models/your_trained_model.pt"  # Update with your model path
TASK_NAME = "Isaac-Cartpole-v0"  # Update with your task name
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
NUM_EPISODES = 5  # episode count for the multi-episode evaluations
SEED = 42  # seed handed to the environment constructor

# Echo the settings that most often need editing.
for label, setting in (
    ("Device", DEVICE),
    ("Model path", MODEL_PATH),
    ("Task", TASK_NAME),
):
    print(f"{label}: {setting}")

## Load Trained Model

In [None]:
def load_model_checkpoint(model_path: str, device: str, actor_kwargs=None):
    """Load a checkpoint and rebuild the actor and observation normalizer.

    Args:
        model_path: Path to the checkpoint file.
        device: Torch device string used for ``map_location`` and for the
            rebuilt modules.
        actor_kwargs: Optional dict of ``Actor`` constructor arguments that
            override the built-in defaults below (``n_obs``, ``n_act``,
            ``num_envs``, ``init_scale``, ``hidden_dim``, ``device``). They
            must match the values used during training, otherwise loading
            the state dict will fail on mismatched parameter shapes.

    Returns:
        A ``(actor, obs_normalizer, checkpoint)`` tuple. ``obs_normalizer``
        is ``None`` when the checkpoint carries no normalizer state. All
        three entries are ``None`` when the file does not exist or no actor
        weights are found in it.
    """
    if not os.path.exists(model_path):
        print(f"Model not found: {model_path}")
        return None, None, None

    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # that come from a source you trust.
    checkpoint = torch.load(model_path, map_location=device)
    print("Checkpoint keys:", list(checkpoint.keys()))

    # The actor weights may be stored under either of these two keys.
    actor_state_dict = checkpoint.get('actor_state_dict', checkpoint.get('actor'))
    if actor_state_dict is None:
        print("Warning: Could not find actor in checkpoint")
        return None, None, None

    # Defaults kept from the original notebook; override via `actor_kwargs`
    # so they match what was used during training.
    resolved_actor_kwargs = {
        'n_obs': 1090,  # Update based on your observation space
        'n_act': 2,     # Update based on your action space
        'num_envs': 1,
        'init_scale': 0.1,
        'hidden_dim': 256,
        'device': device,
    }
    if actor_kwargs:
        resolved_actor_kwargs.update(actor_kwargs)

    actor = Actor(**resolved_actor_kwargs)
    actor.load_state_dict(actor_state_dict)
    actor.to(device)
    actor.eval()

    # Look for normalizer state under the original key first, then under
    # 'obs_normalizer_state' (presumably what the training code writes --
    # confirm against your save routine). A stored value of None means
    # "no normalizer" and is skipped instead of crashing load_state_dict.
    normalizer_state = None
    for state_key in ('obs_normalizer', 'obs_normalizer_state'):
        candidate = checkpoint.get(state_key)
        if candidate is not None:
            normalizer_state = candidate
            break

    obs_normalizer = None
    if normalizer_state is not None:
        obs_normalizer = EmpiricalNormalization(
            shape=resolved_actor_kwargs['n_obs'], device=device
        )
        obs_normalizer.load_state_dict(normalizer_state)
        obs_normalizer.eval()

    return actor, obs_normalizer, checkpoint

# Restore the actor (and normalizer, if present) from the configured path,
# then report what was recovered.
actor, obs_normalizer, checkpoint = load_model_checkpoint(MODEL_PATH, DEVICE)

if actor is None:
    print("❌ Failed to load model")
else:
    print("✅ Model loaded successfully!")
    print(f"Actor type: {type(actor).__name__}")
    # The normalizer is optional; only mention it when one was restored.
    if obs_normalizer is not None:
        print("✅ Observation normalizer loaded")

## Create Play Environment

In [None]:
# Construct the play environment from the notebook settings.
play_env_options = dict(
    task_name=TASK_NAME,
    device=DEVICE,
    num_envs=1,
    seed=SEED,
    enable_viewport=True,  # Set to False if running headless
    record_video=True,
    video_path="./evaluation_videos/",
)
play_env = PlayIsaacLabEnv(**play_env_options)

print(f"✅ Environment created successfully")
# Report the basic dimensions exposed by the environment, one per line.
for description, attribute in (
    ("Observation space", "num_obs"),
    ("Action space", "num_actions"),
    ("Max episode steps", "max_episode_steps"),
):
    print(f"{description}: {getattr(play_env, attribute)}")

## Single Episode Evaluation

In [None]:
# Roll out one episode with the trained actor and print whatever statistics
# the environment returns for it.
if actor is None:
    print("❌ No actor model loaded")
else:
    print("Playing single episode...")

    single_episode_options = dict(
        actor_model=actor,
        deterministic=True,
        obs_normalizer=obs_normalizer,
        verbose=True,
    )
    episode_stats = play_env.play_episode(**single_episode_options)

    print("\n📊 Episode Results:")
    for stat_name, stat_value in episode_stats.items():
        print(f"{stat_name}: {stat_value}")

## Multi-Episode Evaluation

In [None]:
# Run the trained actor for NUM_EPISODES episodes and keep the aggregate
# results for the plotting and comparison cells.
if actor is None:
    print("❌ No actor model loaded")
else:
    print(f"Evaluating over {NUM_EPISODES} episodes...")

    evaluation_options = dict(
        actor_model=actor,
        num_episodes=NUM_EPISODES,
        obs_normalizer=obs_normalizer,
        deterministic=True,
        verbose=True,
    )
    results = play_env.evaluate_model(**evaluation_options)

    print("\n📈 Evaluation completed!")

## Visualize Results

In [None]:
# Plot per-episode rewards and lengths side by side, then print a summary.
# `results` only exists if the multi-episode evaluation cell ran successfully.
if actor is None or 'results' not in locals():
    print("❌ No results to visualize")
else:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # One row per panel:
    # (axis, per-episode key, mean key, mean format, y-label, title)
    panels = (
        (ax1, 'episode_rewards', 'mean_reward', '.2f', 'Reward', 'Episode Rewards'),
        (ax2, 'episode_lengths', 'mean_length', '.1f', 'Length', 'Episode Lengths'),
    )
    for axis, series_key, mean_key, mean_format, y_label, panel_title in panels:
        per_episode = results[series_key]
        mean_value = results[mean_key]
        axis.bar(range(len(per_episode)), per_episode)
        # Dashed red line marks the mean across episodes.
        axis.axhline(
            y=mean_value,
            color='r',
            linestyle='--',
            label=f'Mean: {mean_value:{mean_format}}',
        )
        axis.set_xlabel('Episode')
        axis.set_ylabel(y_label)
        axis.set_title(panel_title)
        axis.legend()
        axis.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Textual summary of the same aggregates.
    print("\n📊 Summary Statistics:")
    print(f"Mean Reward: {results['mean_reward']:.2f} ± {results['std_reward']:.2f}")
    print(f"Mean Length: {results['mean_length']:.1f} ± {results['std_length']:.1f}")
    print(f"Success Rate: {results['success_rate']:.2%}")

## Compare with Random Policy

In [None]:
# Create a random policy for comparison
class RandomPolicy:
    """Baseline policy that samples standard-normal actions.

    Only the leading dimension of the observation is used (to decide how
    many action rows to produce); the observation values are never read.
    """

    def __init__(self, action_dim, device):
        # Kept so explore() can size and place its output tensor.
        self.device = device
        self.action_dim = action_dim

    def explore(self, obs, deterministic=False):
        """Return one random action row per row of ``obs``.

        ``deterministic`` is accepted but has no effect: the output is
        always freshly sampled.
        """
        num_rows = obs.shape[0]
        action_shape = (num_rows, self.action_dim)
        return torch.randn(action_shape, device=self.device)

# Evaluate a random policy on the same environment as a baseline.
print("Evaluating random policy for comparison...")
random_policy = RandomPolicy(play_env.num_actions, DEVICE)

random_results = play_env.evaluate_model(
    actor_model=random_policy,
    num_episodes=NUM_EPISODES,
    deterministic=False,
    verbose=False
)

# Compare against the trained policy. `results` only exists if the
# multi-episode evaluation cell ran successfully.
if actor is not None and 'results' in locals():
    print("\n🔍 Trained vs Random Policy Comparison:")
    print(f"Trained Policy - Mean Reward: {results['mean_reward']:.2f}")
    print(f"Random Policy  - Mean Reward: {random_results['mean_reward']:.2f}")
    print(f"Improvement: {results['mean_reward'] - random_results['mean_reward']:.2f}")

    # Bar chart of mean reward with the standard deviation as error bars.
    policies = ['Trained', 'Random']
    rewards = [results['mean_reward'], random_results['mean_reward']]
    errors = [results['std_reward'], random_results['std_reward']]

    plt.figure(figsize=(8, 6))
    bars = plt.bar(policies, rewards, yerr=errors, capsize=5,
                   color=['green', 'red'], alpha=0.7)
    plt.ylabel('Mean Episode Reward')
    plt.title('Trained vs Random Policy Performance')
    plt.grid(True, alpha=0.3)

    # Add value labels just outside the end of each bar. The offset is given
    # in points rather than data units so it does not depend on the reward
    # scale, and it flips below the bar for negative rewards so the label
    # is not drawn inside the bar.
    for bar, reward in zip(bars, rewards):
        is_negative = reward < 0
        plt.annotate(
            f'{reward:.2f}',
            xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
            xytext=(0, -3 if is_negative else 3),
            textcoords='offset points',
            ha='center',
            va='top' if is_negative else 'bottom',
        )

    plt.show()
else:
    print("❌ Cannot compare - trained model results not available")

## Environment Statistics

In [None]:
# Fetch the statistics reported by the environment and list them one per line.
env_stats = play_env.get_statistics()
print("\n🏆 Environment Statistics:")
for stat_name in env_stats:
    print(f"{stat_name}: {env_stats[stat_name]}")

## Cleanup

In [None]:
# Wrap-up messages; nothing is released here, the simulator window is
# closed by the user.
print("\n🧹 Evaluation completed!")
# Only mention the video location when the environment has a
# `video_frames` attribute.
has_video_frames = hasattr(play_env, 'video_frames')
if has_video_frames:
    print(f"Videos saved to: {play_env.video_path}")
print("You can now close the Isaac Lab simulator window.")