In [None]:
# WORKING HUMANOID WALKING TRAINER
# Simple, reliable approach that actually works!

import numpy as np
import gymnasium as gym
import mujoco
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
import time
import os

# Startup banner: three messages emitted in order, one per line.
print(
    "🚀 HUMANOID WALKING TRAINER",
    "✅ Simple, reliable approach",
    "🚫 No demo data needed - pure RL!",
    sep="\n",
)


🚀 ULTIMATE HUMANOID WALKING TRAINING SUITE
✅ All latest improvements loaded!
🎯 Includes: Basic → Quick Improve → Curriculum → Advanced
🚫 No demo data needed - pure RL from scratch!
📊 Tensorboard monitoring included!


In [None]:
# STEP 1: Define Training Environments

class StandingHumanoidEnv(gym.Env):
    """Phase 1: Learn to stand upright.

    Wraps a MuJoCo model loaded from ``xml_path``. Observations are the
    concatenation of ``data.qpos`` and ``data.qvel`` cast to float32 so they
    match the declared observation space; actions are clipped to
    ``±ACTION_LIMIT`` and written to ``data.ctrl``.

    NOTE(review): the code treats qpos[2] as body height and qpos[3] as the
    upright quaternion component, which presumes the model's root is a free
    joint — confirm against the XML.
    """

    ACTION_LIMIT = 0.3       # symmetric bound applied to every control value
    MAX_EPISODE_STEPS = 500  # episode is truncated after this many steps
    FALL_HEIGHT = 0.5        # episode terminates when qpos[2] drops below this

    def __init__(self, xml_path="humanoid.xml"):
        """Load the model and declare observation/action spaces.

        Args:
            xml_path: Path to the MuJoCo XML model file.
        """
        super().__init__()
        self.model = mujoco.MjModel.from_xml_path(xml_path)
        self.data = mujoco.MjData(self.model)
        self.viewer = None  # None = not created yet, "disabled" = launch failed

        obs_dim = self.model.nq + self.model.nv
        self.observation_space = gym.spaces.Box(-np.inf, np.inf, shape=(obs_dim,), dtype=np.float32)
        self.action_space = gym.spaces.Box(
            -self.ACTION_LIMIT, self.ACTION_LIMIT, shape=(self.model.nu,), dtype=np.float32
        )
        self.step_count = 0

    def reset(self, seed=None, options=None):
        """Put the model in a slightly perturbed standing pose.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``; the pose noise
                is drawn from the environment's own generator so a given seed
                reproduces the same start state.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        # Clear time, controls and solver warm-start left over from the
        # previous episode before writing the new pose.
        mujoco.mj_resetData(self.model, self.data)
        self.data.qpos[:] = 0
        self.data.qpos[2] = 1.3  # Standing height
        self.data.qpos[3] = 1.0  # Upright
        # Use the seeded generator rather than the global numpy RNG.
        self.data.qpos[:] += self.np_random.normal(0, 0.02, size=self.model.nq)
        self.data.qpos[2] = max(1.2, self.data.qpos[2])  # never start below 1.2
        self.data.qvel[:] = 0
        mujoco.mj_forward(self.model, self.data)
        self.step_count = 0
        return self._get_obs(), {}

    def step(self, action):
        """Apply ``action`` for one simulation step.

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``terminated`` is
            set when height falls below ``FALL_HEIGHT``; ``truncated`` once
            ``MAX_EPISODE_STEPS`` steps have been taken.
        """
        self.data.ctrl[:] = np.clip(action, -self.ACTION_LIMIT, self.ACTION_LIMIT)
        mujoco.mj_step(self.model, self.data)
        # Count the step before testing the limit so an episode lasts exactly
        # MAX_EPISODE_STEPS steps (checking first allowed one extra step).
        self.step_count += 1
        obs = self._get_obs()
        reward = float(self._standing_reward())
        terminated = bool(self.data.qpos[2] < self.FALL_HEIGHT)
        truncated = self.step_count >= self.MAX_EPISODE_STEPS
        return obs, reward, terminated, truncated, {}

    def _get_obs(self):
        """Return qpos and qvel concatenated, as float32 to match the space."""
        return np.concatenate([self.data.qpos, self.data.qvel]).astype(np.float32)

    def _standing_reward(self):
        """Tiered height reward plus stability bonus, minus a control cost."""
        height = self.data.qpos[2]
        if height > 1.2:
            height_reward = 2.0
        elif height > 1.0:
            height_reward = 1.0
        elif height > 0.8:
            height_reward = 0.5
        else:
            height_reward = -1.0

        stability_bonus = 1.0 if height > 1.0 else 0.0
        control_penalty = -0.001 * np.sum(np.square(self.data.ctrl))
        # The trailing 0.1 is a constant per-step bonus.
        return height_reward + stability_bonus + control_penalty + 0.1

    def render(self, mode="human"):
        """Sync the passive MuJoCo viewer, launching it on first use.

        Args:
            mode: Kept for backward compatibility; not consulted.

        If the viewer cannot be launched, rendering is disabled for the rest
        of this environment's life instead of raising.
        """
        if self.viewer is None:
            try:
                import mujoco.viewer as viewer
                self.viewer = viewer.launch_passive(self.model, self.data)
            except Exception:
                # Best effort: no display / no viewer support. Unlike a bare
                # except, this does not swallow KeyboardInterrupt.
                self.viewer = "disabled"
        if self.viewer != "disabled" and hasattr(self.viewer, 'sync'):
            self.viewer.sync()

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None and hasattr(self.viewer, "close"):
            self.viewer.close()
        self.viewer = None

class WalkingHumanoidEnv(StandingHumanoidEnv):
    """Phase 2: Learn to walk forward.

    Reuses the parent's model loading, reset and observation code, widens the
    control range to ±0.5 and replaces the reward with one that also pays for
    forward velocity and penalises sideways drift.
    """

    def __init__(self, xml_path="humanoid.xml"):
        """Build the parent environment, then widen the action bounds.

        Args:
            xml_path: Path to the MuJoCo XML model file.
        """
        super().__init__(xml_path)
        self.action_space = gym.spaces.Box(-0.5, 0.5, shape=(self.model.nu,), dtype=np.float32)

    def step(self, action):
        """Apply ``action`` for one simulation step.

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``terminated`` is
            set when height drops below 0.4 or |qpos[1]| exceeds 2.0;
            ``truncated`` once 1000 steps have been taken.
        """
        self.data.ctrl[:] = np.clip(action, -0.5, 0.5)
        mujoco.mj_step(self.model, self.data)
        # Count the step before testing the limit so an episode lasts exactly
        # 1000 steps (checking first allowed a 1001st step).
        self.step_count += 1
        obs = self._get_obs()
        reward = float(self._walking_reward())
        # Plain Python bools/floats, as the Gymnasium step contract expects.
        terminated = bool(self.data.qpos[2] < 0.4 or abs(self.data.qpos[1]) > 2.0)
        truncated = self.step_count >= 1000
        return obs, reward, terminated, truncated, {}

    def _walking_reward(self):
        """Height tier + forward-velocity bonus - drift and control penalties."""
        pos = self.data.qpos
        vel = self.data.qvel
        height, forward_vel, side_pos = pos[2], vel[0], abs(pos[1])

        # Height reward
        if height > 1.1:
            height_reward = 3.0
        elif height > 0.9:
            height_reward = 2.0
        elif height > 0.6:
            height_reward = 1.0
        else:
            height_reward = -2.0

        # Forward movement reward: nothing below 0.2, capped at a speed of 1.5
        forward_reward = 2.0 * min(forward_vel, 1.5) if forward_vel > 0.2 else 0.0

        # Penalties
        side_penalty = -1.0 * side_pos
        control_penalty = -0.01 * np.sum(np.square(self.data.ctrl))

        # The trailing 0.2 is a constant per-step bonus.
        return height_reward + forward_reward + side_penalty + control_penalty + 0.2

print("✅ Environments ready!")


✅ Basic environments defined!
📊 Standing env - Obs: (55,), Actions: Box(-0.3, 0.3, (21,), float32)
📈 Standing test: 3.01 avg reward
✅ Basic environments ready!


In [None]:
# STEP 2: Train Standing Model (Phase 1)

print("🚀 Phase 1: Training Standing Model...")

# One monitored standing environment inside a single-process vectorised wrapper.
standing_env = DummyVecEnv([lambda: Monitor(StandingHumanoidEnv())])

# PPO with separate two-layer, 128-unit policy and value networks.
standing_model = PPO(
    policy="MlpPolicy",
    env=standing_env,
    learning_rate=3e-4,
    n_steps=1024,
    batch_size=32,
    n_epochs=5,
    gamma=0.99,
    verbose=1,
    policy_kwargs={"net_arch": {"pi": [128, 128], "vf": [128, 128]}},
)

print("Training standing model for 50,000 steps...")
standing_model.learn(total_timesteps=50_000, progress_bar=True)
standing_model.save("standing_model")
print("✅ Standing model saved!")


✅ Advanced ImprovedWalkingEnv defined!
📁 Model and log directories created
🎯 Ready for training with multiple approaches!


In [14]:
# TRAINING METHOD 1: BASIC STEP-BY-STEP TRAINING
# Start here if you're new or want the simple approach

def basic_training():
    """Basic step-by-step training: Standing → Walking.

    Trains a PPO policy on ``StandingHumanoidEnv`` for 50,000 steps and saves
    it as ``basic_standing_model``, then trains on ``WalkingHumanoidEnv`` for
    100,000 steps and saves ``basic_walking_model``. The walking phase tries
    to start from the standing checkpoint and falls back to a fresh, larger
    network when that checkpoint cannot be attached to the walking env.

    Returns:
        The trained walking ``PPO`` model.
    """
    print("🚀 BASIC STEP-BY-STEP TRAINING")
    print("=" * 40)

    # Phase 1: Standing
    print("🎯 Phase 1: Training Standing Model...")
    standing_env_vec = DummyVecEnv([lambda: Monitor(StandingHumanoidEnv())])

    standing_model = PPO(
        "MlpPolicy", standing_env_vec,
        learning_rate=3e-4, n_steps=1024, batch_size=32, n_epochs=5,
        gamma=0.99, verbose=1, tensorboard_log="./logs/basic/",
        policy_kwargs=dict(net_arch=dict(pi=[128, 128], vf=[128, 128]))
    )

    standing_model.learn(total_timesteps=50000, progress_bar=True, tb_log_name="basic_standing")
    standing_model.save("basic_standing_model")
    print("✅ Standing model trained!")

    # Phase 2: Walking
    print("\n🎯 Phase 2: Training Walking Model...")
    walking_env_vec = DummyVecEnv([lambda: Monitor(WalkingHumanoidEnv())])

    try:
        walking_model = PPO.load("basic_standing_model", env=walking_env_vec)
        print("✅ Loaded standing model as base")
    except Exception as err:
        # Report why the warm start was skipped instead of hiding it; a bare
        # except here would also have swallowed Ctrl+C.
        # NOTE(review): the standing and walking envs declare different action
        # bounds (±0.3 vs ±0.5), so the space check during load is expected to
        # reject this checkpoint — confirm whether a warm start is wanted.
        print(f"⚠️  Could not reuse standing model ({type(err).__name__}: {err})")
        walking_model = PPO(
            "MlpPolicy", walking_env_vec,
            learning_rate=2e-4, n_steps=2048, batch_size=64, n_epochs=8,
            gamma=0.99, verbose=1, tensorboard_log="./logs/basic/",
            policy_kwargs=dict(net_arch=dict(pi=[256, 256], vf=[256, 256]))
        )
        print("✅ Created new walking model")

    walking_model.learn(total_timesteps=100000, progress_bar=True,
                        tb_log_name="basic_walking", reset_num_timesteps=False)
    walking_model.save("basic_walking_model")
    print("✅ Walking model trained!")

    return walking_model

# TRAINING METHOD 2: QUICK IMPROVEMENT
# Use this if you already have a model and want to improve it quickly

def quick_improvement():
    """Quick improvement of existing walking model.

    Loads ``basic_walking_model`` (or, failing that, ``walking_model``),
    fine-tunes it for 150,000 steps on ``WalkingHumanoidEnv`` at a learning
    rate of 1e-4 and saves the result as ``quick_improved_model``.

    Returns:
        The fine-tuned ``PPO`` model, or ``None`` when no checkpoint could be
        loaded.
    """
    print("🔥 QUICK IMPROVEMENT TRAINING")
    print("=" * 40)

    env = DummyVecEnv([lambda: Monitor(WalkingHumanoidEnv())])

    fine_tune_lr = 1e-4
    # Checkpoints to try, in order of preference, with their success message.
    candidates = (
        ("basic_walking_model", "✅ Loaded basic walking model"),
        ("walking_model", "✅ Loaded previous walking model"),
    )

    model = None
    for checkpoint, loaded_message in candidates:
        try:
            # Override the stored learning rate at load time: the LR schedule
            # is built while the model is being set up, so assigning
            # model.learning_rate afterwards would not change the optimiser.
            model = PPO.load(checkpoint, env=env,
                             custom_objects={"learning_rate": fine_tune_lr})
        except Exception as err:
            print(f"⚠️  Could not load {checkpoint}: {type(err).__name__}")
            continue
        print(loaded_message)
        break

    if model is None:
        print("❌ No existing model found. Run basic_training() first!")
        return None

    print(f"🎯 Fine-tuning with learning rate: {model.learning_rate}")

    model.learn(total_timesteps=150000, progress_bar=True,
                tb_log_name="quick_improve", reset_num_timesteps=False)
    model.save("quick_improved_model")
    print("✅ Quick improvement completed!")

    return model

# TRAINING METHOD 3: CURRICULUM LEARNING (BEST RESULTS)
# Advanced method that starts easy and gets progressively harder

def curriculum_training(difficulties=(0.5, 0.7, 1.0, 1.3, 1.5)):
    """Advanced curriculum learning - start easy, get harder.

    Trains one PPO model across a sequence of ``ImprovedWalkingEnv``
    difficulties, saving a checkpoint and running a short deterministic
    rollout after each stage, then saves ``curriculum_final_model``.

    Args:
        difficulties: Stage difficulties in training order (easy to hard).
            Each must be > 0 because hyperparameters are divided by it.

    Returns:
        The trained ``PPO`` model.

    Raises:
        ValueError: If ``difficulties`` is empty or contains a value <= 0.
    """
    difficulties = list(difficulties)
    if not difficulties:
        raise ValueError("difficulties must contain at least one stage")
    if any(d <= 0 for d in difficulties):
        raise ValueError("every difficulty must be greater than 0")
    total_stages = len(difficulties)

    print("🎓 CURRICULUM LEARNING TRAINING")
    print("=" * 50)

    model = None

    for i, difficulty in enumerate(difficulties):
        print(f"\n🎯 STAGE {i+1}/{total_stages}: Difficulty {difficulty}")

        # Bind the current difficulty as a default so the factory does not
        # see a later loop value.
        env = DummyVecEnv([lambda d=difficulty: Monitor(ImprovedWalkingEnv(difficulty=d))])

        if model is not None:
            model.set_env(env)
            print("✅ Continuing from previous stage")
        else:
            # NOTE(review): the model is created once, so learning_rate and
            # ent_coef are derived from the first stage's difficulty only —
            # confirm that per-stage values are not intended.
            model = PPO(
                "MlpPolicy", env,
                learning_rate=3e-4 / difficulty,
                n_steps=2048, batch_size=64, n_epochs=10,
                gamma=0.99, gae_lambda=0.95, clip_range=0.2,
                ent_coef=0.01 / difficulty, verbose=1,
                tensorboard_log="./logs/curriculum/",
                policy_kwargs=dict(net_arch=dict(pi=[512, 512, 256], vf=[512, 512, 256]))
            )
            print(f"✅ Created model for stage {i+1}")

        # Harder stages get proportionally more training steps.
        timesteps = int(50000 * (1.0 + difficulty))
        print(f"Training for {timesteps} timesteps...")

        model.learn(total_timesteps=timesteps, progress_bar=True,
                    tb_log_name=f"curriculum_stage_{i+1}_diff_{difficulty}",
                    reset_num_timesteps=False)

        model.save(f"models/curriculum_stage_{i+1}")

        # Quick test: one deterministic rollout of at most 200 steps.
        print(f"🧪 Testing stage {i+1}...")
        test_env = ImprovedWalkingEnv(difficulty=difficulty)
        obs, _ = test_env.reset()
        total_reward = 0
        steps_run = 0

        for _ in range(200):
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, _ = test_env.step(action)
            total_reward += reward
            steps_run += 1
            if terminated or truncated:
                break

        avg_reward = total_reward / steps_run
        distance = test_env.data.qpos[0]
        print(f"📊 Stage {i+1}: Avg reward={avg_reward:.2f}, Distance={distance:.2f}m")

    model.save("curriculum_final_model")
    print("✅ Curriculum training completed!")
    return model

# Menu of the training entry points defined in this cell.
print(
    "✅ All training methods defined!",
    "\n🎯 CHOOSE YOUR TRAINING APPROACH:",
    "1. basic_training()      - Simple, reliable (30 min)",
    "2. quick_improvement()   - Fast improvement (20 min)",
    "3. curriculum_training() - Best results (60 min)",
    "\nRun one of these functions to start training!",
    sep="\n",
)


✅ All training methods defined!

🎯 CHOOSE YOUR TRAINING APPROACH:
1. basic_training()      - Simple, reliable (30 min)
2. quick_improvement()   - Fast improvement (20 min)
3. curriculum_training() - Best results (60 min)

Run one of these functions to start training!


In [15]:
# ACTUALLY RUN TRAINING (Choose ONE of these)
# Uncomment the training method you want to use

# Option 1: Basic Training (recommended for first time)
# trained_model = basic_training()

# Option 2: Quick Improvement (if you have existing model)
# trained_model = quick_improvement()

# Option 3: Curriculum Learning (best results but takes longer)
# trained_model = curriculum_training()

# How to use this cell.
print(
    "🎯 TRAINING INSTRUCTIONS:",
    "1. Uncomment ONE of the training lines above",
    "2. Run this cell to start training",
    "3. Wait for training to complete",
    "4. Continue to next cell for testing",
    sep="\n",
)

# Which entry point suits which situation.
print(
    "\n💡 RECOMMENDATIONS:",
    "- First time: Use basic_training()",
    "- Have existing model: Use quick_improvement()",
    "- Want best results: Use curriculum_training()",
    sep="\n",
)

# Where to watch the training curves.
print(
    "\n📊 MONITOR TRAINING:",
    "Run in terminal: tensorboard --logdir=./logs/",
    "Then open: http://localhost:6006",
    sep="\n",
)

# Placeholder - stays None until one of the training calls above is enabled.
trained_model = None
print(
    "\n⚠️  No training method selected yet!",
    "Uncomment one of the training lines above to start.",
    sep="\n",
)


🎯 TRAINING INSTRUCTIONS:
1. Uncomment ONE of the training lines above
2. Run this cell to start training
3. Wait for training to complete
4. Continue to next cell for testing

💡 RECOMMENDATIONS:
- First time: Use basic_training()
- Have existing model: Use quick_improvement()
- Want best results: Use curriculum_training()

📊 MONITOR TRAINING:
Run in terminal: tensorboard --logdir=./logs/
Then open: http://localhost:6006

⚠️  No training method selected yet!
Uncomment one of the training lines above to start.


In [16]:
# COMPREHENSIVE TESTING OF TRAINED MODELS
# Tests all available models and shows performance comparison

def test_model(model_name, env_class, difficulty=1.0, steps=500):
    """Test a specific model and return performance metrics.

    Rolls the saved policy out deterministically for at most ``steps`` steps
    or three finished episodes, whichever comes first.

    Args:
        model_name: Checkpoint path passed to ``PPO.load``.
        env_class: Environment class to instantiate for the rollout.
        difficulty: Forwarded only when ``env_class`` is ``ImprovedWalkingEnv``.
        steps: Maximum number of environment steps to run.

    Returns:
        A dict with ``avg_reward`` (mean total reward per episode),
        ``avg_distance`` and ``max_distance`` (final ``qpos[0]`` per episode)
        and ``episodes`` (number of finished episodes), or ``None`` if
        loading or running the model raised.
    """
    try:
        env = env_class() if env_class != ImprovedWalkingEnv else env_class(difficulty=difficulty)
        model = PPO.load(model_name, env=env)

        obs, _ = env.reset()
        total_reward = 0
        episode_rewards = []
        distances = []
        episode_count = 0

        for i in range(steps):
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward

            if i % 100 == 0:
                pos = env.data.qpos
                vel = env.data.qvel
                print(f"  Step {i:3d}: Height={pos[2]:.2f}, X={pos[0]:.2f}, VelX={vel[0]:.2f}, R={reward:.2f}")

            if terminated or truncated:
                episode_count += 1
                episode_rewards.append(total_reward)
                distances.append(env.data.qpos[0])

                print(f"  🏁 Episode {episode_count}: Reward={total_reward:.1f}, Distance={env.data.qpos[0]:.2f}m")
                obs, _ = env.reset()
                total_reward = 0

                if episode_count >= 3:  # Test 3 episodes
                    break

        if not episode_rewards:
            # No episode finished within the step budget: score the partial
            # episode as a single sample. This keeps avg_reward in the same
            # unit (total reward per episode) as the normal path; the old
            # fallback returned a per-step average, which made rankings
            # across models meaningless, and divided by zero for steps=0.
            episode_rewards.append(total_reward)
            distances.append(env.data.qpos[0])

        # Calculate metrics
        return {
            'avg_reward': np.mean(episode_rewards),
            'avg_distance': np.mean(distances),
            'max_distance': max(distances),
            'episodes': episode_count
        }

    except Exception as e:
        print(f"  ❌ Error testing {model_name}: {str(e)[:50]}...")
        return None

def comprehensive_test():
    """Evaluate every known checkpoint and report the highest scorer.

    Each checkpoint is run through ``test_model``; those that yield metrics
    are printed, ranked by ``avg_reward`` (descending) and summarised.

    Returns:
        The file name of the top-ranked checkpoint, or ``None`` when no
        checkpoint produced metrics.
    """
    print("🧪 COMPREHENSIVE MODEL TESTING")
    print("=" * 50)

    # (checkpoint file, environment class, label shown to the user)
    candidates = [
        ("basic_standing_model", StandingHumanoidEnv, "Basic Standing"),
        ("basic_walking_model", WalkingHumanoidEnv, "Basic Walking"),
        ("quick_improved_model", WalkingHumanoidEnv, "Quick Improved"),
        ("curriculum_final_model", ImprovedWalkingEnv, "Curriculum Final"),
        ("walking_model", WalkingHumanoidEnv, "Previous Walking"),  # If exists
    ]

    scored = []
    for checkpoint, env_cls, label in candidates:
        print(f"\n🔍 Testing {label} ({checkpoint})...")

        metrics = test_model(checkpoint, env_cls)
        if not metrics:
            continue

        metrics['name'] = label
        metrics['model_file'] = checkpoint
        scored.append(metrics)

        print(f"📊 {label} Results:")
        print(f"   Average reward: {metrics['avg_reward']:.2f}")
        print(f"   Average distance: {metrics['avg_distance']:.2f}m")
        print(f"   Max distance: {metrics['max_distance']:.2f}m")
        print(f"   Episodes completed: {metrics['episodes']}")

    # Nothing could be evaluated: tell the user and bail out early.
    if not scored:
        print("❌ No models found to test. Train a model first!")
        return None

    print("\n📈 PERFORMANCE COMPARISON")
    print("=" * 50)

    # Highest average reward first.
    ranking = sorted(scored, key=lambda entry: entry['avg_reward'], reverse=True)

    print("Ranking by Average Reward:")
    for rank, entry in enumerate(ranking, start=1):
        print(f"{rank}. {entry['name']:20} | Reward: {entry['avg_reward']:6.2f} | Distance: {entry['max_distance']:5.2f}m")

    winner = ranking[0]
    print(f"\n🥇 BEST MODEL: {winner['name']}")
    print(f"   File: {winner['model_file']}")
    print(f"   Performance: {winner['avg_reward']:.2f} avg reward, {winner['max_distance']:.2f}m max distance")

    return winner['model_file']

# Evaluate all checkpoints; the result is the best checkpoint's file name or None.
best_model_name = comprehensive_test()

if not best_model_name:
    print("\n⚠️  No trained models found.")
    print("Go back and run one of the training methods first!")
else:
    print(f"\n✅ Testing completed! Best model: {best_model_name}")
    print("\n🎬 Ready for visualization in next cell!")


🧪 COMPREHENSIVE MODEL TESTING

🔍 Testing Basic Standing (basic_standing_model)...
  ❌ Error testing basic_standing_model: [Errno 2] No such file or directory: 'basic_standi...

🔍 Testing Basic Walking (basic_walking_model)...
  ❌ Error testing basic_walking_model: [Errno 2] No such file or directory: 'basic_walkin...

🔍 Testing Quick Improved (quick_improved_model)...
  ❌ Error testing quick_improved_model: [Errno 2] No such file or directory: 'quick_improv...

🔍 Testing Curriculum Final (curriculum_final_model)...
  ❌ Error testing curriculum_final_model: [Errno 2] No such file or directory: 'curriculum_f...

🔍 Testing Previous Walking (walking_model)...
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
  Step   0: Height=1.30, X=-0.04, VelX=-0.00, R=3.17
  Step 100: Height=1.16, X=0.10, VelX=0.16, R=3.14
  Step 200: Height=0.96, X=0.46, VelX=1.35, R=4.76
  🏁 Episode 1: Reward=925.8, Distance=0.97m
  Step 300: Height=1.23, X=0.05, VelX=0.22, R=3.57
  Step 40

In [17]:
# 🎬 ULTIMATE HUMANOID VIEWER AND ANALYZER

def interactive_viewer(model_name, env_class=None, difficulty=1.0):
    """Interactive viewer with real-time stats and controls.

    Loads a checkpoint, then runs it deterministically in an endless loop,
    rendering each step, printing stats every 100 steps and a summary at the
    end of each episode. Stop it with Ctrl+C / kernel interrupt.

    Args:
        model_name: Checkpoint path passed to ``PPO.load``.
        env_class: Environment class to use. When ``None`` it is chosen from
            the checkpoint name: "standing" → ``StandingHumanoidEnv``,
            "curriculum" → ``ImprovedWalkingEnv``, otherwise
            ``WalkingHumanoidEnv``.
        difficulty: Forwarded only to ``ImprovedWalkingEnv``.

    Returns:
        None. Load/run errors are reported along with a list of which known
        checkpoints can be loaded.
    """
    print(f"🎬 Loading {model_name} for interactive viewing...")

    try:
        # Auto-detect environment type
        if env_class is None:
            if "standing" in model_name.lower():
                env_class = StandingHumanoidEnv
            elif "curriculum" in model_name.lower():
                env_class = ImprovedWalkingEnv
            else:
                env_class = WalkingHumanoidEnv

        # Create environment
        env = env_class() if env_class != ImprovedWalkingEnv else env_class(difficulty=difficulty)
        model = PPO.load(model_name, env=env)

        print(f"✅ Loaded {model_name} successfully!")
        print("🎮 Interactive Controls:")
        print("   - Runs automatically")
        print("   - Stats printed every 100 steps")
        print("   - Auto-restart on episode end")
        print("   - Press Ctrl+C in terminal to stop")

        obs, _ = env.reset()
        step_count = 0
        episode_count = 0
        total_reward = 0
        best_distance = 0
        render_warned = False  # show the "no rendering" notice only once

        try:
            while True:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, terminated, truncated, _ = env.step(action)
                total_reward += reward
                step_count += 1

                # Render. Catch Exception rather than everything so a Ctrl+C
                # that lands during render() still stops the viewer.
                try:
                    env.render()
                except Exception:
                    if not render_warned:
                        print("⚠️  Visual rendering not available, showing stats only")
                        render_warned = True

                # Track the furthest X on every step so the episode summary
                # is not limited to positions sampled every 100 steps.
                best_distance = max(best_distance, env.data.qpos[0])

                # Print stats
                if step_count % 100 == 0:
                    pos = env.data.qpos
                    vel = env.data.qvel

                    print(f"Step {step_count:4d} | Height: {pos[2]:5.2f} | X: {pos[0]:6.2f} | VelX: {vel[0]:5.2f} | Reward: {reward:6.2f}")

                # Episode management
                if terminated or truncated:
                    episode_count += 1
                    final_pos = env.data.qpos

                    print(f"\n🏁 Episode {episode_count} Summary:")
                    print(f"   Steps: {step_count}")
                    print(f"   Total reward: {total_reward:.1f}")
                    print(f"   Distance: {final_pos[0]:.2f}m")
                    print(f"   Best distance so far: {best_distance:.2f}m")
                    print(f"   Average reward: {total_reward/step_count:.3f}")
                    print()

                    obs, _ = env.reset()
                    step_count = 0
                    total_reward = 0
                    time.sleep(1.0)  # Brief pause between episodes

                time.sleep(0.02)  # 50 FPS

        except KeyboardInterrupt:
            print("\n🛑 Viewer stopped by user")
            print(f"📊 Final Stats: {episode_count} episodes, best distance: {best_distance:.2f}m")

    except Exception as e:
        print(f"❌ Error loading {model_name}: {e}")
        print("Available models:")
        available_models = ["basic_standing_model", "basic_walking_model", "quick_improved_model",
                            "curriculum_final_model", "walking_model"]
        # Probe each known checkpoint by attempting to load it.
        for candidate in available_models:
            try:
                PPO.load(candidate)
                print(f"  ✅ {candidate}")
            except Exception:
                print(f"  ❌ {candidate}")

def quick_comparison_view():
    """Quickly view and compare multiple models.

    For each known walking checkpoint, runs one deterministic rollout of at
    most 200 steps and prints its average per-step reward and final
    ``qpos[0]``. Checkpoints that fail to load or run are reported together
    with the exception type instead of being silently marked unavailable.
    """
    print("⚡ QUICK COMPARISON VIEWER")
    print("=" * 30)

    models_to_compare = [
        ("basic_walking_model", WalkingHumanoidEnv),
        ("quick_improved_model", WalkingHumanoidEnv),
        ("curriculum_final_model", ImprovedWalkingEnv),
    ]

    for model_name, env_class in models_to_compare:
        try:
            print(f"\n🔍 Quick test: {model_name}")
            env = env_class() if env_class != ImprovedWalkingEnv else env_class(difficulty=1.0)
            model = PPO.load(model_name, env=env)

            obs, _ = env.reset()
            total_reward = 0
            steps_run = 0

            for _ in range(200):
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, terminated, truncated, _ = env.step(action)
                total_reward += reward
                steps_run += 1

                if terminated or truncated:
                    break

            distance = env.data.qpos[0]
            avg_reward = total_reward / steps_run
            print(f"   📊 {model_name}: {avg_reward:.2f} avg reward, {distance:.2f}m distance")

        except Exception as err:
            # Exception (not a bare except) so Ctrl+C still interrupts, and
            # the error type is shown so real failures are not mistaken for
            # a missing checkpoint.
            print(f"   ❌ {model_name}: Not available ({type(err).__name__})")

print("🎬 ULTIMATE HUMANOID VIEWER SUITE")
print("=" * 50)

print("\n🎮 OPTION 1: Interactive Viewer (Recommended)")
print("Choose a model to view interactively:")

# Check available models by attempting to load each known checkpoint.
available_models = []
model_options = [
    ("basic_standing_model", "Basic Standing"),
    ("basic_walking_model", "Basic Walking"),
    ("quick_improved_model", "Quick Improved"),
    ("curriculum_final_model", "Curriculum Final"),
    ("walking_model", "Previous Walking")
]

for model_file, description in model_options:
    try:
        PPO.load(model_file)
        available_models.append((model_file, description))
        print(f"  ✅ {description}: interactive_viewer('{model_file}')")
    except Exception:
        # Exception rather than a bare except, so interrupting the cell
        # during a slow load is not reported as "Not trained yet".
        print(f"  ❌ {description}: Not trained yet")

print("\n⚡ OPTION 2: Quick Comparison")
print("  quick_comparison_view()  # Compare all models quickly")

# NOTE(review): a .ipynb file is JSON, so exec() on its raw text is unlikely
# to work as printed below — consider pointing at an exported .py script.
print("\n🖥️  OPTION 3: Terminal Viewer (External)")
print("Run this in terminal for any model:")
print("mjpython -c \"")
print("import sys; sys.path.append('.')")
print("exec(open('train_humanoid.ipynb').read())  # Load notebook functions")
print("interactive_viewer('basic_walking_model')  # Replace with your model")
print("\"")

print("\n💡 USAGE EXAMPLES:")
if available_models:
    # First loadable checkpoint in model_options order (not ranked by score).
    best_model = available_models[0][0]
    print(f"interactive_viewer('{best_model}')  # View best available model")
    print(f"quick_comparison_view()  # Compare all models")
else:
    print("No models available yet. Train a model first!")

print("\n📊 EXPECTED IMPROVEMENTS:")
print("✅ Positive rewards (3-7+ instead of negative)")
print("✅ Forward walking (2-5+ meters per episode)")
print("✅ Stable upright posture (height > 1.0m)")
print("✅ Consistent walking behavior")
print("🚫 No more 'flying' or dimensional issues!")

# Auto-run quick comparison if models exist
if available_models:
    print("\n🚀 Running quick comparison of available models...")
    quick_comparison_view()
else:
    print("\n⚠️  No trained models found!")
    print("Go back and uncomment a training method in the previous cell.")


🎬 ULTIMATE HUMANOID VIEWER SUITE

🎮 OPTION 1: Interactive Viewer (Recommended)
Choose a model to view interactively:
  ❌ Basic Standing: Not trained yet
  ❌ Basic Walking: Not trained yet
  ❌ Quick Improved: Not trained yet
  ❌ Curriculum Final: Not trained yet
  ✅ Previous Walking: interactive_viewer('walking_model')

⚡ OPTION 2: Quick Comparison
  quick_comparison_view()  # Compare all models quickly

🖥️  OPTION 3: Terminal Viewer (External)
Run this in terminal for any model:
mjpython -c "
import sys; sys.path.append('.')
exec(open('train_humanoid.ipynb').read())  # Load notebook functions
interactive_viewer('basic_walking_model')  # Replace with your model
"

💡 USAGE EXAMPLES:
interactive_viewer('walking_model')  # View best available model
quick_comparison_view()  # Compare all models

📊 EXPECTED IMPROVEMENTS:
✅ Positive rewards (3-7+ instead of negative)
✅ Forward walking (2-5+ meters per episode)
✅ Stable upright posture (height > 1.0m)
✅ Consistent walking behavior
🚫 No more 'f

In [18]:
# CORRECT: Use the NEW working models
from simple_stand_walk import WalkingHumanoidEnv
from stable_baselines3 import PPO

# Fresh environment built from the class imported above.
test_env = WalkingHumanoidEnv()

# Attach the saved walking policy to that environment.
walking_model = PPO.load("walking_model", env=test_env)

obs, _ = test_env.reset()

# Play the policy for 1000 steps, restarting whenever an episode ends.
for i in range(1000):
    action, _ = walking_model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = test_env.step(action)
    test_env.render()  # This will show PROPER standing/walking

    # Progress line on every 100th step (including step 0).
    if not i % 100:
        pos = test_env.data.qpos
        print(f"Step {i}: Height={pos[2]:.2f}, Distance={pos[0]:.2f}, Reward={reward:.2f}")

    if truncated or terminated:
        obs, _ = test_env.reset()
        print(f"Episode restarted at step {i}")

    # Throttle playback to roughly 50 frames per second.
    time.sleep(0.02)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Step 0: Height=1.29, Distance=-0.05, Reward=3.17
Step 100: Height=1.18, Distance=0.09, Reward=3.11
Step 200: Height=1.00, Distance=0.40, Reward=4.81
Episode restarted at step 268
Step 300: Height=1.27, Distance=0.05, Reward=4.56
Step 400: Height=1.16, Distance=0.19, Reward=3.69
Step 500: Height=0.82, Distance=0.76, Reward=4.16
Episode restarted at step 527
Step 600: Height=1.20, Distance=0.09, Reward=3.68
Step 700: Height=1.13, Distance=0.14, Reward=3.99
Step 800: Height=0.45, Distance=0.89, Reward=0.57
Episode restarted at step 803
Step 900: Height=1.18, Distance=0.15, Reward=4.06
