# MuJoCo Environment Benchmarks
This is **Vibe coded** to save time 

Comparing simulation and PPO training speeds across different environment configurations.

In [1]:
# === IMPORTS & CONFIG ===
import mujoco
import mujoco.mjx as mjx
import jax
import time
import numpy as np
import os
import gymnasium
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from dataclasses import dataclass
from typing import Callable, Optional

# Configuration
PROJECT_ROOT = r"C:\GitHub\training-lucy"
XML_PATH = os.path.join(PROJECT_ROOT, "animals", "ant.xml")
SIM_STEPS = 100_000
TRAIN_TIMESTEPS = 50_000

# PPO hyperparameters
PPO_PARAMS = dict(
    verbose=0, device="cpu", n_steps=2048, n_epochs=10,
    gamma=0.99, gae_lambda=0.95, clip_range=0.2,
    ent_coef=0.0, learning_rate=3e-4
)

Failed to import warp: No module named 'warp'
Failed to import mujoco_warp: No module named 'warp'


In [2]:
# === BENCHMARK UTILITIES ===

def timed(func: Callable, *args, **kwargs) -> tuple:
    """Run function and return (result, elapsed_seconds)."""
    start = time.perf_counter()
    result = func(*args, **kwargs)
    return result, time.perf_counter() - start

def print_results(results: dict, title: str, unit: str = "steps/sec"):
    """Print sorted benchmark results with winner marked."""
    print("\n" + "=" * 60)
    print(f"{title} ({unit}, higher = better)")
    print("=" * 60)
    sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
    for i, (name, speed) in enumerate(sorted_results):
        marker = " üèÜ FASTEST" if i == 0 else ""
        if "ts/sec" in unit or "timesteps" in unit.lower():
            time_1m = 1_000_000 / speed / 60
            print(f"  {name:25s}: {speed:7.0f} {unit}  (~{time_1m:.1f} min for 1M){marker}")
        else:
            print(f"  {name:25s}: {speed:7.0f} {unit}{marker}")
    return sorted_results[0][0]

def create_env(n_envs: int = 1, vec_cls: Optional[type] = None):
    """Create single or vectorized environment."""
    if n_envs == 1 and vec_cls is None:
        return gymnasium.make("Ant-v5", xml_file=XML_PATH)
    return make_vec_env("Ant-v5", n_envs=n_envs, 
                        env_kwargs={"xml_file": XML_PATH},
                        vec_env_cls=vec_cls or DummyVecEnv)

In [3]:
# === SIMULATION SPEED BENCHMARK ===
print("=" * 60)
print(f"SIMULATION SPEED BENCHMARK ({SIM_STEPS:,} steps each)")
print("=" * 60)

sim_results = {}

# 1. MuJoCo Native
print("\n[1/5] MuJoCo Native (C)...")
mj_model = mujoco.MjModel.from_xml_path(XML_PATH)
mj_data = mujoco.MjData(mj_model)

start = time.perf_counter()
for _ in range(SIM_STEPS):
    mujoco.mj_step(mj_model, mj_data)
elapsed = time.perf_counter() - start
sim_results["MuJoCo Native"] = SIM_STEPS / elapsed
print(f"   {sim_results['MuJoCo Native']:.0f} steps/sec")

# 2. MJX + JAX CPU
print("\n[2/5] MJX + JAX CPU (JIT)...")
mjx_model = mjx.put_model(mj_model)
mjx_data = mjx.make_data(mj_model)

@jax.jit
def mjx_step(m, d):
    return mjx.step(m, d)

# Warmup
mjx_data = mjx_step(mjx_model, mjx_data)
mjx_data.qpos.block_until_ready()

start = time.perf_counter()
for _ in range(SIM_STEPS):
    mjx_data = mjx_step(mjx_model, mjx_data)
mjx_data.qpos.block_until_ready()
elapsed = time.perf_counter() - start
sim_results["MJX JAX CPU"] = SIM_STEPS / elapsed
print(f"   {sim_results['MJX JAX CPU']:.0f} steps/sec")

# 3. Gymnasium Single
print("\n[3/5] Gymnasium Single...")
env = create_env(1)
obs, _ = env.reset()

start = time.perf_counter()
for _ in range(SIM_STEPS):
    action = env.action_space.sample()
    obs, _, term, trunc, _ = env.step(action)
    if term or trunc:
        obs, _ = env.reset()
elapsed = time.perf_counter() - start
sim_results["Gymnasium Single"] = SIM_STEPS / elapsed
print(f"   {sim_results['Gymnasium Single']:.0f} steps/sec")
env.close()

# 4-8. VecEnv (4-8 parallel)
for idx, n in enumerate(range(4, 9)):
    print(f"\n[{4 + idx}/8] VecEnv ({n} parallel)...")
    vec_env = create_env(n, DummyVecEnv)
    obs = vec_env.reset()
    
    start = time.perf_counter()
    for _ in range(SIM_STEPS // n):
        actions = np.array([vec_env.action_space.sample() for _ in range(n)])
        vec_env.step(actions)
    elapsed = time.perf_counter() - start
    
    sim_results[f"VecEnv ({n})"] = SIM_STEPS / elapsed
    print(f"   {sim_results[f'VecEnv ({n})']:.0f} steps/sec")
    vec_env.close()

winner = print_results(sim_results, "SIMULATION RESULTS", "steps/sec")
print(f"\n‚úÖ Fastest simulation: {winner}")

SIMULATION SPEED BENCHMARK (100,000 steps each)

[1/5] MuJoCo Native (C)...
   38941 steps/sec

[2/5] MJX + JAX CPU (JIT)...
   1885 steps/sec

[3/5] Gymnasium Single...
   5867 steps/sec

[4/8] VecEnv (4 parallel)...
   5503 steps/sec

[5/8] VecEnv (5 parallel)...
   5569 steps/sec

[6/8] VecEnv (6 parallel)...
   5578 steps/sec

[7/8] VecEnv (7 parallel)...
   5584 steps/sec

[8/8] VecEnv (8 parallel)...
   5583 steps/sec

SIMULATION RESULTS (steps/sec, higher = better)
  MuJoCo Native            :   38941 steps/sec üèÜ FASTEST
  Gymnasium Single         :    5867 steps/sec
  VecEnv (7)               :    5584 steps/sec
  VecEnv (8)               :    5583 steps/sec
  VecEnv (6)               :    5578 steps/sec
  VecEnv (5)               :    5569 steps/sec
  VecEnv (4)               :    5503 steps/sec
  MJX JAX CPU              :    1885 steps/sec

‚úÖ Fastest simulation: MuJoCo Native


In [4]:
# === PPO TRAINING BENCHMARK ===
print("=" * 60)
print(f"PPO TRAINING BENCHMARK ({TRAIN_TIMESTEPS:,} timesteps each)")
print("=" * 60)

train_results = {}

def benchmark_ppo(name: str, n_envs: int, vec_cls: Optional[type], batch_size: int):
    """Benchmark PPO training with given environment config."""
    print(f"\n  Testing: {name}...")
    env = create_env(n_envs, vec_cls)
    
    model = PPO("MlpPolicy", env, batch_size=batch_size, **PPO_PARAMS)
    _, elapsed = timed(model.learn, total_timesteps=TRAIN_TIMESTEPS)
    
    speed = TRAIN_TIMESTEPS / elapsed
    train_results[name] = speed
    print(f"   {speed:.0f} ts/sec ({elapsed:.1f}s)")
    
    env.close()
    del model

# Single env
benchmark_ppo("Single Env", 1, None, 64)

# DummyVecEnv (sequential)
for n in [2, 4, 6, 8]:
    benchmark_ppo(f"DummyVecEnv ({n})", n, DummyVecEnv, min(256, n * 64))

# SubprocVecEnv (multiprocessing) - 4 to 8 CPUs
for n in range(4, 9):
    benchmark_ppo(f"SubprocVecEnv ({n})", n, SubprocVecEnv, 256)

winner = print_results(train_results, "PPO TRAINING RESULTS", "ts/sec")
print(f"\n‚úÖ Best training config: {winner}")

# SubprocVecEnv scaling chart
print("\n" + "=" * 60)
print("SUBPROC SCALING (4-8 CPUs)")
print("=" * 60)
for n in range(4, 9):
    key = f"SubprocVecEnv ({n})"
    if key in train_results:
        speed = train_results[key]
        bar = "‚ñà" * int(speed / 100)
        print(f"  {n} CPUs: {speed:5.0f} ts/sec {bar}")

PPO TRAINING BENCHMARK (50,000 timesteps each)

  Testing: Single Env...
   1319 ts/sec (37.9s)

  Testing: DummyVecEnv (2)...
   1655 ts/sec (30.2s)

  Testing: DummyVecEnv (4)...
   1431 ts/sec (34.9s)

  Testing: DummyVecEnv (6)...
   1490 ts/sec (33.6s)

  Testing: DummyVecEnv (8)...
   1441 ts/sec (34.7s)

  Testing: SubprocVecEnv (4)...
   1879 ts/sec (26.6s)

  Testing: SubprocVecEnv (5)...
   2968 ts/sec (16.8s)

  Testing: SubprocVecEnv (6)...
   2655 ts/sec (18.8s)

  Testing: SubprocVecEnv (7)...
   3011 ts/sec (16.6s)

  Testing: SubprocVecEnv (8)...
   2510 ts/sec (19.9s)

PPO TRAINING RESULTS (ts/sec, higher = better)
  SubprocVecEnv (7)        :    3011 ts/sec  (~5.5 min for 1M) üèÜ FASTEST
  SubprocVecEnv (5)        :    2968 ts/sec  (~5.6 min for 1M)
  SubprocVecEnv (6)        :    2655 ts/sec  (~6.3 min for 1M)
  SubprocVecEnv (8)        :    2510 ts/sec  (~6.6 min for 1M)
  SubprocVecEnv (4)        :    1879 ts/sec  (~8.9 min for 1M)
  DummyVecEnv (2)          :    