In [1]:
%%capture
%pip install mujoco
%pip install mujoco_mjx
%pip install ml_collections

# Clone and install the repository properly
!rm -rf /home/jovyan/mujoco_playground
!git clone https://github.com/Itssshikhar/mujoco_playground.git /home/jovyan/mujoco_playground
%cd /home/jovyan/mujoco_playground
%pip install -e .
%cd /home/jovyan

# Add the repository root to Python path
import sys
import os
sys.path.append(os.path.abspath('/home/jovyan/mujoco_playground'))
os.environ['PYTHONPATH'] = os.path.abspath('/home/jovyan/mujoco_playground') + ':' + os.environ.get('PYTHONPATH', '')

# Set environment variable for mujoco_menagerie path
os.environ['MUJOCO_MENAGERIE_PATH'] = '/home/jovyan/external_deps/mujoco_menagerie'

# Create a minimal mjx_env.py if import fails
try:
    from mujoco_playground._src import mjx_env
    print('Import successful!')
except ImportError:
    print('Creating minimal mjx_env.py...')
    %%writefile /home/jovyan/mujoco_playground/mujoco_playground/_src/mjx_env.py
    """Minimal MJX environment module."""
    import jax
    import mujoco
    from mujoco import mjx
    
    class MJXEnv:
        """Base class for MJX environments."""
        def __init__(self):
            pass

In [2]:
# Install the remaining runtime dependencies into the kernel's environment.
# NOTE(review): train_zbot.py imports `playground.zbot` and `playground.runner`,
# and the directory listing further down shows a `playground/` folder inside the
# cloned repository - confirm that the PyPI distribution named `playground` is
# really what is wanted here rather than the cloned project.
%pip install playground
# Upgrade JAX together with its CUDA 12 extra; a kernel restart may be needed
# before the upgraded packages are picked up (see the pip note in the output).
%pip install -U "jax[cuda12]"


[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python3 -m pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.

[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python3 -m pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.


In [3]:
#@title Import Dependencies and Set Environment
# (The title line previously started with `!`, which handed it to the shell,
# where it was only a shell comment; it is now an ordinary form-title comment.)
import os
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import logging

# Configure logging for Colab
# Root logger at INFO with a timestamped format. Because this runs before
# train_zbot is imported, it decides how that module's log records are shown
# in the notebook.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

In [4]:
%%writefile train_zbot.py

#######################
# Setup & Dependencies
#######################

import argparse
import logging
import pickle
from datetime import datetime
from pathlib import Path

import cv2
import jax
import matplotlib.pyplot as plt
import numpy as np
from ml_collections import config_dict
from playground.zbot import joystick as zbot_joystick
from playground.zbot import randomize as zbot_randomize
from playground.zbot import zbot_constants
from playground.runner import ZBotRunner

# Setup logging
#
# logging.basicConfig() does nothing when the root logger already has handlers
# (for example when the host notebook configured logging before importing this
# module). In that situation the FileHandler passed to it would be created but
# never attached, leaving an empty zbot_training.log. Handle both cases
# explicitly so the training log always reaches the file.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
_LOG_FILE = 'zbot_training.log'

logger = logging.getLogger(__name__)

if logging.getLogger().handlers:
    # Logging is already configured by the host process: leave its console
    # setup and levels untouched and only add the file sink to this module's
    # logger. The isinstance guard avoids stacking duplicate handlers when the
    # module is re-imported or reloaded.
    if not any(isinstance(h, logging.FileHandler) for h in logger.handlers):
        _file_handler = logging.FileHandler(_LOG_FILE)
        _file_handler.setFormatter(logging.Formatter(_LOG_FORMAT))
        logger.addHandler(_file_handler)
else:
    # Stand-alone run (python train_zbot.py): configure the root logger with
    # both a console and a file handler.
    logging.basicConfig(
        level=logging.INFO,
        format=_LOG_FORMAT,
        handlers=[
            logging.StreamHandler(),
            logging.FileHandler(_LOG_FILE)
        ]
    )

########################
# Training Configuration
########################

def create_training_args(task="flat_terrain", load_existing=False):
    """Build the argument namespace handed to ZBotRunner.

    Args:
        task: Terrain task name, stored unchanged as ``args.task``.
        load_existing: Stored as ``args.load_model``.

    Returns:
        argparse.Namespace holding the fixed defaults below plus the two
        caller-supplied values.
    """
    # NOTE(review): `env` stays "ZbotJoystickFlatTerrain" whatever `task` is -
    # confirm this is intended for the rough-terrain phase.
    settings = {
        "env": "ZbotJoystickFlatTerrain",
        "task": task,
        "debug": False,
        "save_model": True,
        "load_model": load_existing,
        "seed": 42,
        "num_episodes": 3,
        "episode_length": 3000,
        # Commanded velocities: x, y and yaw.
        "x_vel": 1.0,
        "y_vel": 0.0,
        "yaw_vel": 0.0,
    }
    return argparse.Namespace(**settings)

def plot_training_progress(runner, title):
    """Save a reward-vs-steps curve with a +/- one-error band as a PNG.

    Args:
        runner: Object exposing ``x_data``, ``y_data`` and ``y_dataerr``.
        title: Human-readable phase name; shown in the plot title and turned
            into the file name (lower-cased, spaces replaced by underscores,
            suffixed with ``_progress.png``).

    The figure is written to the current working directory and then closed.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(runner.x_data, runner.y_data, label='Mean Reward')

    # Shaded band: mean reward minus/plus the reported error.
    band_low = np.array(runner.y_data) - np.array(runner.y_dataerr)
    band_high = np.array(runner.y_data) + np.array(runner.y_dataerr)
    plt.fill_between(runner.x_data, band_low, band_high, alpha=0.2, label='Std Dev')

    plt.xlabel('Training Steps')
    plt.ylabel('Episode Reward')
    plt.title(f'Training Progress: {title}')
    plt.grid(True)
    plt.legend()

    file_stem = title.lower().replace(" ", "_")
    plt.savefig(f'{file_stem}_progress.png')
    plt.close()

def save_training_metrics(runner, filename):
    """Pickle the runner's training curves for later analysis.

    Args:
        runner: Object exposing ``x_data``, ``y_data``, ``y_dataerr`` and
            ``times`` (a sequence of datetime-like timestamps).
        filename: Destination path of the pickle file.

    The stored dict has the keys ``steps``, ``rewards``, ``reward_std`` and
    ``training_time`` (seconds between the first and last timestamp). When no
    timestamps were recorded, ``training_time`` is stored as 0.0 instead of
    raising IndexError, so the collected curves are still saved.
    """
    timestamps = runner.times
    if len(timestamps) > 0:
        elapsed_seconds = (timestamps[-1] - timestamps[0]).total_seconds()
    else:
        elapsed_seconds = 0.0

    metrics = {
        'steps': runner.x_data,
        'rewards': runner.y_data,
        'reward_std': runner.y_dataerr,
        'training_time': elapsed_seconds
    }
    with open(filename, 'wb') as f:
        pickle.dump(metrics, f)

#############################
# Flat Terrain Training Phase
#############################

def train_flat_terrain():
    """Run the flat-terrain phase: train, report, persist results, evaluate.

    Returns:
        The ZBotRunner used for training, so its ``params`` can seed the
        rough-terrain phase.
    """
    separator = "=" * 50
    logger.info(separator)
    logger.info("Starting flat terrain training phase")
    logger.info(separator)

    # Build the flat-terrain configuration and echo every setting to the log.
    args = create_training_args(task="flat_terrain", load_existing=False)
    logger.info("Training configuration:")
    for option, setting in vars(args).items():
        logger.info(f"  {option}: {setting}")

    runner = ZBotRunner(args, logger)

    # Optimise the policy.
    logger.info("Beginning training loop...")
    runner.train()

    # Summarise the run from the data the runner recorded.
    logger.info("Training completed. Final statistics:")
    step_count = len(runner.x_data)
    logger.info(f"  Total steps: {step_count}")
    last_reward, last_error = runner.y_data[-1], runner.y_dataerr[-1]
    logger.info(f"  Final reward: {last_reward:.2f} ± {last_error:.2f}")
    elapsed = (runner.times[-1] - runner.times[0]).total_seconds()
    logger.info(f"  Training time: {elapsed:.2f}s")

    # Persist the learning curve (PNG) and the raw metrics (pickle).
    logger.info("Saving training visualizations and metrics...")
    plot_training_progress(runner, "Flat Terrain Training")
    save_training_metrics(runner, "flat_terrain_metrics.pkl")

    # Evaluate the trained policy.
    logger.info("Starting flat terrain policy evaluation...")
    runner.evaluate()

    return runner

##############################
# Rough Terrain Training Phase
##############################

def train_rough_terrain(flat_terrain_runner):
    """Run the rough-terrain phase, starting from the flat-terrain policy.

    Args:
        flat_terrain_runner: Runner returned by the flat-terrain phase; its
            ``params`` are copied onto the new runner before training.

    Returns:
        The ZBotRunner used for the rough-terrain phase.
    """
    separator = "=" * 50
    logger.info(separator)
    logger.info("Starting rough terrain adaptation phase")
    logger.info(separator)

    # Build the rough-terrain configuration and echo every setting to the log.
    args = create_training_args(task="rough_terrain", load_existing=True)
    logger.info("Training configuration:")
    for option, setting in vars(args).items():
        logger.info(f"  {option}: {setting}")

    runner = ZBotRunner(args, logger)

    # Warm start: reuse the parameters learned on flat terrain.
    logger.info("Loading pre-trained flat terrain policy...")
    runner.params = flat_terrain_runner.params

    # Continue optimising on the rough terrain.
    logger.info("Beginning rough terrain adaptation...")
    runner.train()

    # Summarise the run from the data the runner recorded.
    logger.info("Adaptation completed. Final statistics:")
    step_count = len(runner.x_data)
    logger.info(f"  Total steps: {step_count}")
    last_reward, last_error = runner.y_data[-1], runner.y_dataerr[-1]
    logger.info(f"  Final reward: {last_reward:.2f} ± {last_error:.2f}")
    elapsed = (runner.times[-1] - runner.times[0]).total_seconds()
    logger.info(f"  Training time: {elapsed:.2f}s")

    # Persist the learning curve (PNG) and the raw metrics (pickle).
    logger.info("Saving training visualizations and metrics...")
    plot_training_progress(runner, "Rough Terrain Training")
    save_training_metrics(runner, "rough_terrain_metrics.pkl")

    # Evaluate the adapted policy.
    logger.info("Starting rough terrain policy evaluation...")
    runner.evaluate()

    return runner

#######################
# Analysis & Evaluation
#######################

def analyze_performance(flat_metrics, rough_metrics):
    """Log a summary of both phases and save a combined learning-curve plot.

    Args:
        flat_metrics: Metrics dict of the flat-terrain phase with the keys
            ``steps``, ``rewards``, ``reward_std`` and ``training_time``.
        rough_metrics: Same structure for the rough-terrain phase.

    The comparison figure is written to ``training_comparison.png`` in the
    current working directory and then closed.
    """
    separator = "=" * 50
    logger.info(separator)
    logger.info("Performance Analysis")
    logger.info(separator)

    # (phase label, metrics, plot colour) - flat terrain first, then rough.
    phases = (
        ("Flat Terrain", flat_metrics, 'blue'),
        ("Rough Terrain", rough_metrics, 'red'),
    )

    # Summary statistics per phase.
    logger.info("Training Summary:")
    for phase_label, phase_metrics, _ in phases:
        logger.info(f"{phase_label}:")
        logger.info(f"  Training time: {phase_metrics['training_time']:.2f}s")
        logger.info(f"  Final reward: {phase_metrics['rewards'][-1]:.2f} ± {phase_metrics['reward_std'][-1]:.2f}")
        logger.info(f"  Peak reward: {max(phase_metrics['rewards']):.2f}")

    # Overlay both learning curves, each with its own error band.
    logger.info("Generating performance comparison plot...")
    plt.figure(figsize=(12, 6))
    for phase_label, phase_metrics, colour in phases:
        plt.plot(phase_metrics['steps'], phase_metrics['rewards'],
                 label=phase_label, color=colour)
        band_low = np.array(phase_metrics['rewards']) - np.array(phase_metrics['reward_std'])
        band_high = np.array(phase_metrics['rewards']) + np.array(phase_metrics['reward_std'])
        plt.fill_between(phase_metrics['steps'], band_low, band_high,
                         alpha=0.2, color=colour)

    plt.xlabel('Training Steps')
    plt.ylabel('Episode Reward')
    plt.title('Training Progress Comparison')
    plt.grid(True)
    plt.legend()
    plt.savefig('training_comparison.png')
    plt.close()

##############
# Main Script
##############

def main():
    """Run the whole pipeline: flat terrain, rough terrain, then analysis.

    The training and analysis helpers write their metrics and plots to the
    current working directory. Once everything has succeeded, the files that
    exist are also copied into ``outputs/`` so that the closing log message
    points at a directory that really contains the results.

    Raises:
        Exception: Any error raised during training or analysis is logged with
            its traceback and re-raised.
    """
    import shutil  # local import: only main() needs it

    separator = "=" * 50
    logger.info(separator)
    logger.info("Starting ZBot Training Pipeline")
    logger.info(separator)

    # Create output directory
    output_dir = Path("outputs")
    output_dir.mkdir(exist_ok=True)
    logger.info(f"Created output directory: {output_dir}")

    # Files produced in the working directory by the helpers in this module.
    artifact_names = (
        "flat_terrain_metrics.pkl",
        "rough_terrain_metrics.pkl",
        "flat_terrain_training_progress.png",
        "rough_terrain_training_progress.png",
        "training_comparison.png",
    )

    try:
        # Train on flat terrain
        logger.info("Starting flat terrain training phase...")
        flat_runner = train_flat_terrain()

        # Train on rough terrain
        logger.info("Starting rough terrain adaptation phase...")
        train_rough_terrain(flat_runner)

        # Load and analyze results
        # (these pickles were written moments ago by this same process, so
        # unpickling them is safe)
        logger.info("Loading training metrics for analysis...")
        with open("flat_terrain_metrics.pkl", 'rb') as f:
            flat_metrics = pickle.load(f)
        with open("rough_terrain_metrics.pkl", 'rb') as f:
            rough_metrics = pickle.load(f)

        analyze_performance(flat_metrics, rough_metrics)

        # Copy rather than move, so anything that still reads the files from
        # the working directory keeps working.
        for artifact_name in artifact_names:
            artifact = Path(artifact_name)
            if artifact.is_file():
                shutil.copy2(artifact, output_dir / artifact.name)

        logger.info("Training pipeline completed successfully!")
        logger.info("Check the outputs directory for results and visualizations.")

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception(f"An error occurred during training: {e}")
        raise

if __name__ == "__main__":
    main()

Overwriting train_zbot.py


In [5]:
!pip install opencv-python


[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python3 -m pip install --upgrade pip


In [6]:
#@title Check Repository Structure
# Sanity checks before training: print the working directory, list the zbot
# package (sources and XML scene files), and confirm that mjx_env can be
# imported by a separate interpreter process.
!pwd
!ls -R /home/jovyan/mujoco_playground/playground/zbot/
# NOTE(review): `python3` is resolved through the shell PATH and may not be the
# interpreter the kernel runs on - confirm they share one environment.
!python3 -c "from mujoco_playground._src import mjx_env; print('Import successful!')"

/home/jovyan
/home/jovyan/mujoco_playground/playground/zbot/:
base.py  __init__.py  joystick.py  randomize.py  xmls  zbot_constants.py

/home/jovyan/mujoco_playground/playground/zbot/xmls:
assets				     scene_mjx_feetonly_rough_terrain.xml
scene_mjx_feetonly_flat_terrain.xml  zbot_feet_only.xml

/home/jovyan/mujoco_playground/playground/zbot/xmls/assets:
hfield.png  rocky_texture.png
Import successful!


In [7]:

%%capture
import os
import subprocess
from pathlib import Path

# Define paths
# Everything this cell downloads is kept under ~/.mujoco_playground/external.
EXTERNAL_DEPS_PATH = Path.home() / ".mujoco_playground" / "external"
# Target directory of the mujoco_menagerie checkout.
# NOTE(review): the first cell exports MUJOCO_MENAGERIE_PATH as
# '/home/jovyan/external_deps/mujoco_menagerie', which is a different
# location - confirm which of the two the project actually reads.
MENAGERIE_PATH = EXTERNAL_DEPS_PATH / "mujoco_menagerie"
# NOTE(review): not referenced by any of the cells visible here;
# download_menagerie() does not read it.
MENAGERIE_COMMIT_SHA = "main"  # or specific SHA if needed

# Create directory if it doesn't exist
EXTERNAL_DEPS_PATH.mkdir(exist_ok=True, parents=True)

def download_menagerie():
    """Fetch mujoco_menagerie into MENAGERIE_PATH, replacing any existing copy.

    Strategy:
      1. Shallow ``git clone`` of the repository.
      2. If the clone fails, or ``git`` is not installed, download the
         ``main`` branch archive with ``wget`` and unpack it with ``unzip``.

    The fallback works inside a private temporary directory, so leftovers of
    an earlier run cannot collide with it and the archive is always cleaned
    up, even on failure.

    Raises:
        subprocess.CalledProcessError: ``wget`` or ``unzip`` exited with a
            non-zero status.
        OSError: A file-system operation failed, or a required fallback tool
            is not installed.
    """
    import shutil
    import tempfile

    print("Attempting to download mujoco_menagerie...")

    # Remove existing directory if it exists
    if MENAGERIE_PATH.exists():
        print("Removing existing mujoco_menagerie directory...")
        if MENAGERIE_PATH.is_dir() and not MENAGERIE_PATH.is_symlink():
            shutil.rmtree(MENAGERIE_PATH)
        else:
            # A plain file or a symlink is in the way; rmtree refuses those.
            MENAGERIE_PATH.unlink()

    try:
        # Try using git clone
        print("Attempting git clone...")
        subprocess.run(
            ["git", "clone", "--depth", "1",
             "https://github.com/deepmind/mujoco_menagerie.git",
             str(MENAGERIE_PATH)],
            check=True,
            capture_output=True
        )
        return
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError: the `git` executable itself is missing.
        print("Git clone failed, trying alternative download method...")
        # Make sure no partial checkout is left behind: moving onto an
        # existing directory would nest the new tree inside it.
        shutil.rmtree(MENAGERIE_PATH, ignore_errors=True)

    # Alternative: Download using wget/curl
    try:
        with tempfile.TemporaryDirectory() as scratch_dir:
            archive_path = os.path.join(scratch_dir, "menagerie.zip")
            subprocess.run([
                "wget",
                "https://github.com/deepmind/mujoco_menagerie/archive/refs/heads/main.zip",
                "-O", archive_path
            ], check=True)

            # Extract the downloaded zip (-q: do not list every file)
            subprocess.run(["unzip", "-q", archive_path, "-d", scratch_dir], check=True)

            # Move to correct location
            shutil.move(
                os.path.join(scratch_dir, "mujoco_menagerie-main"),
                str(MENAGERIE_PATH)
            )
        print("Successfully downloaded mujoco_menagerie using alternative method")
    except subprocess.CalledProcessError as e:
        print(f"Error downloading mujoco_menagerie: {e}")
        raise

# Try to download; on failure print a hint and let the error propagate so the
# cell stops instead of continuing without the assets.
try:
    download_menagerie()
except Exception as e:
    print(
        f"Failed to download mujoco_menagerie: {e}",
        "Please check your internet connection and try again",
        sep="\n",
    )
    raise

# Verify the installation by checking that the target directory now exists.
print(
    "mujoco_menagerie successfully installed"
    if MENAGERIE_PATH.exists()
    else "Failed to install mujoco_menagerie"
)

In [8]:
#@title Training Configuration
#@markdown Adjust training parameters here
NUM_EPISODES = 3  #@param {type:"integer"}
EPISODE_LENGTH = 3000  #@param {type:"integer"}
TASK = "rough_terrain"  #@param ["flat_terrain", "rough_terrain"]
LOAD_EXISTING = False  #@param {type:"boolean"}

# Ensure we're in the correct directory
%cd /home/jovyan

# Import after path setup
from train_zbot import create_training_args, train_flat_terrain, train_rough_terrain

# Build a Namespace from the form values above, then override the episode
# settings on it.
# NOTE(review): in the cells visible here `args` is never passed on - the run
# cell calls train_flat_terrain() / train_rough_terrain(), which build their
# own arguments through create_training_args(). As far as this notebook shows,
# only TASK influences the run; confirm whether NUM_EPISODES, EPISODE_LENGTH
# and LOAD_EXISTING are meant to take effect.
args = create_training_args(
    task=TASK,
    load_existing=LOAD_EXISTING
)
args.num_episodes = NUM_EPISODES
args.episode_length = EPISODE_LENGTH

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/home/jovyan


2025-01-30 16:32:52,442 - INFO - Handler "orbax.checkpoint._src.handlers.array_checkpoint_handler.ArrayCheckpointHandler" already exists in the registry with associated type <class 'orbax.checkpoint._src.handlers.array_checkpoint_handler.ArrayCheckpointHandler'>. Skipping registration.
2025-01-30 16:32:52,445 - INFO - Handler "orbax.checkpoint._src.handlers.proto_checkpoint_handler.ProtoCheckpointHandler" already exists in the registry with associated type <class 'orbax.checkpoint._src.handlers.proto_checkpoint_handler.ProtoCheckpointHandler'>. Skipping registration.
2025-01-30 16:32:52,447 - INFO - Handler "orbax.checkpoint._src.handlers.json_checkpoint_handler.JsonCheckpointHandler" already exists in the registry with associated type <class 'orbax.checkpoint._src.handlers.json_checkpoint_handler.JsonCheckpointHandler'>. Skipping registration.
2025-01-30 16:32:52,451 - INFO - Handler "orbax.checkpoint._src.handlers.base_pytree_checkpoint_handler.BasePyTreeCheckpointHandler" already ex

In [None]:
#@title Run Training
#@markdown Click to start training

# First, ensure we're in the correct directory
import os
os.chdir('/home/jovyan/mujoco_playground')

# Add the repository root to Python path
import sys
sys.path.insert(0, '/home/jovyan/mujoco_playground')

# Import after path setup
from playground.zbot import zbot_constants
from playground.runner import ZBotRunner
from train_zbot import create_training_args, train_flat_terrain, train_rough_terrain

# Verify XML file exists
xml_path = zbot_constants.task_to_xml(TASK)
print(f"Looking for XML file at: {xml_path}")
print(f"File exists: {os.path.exists(xml_path)}")

# Run training with proper error handling
try:
    if TASK == "flat_terrain":
        print("Starting flat terrain training...")
        runner = train_flat_terrain()
    else:
        print("Starting rough terrain training...")
        flat_runner = train_flat_terrain()
        runner = train_rough_terrain(flat_runner)

    # Display training progress
    display.display(plt.gcf())

except FileNotFoundError as e:
    print(f"Error: Could not find required files: {e}")
    print("Current working directory:", os.getcwd())
    print("\nContents of zbot directory:")
    !ls -R playground/zbot/
except Exception as e:
    print(f"An error occurred: {e}")
    import traceback
    traceback.print_exc()

2025-01-30 16:32:52,473 - INFO - Starting flat terrain training phase
2025-01-30 16:32:52,474 - INFO - Training configuration:
2025-01-30 16:32:52,474 - INFO -   env: ZbotJoystickFlatTerrain
2025-01-30 16:32:52,475 - INFO -   task: flat_terrain
2025-01-30 16:32:52,475 - INFO -   debug: False
2025-01-30 16:32:52,475 - INFO -   save_model: True
2025-01-30 16:32:52,475 - INFO -   load_model: False
2025-01-30 16:32:52,476 - INFO -   seed: 42
2025-01-30 16:32:52,476 - INFO -   num_episodes: 3
2025-01-30 16:32:52,476 - INFO -   episode_length: 3000
2025-01-30 16:32:52,477 - INFO -   x_vel: 1.0
2025-01-30 16:32:52,477 - INFO -   y_vel: 0.0
2025-01-30 16:32:52,477 - INFO -   yaw_vel: 0.0


Looking for XML file at: playground/zbot/xmls/scene_mjx_feetonly_rough_terrain.xml
File exists: True
Starting rough terrain training...


2025-01-30 16:32:53,538 - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-01-30 16:32:53,539 - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-01-30 16:32:55,280 - INFO - RL config: action_repeat: 1
batch_size: 512
clipping_epsilon: 0.2
discounting: 0.97
entropy_cost: 0.01
episode_length: 1000
learning_rate: 0.0003
max_grad_norm: 1.0
network_factory:
  policy_hidden_layer_sizes: &id001 !!python/tuple
  - 1024
  - 512
  - 256
  policy_obs_key: state
  value_hidden_layer_sizes: *id001
  value_obs_key: privileged_state
normalize_observations: true
num_envs: 16384
num_evals: 20
num_minibatches: 32
num_resets_per_eval: 2
num_timesteps: 200000000
num_updates_per_batch: 8
reward_scaling: 1.0
unroll_length: 20

2025-01-30 16:32:55,281 - INFO - Beginning training loop...
2025-01-30 16:32:55,282 - INFO - Device count: 1, pro