In [None]:
import mujoco
import mujoco_viewer

model = mujoco.MjModel.from_xml_path('humanoid.xml')
data = mujoco.MjData(model)

# create the viewer object
viewer = mujoco_viewer.MujocoViewer(model, data)

# simulate and render
for _ in range(10000):
    if viewer.is_alive:
        mujoco.mj_step(model, data)
        viewer.render()
    else:
        break

# close
viewer.close()

In [14]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
import mujoco
import gym
import numpy as np

# Set random seed for reproducibility
set_random_seed(0)

# Define the Mujoco environment
class MujocoEnv(gym.Env):
    def __init__(self, xml_path):
        self.model = mujoco.MjModel.from_xml_path(xml_path)
        self.data = mujoco.MjData(self.model)
        
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(self.model.nu,))
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.model.nq + self.model.nv,))
        self.viewer_setup = False

    def step(self, action):
        self.data.ctrl[:] = action
        self.step()
        obs = np.concatenate([self.sim.data.qpos, self.sim.data.qvel])
        reward = 0.0
        done = False
        info = {}
        return obs, reward, done, info

    def reset(self):
        mjlib.mj_resetData(self.model.ptr, self.sim.data.ptr)
        qpos = self.model.data.qpos
        qpos[0] = 1.0
        qpos[2] = 0.2
        qpos[3] = 0.0
        qpos[4] = 0.0
        qpos[5] = 0.0
        qpos[6] = 0.0
        qpos[7] = 0.0
        qpos[8] = 0.0
        qpos[9] = 0.0
        qpos[10] = 0.0
        qpos[11] = 0.0
        qpos[12] = 0.0
        self.sim.data.qpos[:] = qpos
        self.sim.forward()
        obs = np.concatenate([self.sim.data.qpos, self.sim.data.qvel])
        return obs

    def render(self, mode='human'):
        if not self.viewer_setup:
            self.viewer_setup = True
            self.viewer.cam.distance = 3.0
            self.viewer.cam.elevation = -20.0
            self.viewer.cam.lookat[0] = 0.0
            self.viewer.cam.lookat[1] = 0.0
            self.viewer.cam.lookat[2] = 0.5
        self.viewer.render()

# Create a vectorized environment
env = SubprocVecEnv([lambda: MujocoEnv('humanoid.xml') for i in range(1)])


In [18]:
# Train the agent using PPO
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)

# # Save the trained model
# model.save("trained_model")

# Evaluate the trained agent
obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()

env.close()

Using cpu device


BrokenPipeError: [Errno 32] Broken pipe