In [None]:
# Setup Vulkan for ManiSkill on Colab
# Create the directories that will hold the Vulkan ICD and GLVND EGL vendor JSON files.
!sudo mkdir -p /usr/share/vulkan/icd.d
!sudo mkdir -p /usr/share/glvnd/egl_vendor.d
# Download the two NVIDIA JSON files from the docker/ folder of the ManiSkill repository.
!wget -q https://raw.githubusercontent.com/haosulab/ManiSkill/main/docker/nvidia_icd.json
!wget -q https://raw.githubusercontent.com/haosulab/ManiSkill/main/docker/10_nvidia.json
# Move the downloaded files into the directories created above.
!sudo mv nvidia_icd.json /usr/share/vulkan/icd.d/
!sudo mv 10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json
# Refresh the package index, then install libvulkan-dev without recommended extras.
!sudo apt-get update
!sudo apt-get install -y --no-install-recommends libvulkan-dev

# Install ManiSkill dependencies
# -U upgrades already-installed packages; tyro is installed alongside mani_skill.
!pip install -U "mani_skill[dev]" tyro

In [None]:
# Import required packages
import gymnasium as gym
import mani_skill.envs
import time
# Benchmark: step one episode with random actions and report the achieved steps per second.
# No num_envs is passed here, so `terminated`/`truncated` are combined with a plain `or`.
env = gym.make("PegInsertionSide-v1")
obs, _ = env.reset(seed=0)
env.unwrapped.print_sim_details() # print verbose details about the configuration
done = False
# perf_counter() is a monotonic, high-resolution clock intended for measuring intervals
start_time = time.perf_counter()
while not done:
    obs, rew, terminated, truncated, info = env.step(env.action_space.sample())
    done = terminated or truncated
N = info["elapsed_steps"].item() # number of steps taken in the episode
dt = time.perf_counter() - start_time
FPS = N / (dt)
print(f"Frames Per Second = {N} / {dt} = {FPS}")
# Release the environment; the following cells create their own environments.
env.close()

In [None]:
from mani_skill.utils.wrappers import CPUGymWrapper
# Build the environment and wrap it with CPUGymWrapper, then look at what reset() returns.
env = gym.make("PegInsertionSide-v1")
env = CPUGymWrapper(env)
obs, _ = env.reset() # obs is numpy and unbatched
# Print the observation's Python type and shape to check the statement above.
print(type(obs), obs.shape)

In [None]:
import matplotlib.pyplot as plt
import torch # Import torch for using cpu()

# Create the environment with render_mode="rgb_array" so env.render() returns image data.
env = gym.make("PegInsertionSide-v1", render_mode="rgb_array")
env.reset()
# Take the first entry of the rendered batch, move it to the CPU and convert it to a
# NumPy array before handing it to matplotlib.
plt.imshow(env.render()[0].cpu().numpy()) # we take [0] as everything is a batched tensor

In [None]:
### Make sure to restart the notebook if you already ran a CPU sim!! ###
# Import required packages
import gymnasium as gym
import mani_skill.envs
import torch
import time
# Benchmark many parallel environments: step all of them with random actions until any
# one finishes, then report the total number of environment steps per second.
num_envs = 2048 # you can go up to 4096 on better GPUs
env = gym.make("PickCube-v1", num_envs=num_envs)
env.unwrapped.print_sim_details()
obs, _ = env.reset(seed=0)
done = False
# perf_counter() is a monotonic, high-resolution clock intended for measuring intervals
start_time = time.perf_counter()
while not done:
    # note that env.action_space is now a batched action space
    obs, rew, terminated, truncated, info = env.step(torch.from_numpy(env.action_space.sample()))
    done = (terminated | truncated).any() # stop if any environment terminates/truncates
# total steps = number of environments * steps taken (read from the first environment)
N = num_envs * info["elapsed_steps"][0].item()
dt = time.perf_counter() - start_time
FPS = N / (dt)
print(f"Frames Per Second = {N} / {dt} = {FPS}")
# Release the environments; the next cell builds a new environment with a different configuration.
env.close()

In [None]:
# Import required packages
import gymnasium as gym
import mani_skill.envs
import torch
import time
# Same benchmark as before, but with obs_mode="rgbd" so every step also produces camera data.
num_envs = 512 # you can go up higher on better GPUs, this is mostly memory constrained
env = gym.make("PickCube-v1", num_envs=num_envs, obs_mode="rgbd")
env.unwrapped.print_sim_details()
obs, _ = env.reset(seed=0)
done = False
# perf_counter() is a monotonic, high-resolution clock intended for measuring intervals
start_time = time.perf_counter()
while not done:
    # note that env.action_space is now a batched action space
    obs, rew, terminated, truncated, info = env.step(torch.from_numpy(env.action_space.sample()))
    done = (terminated | truncated).any() # stop if any environment terminates/truncates
# total steps = number of environments * steps taken (read from the first environment)
N = num_envs * info["elapsed_steps"][0].item()
dt = time.perf_counter() - start_time
FPS = N / (dt)
print(f"Frames Per Second = {N} / {dt} = {FPS}")
# The environment is intentionally left open: the following cell inspects `obs`.

In [None]:
# visualize the image data from the environment and inspect the data
# `obs` is the last observation produced by the rollout above; walk down its nested keys.
print(obs.keys())
print(obs['sensor_data'].keys())
print(obs['sensor_data']['base_camera'].keys())
print(obs['sensor_data']['base_camera']['rgb'].shape)
import matplotlib.pyplot as plt
# Show the RGB image at index 0 of the leading dimension, moved to the CPU for matplotlib.
plt.imshow(obs['sensor_data']['base_camera']['rgb'][0].cpu().numpy())

In [None]:
# Import required packages
import gymnasium as gym
from tqdm.notebook import tqdm
import numpy as np
import mani_skill.envs
import matplotlib.pyplot as plt

In [None]:
#@markdown Run this cell to display the action space of the chosen controller as well as the current view of the environment
# Can be any env_id from the list of Rigid-Body envs: https://maniskill.readthedocs.io/en/latest/tasks/index.html
env_id = "PickCube-v1" #@param ['PickCube-v1', 'PegInsertionSide-v1', 'StackCube-v1']

# choose an observation type and space, see https://maniskill.readthedocs.io/en/latest/user_guide/concepts/observation.html for details
obs_mode = "pointcloud" #@param can be one of ['pointcloud', 'rgb+depth+segmentation', 'state_dict', 'state']

# choose a controller type / action space, see https://maniskill.readthedocs.io/en/latest/user_guide/concepts/controllers.html for a full list
control_mode = "pd_joint_delta_pos" #@param can be one of ['pd_ee_delta_pose', 'pd_ee_delta_pos', 'pd_joint_delta_pos', 'arm_pd_joint_pos_vel']

reward_mode = "dense" #@param can be one of ['sparse', 'dense']

robot_uids = "panda" #@param can be one of ['panda', 'fetch']

# create an environment with our configs and then reset to a clean state
env = gym.make(env_id,
               num_envs=4,
               obs_mode=obs_mode,
               reward_mode=reward_mode,
               control_mode=control_mode,
               robot_uids=robot_uids,
               enable_shadow=True # this makes the default lighting cast shadows
               )
# `obs` is kept as a global: the display cells further down read it
obs, _ = env.reset()
print("Action Space:", env.action_space)

# take a look at the current state of the 4 parallel environments we created
fig, axs = plt.subplots(2, 2, figsize=(8, 8))
rgbs = env.render_rgb_array() # this is an easy way to get the rgb array without having to set render_mode
# one subplot per environment: rgbs[i] is moved to the CPU and converted to NumPy for matplotlib
for i, ax in enumerate(axs.flatten()):
    ax.imshow(rgbs[i].cpu().numpy())
    ax.axis("off")
plt.suptitle("Current States viewed from external cameras")
fig.tight_layout()
# the environment is closed here; only `obs` (captured above) is used afterwards
env.close()

In [None]:
# some visualization functions for different observation modes
def show_camera_view(obs_camera, title, env_id=0):
    """Plot the image modalities of one camera for a single parallel environment.

    Args:
        obs_camera: Mapping from modality name to a batched tensor, e.g.
            ``obs['sensor_data']['base_camera']``. Each of the keys ``"rgb"``,
            ``"depth"`` and ``"segmentation"`` that is present gets its own
            panel; absent modalities are skipped instead of raising.
        title: Prefix used in the title of every panel.
        env_id: Index along the leading (batch) dimension to display.

    Raises:
        KeyError: If none of the supported modalities is present.
    """
    # (key in obs_camera, panel label, keep only channel 0?, extra imshow arguments)
    panel_specs = [
        ("rgb", "RGB", False, {}),
        ("depth", "Depth", True, {"cmap": "gray"}),
        ("segmentation", "Segmentation", True, {}),
    ]
    available = [spec for spec in panel_specs if spec[0] in obs_camera]
    if not available:
        raise KeyError("obs_camera contains none of 'rgb', 'depth', 'segmentation'")

    plt.figure()
    # With all three modalities present this is the same 1x3 layout as before.
    n_panels = len(available)
    for position, (key, label, first_channel_only, imshow_kwargs) in enumerate(available, start=1):
        image = obs_camera[key]
        if first_channel_only:
            # depth and segmentation carry a trailing channel axis; index 0 of it is shown
            image = image[..., 0]
        plt.subplot(1, n_panels, position)
        plt.title(f"{title} - {label}")
        plt.imshow(image[env_id].cpu().numpy(), **imshow_kwargs)

def show_pointcloud(obs, env_id=0):
    """Display the point cloud observation of one environment with trimesh.

    Args:
        obs: Observation dict containing ``obs['pointcloud']['xyzw']``,
            ``obs['pointcloud']['rgb']`` and
            ``obs['sensor_param']['base_camera']['cam2world_gl']``.
        env_id: Index along the leading (batch) dimension to display.

    Returns:
        Whatever ``trimesh.Scene.show()`` returns for the assembled scene.
    """
    import trimesh

    cloud = obs['pointcloud']
    # keep only the first three components (x, y, z) of every point
    points = cloud['xyzw'][env_id, ..., :3].cpu().numpy()
    colors = cloud['rgb'][env_id].cpu().numpy()
    # pose of the base camera, used as the initial viewpoint of the scene
    camera_pose = obs["sensor_param"]["base_camera"]["cam2world_gl"][env_id].cpu().numpy()

    fov_degrees = np.rad2deg(np.pi/2)
    viewer_camera = trimesh.scene.Camera("camera", (1024, 1024), fov=(fov_degrees, fov_degrees))
    geometry = trimesh.points.PointCloud(points, colors)
    scene = trimesh.Scene([geometry], camera=viewer_camera, camera_transform=camera_pose)
    return scene.show()

In [None]:
#@markdown Display the RGBD+Segmentation observation. Make sure you are using `obs_mode="rgbd"`
# The segmentation panel needs a "segmentation" entry in the camera observation.
# NOTE(review): confirm that `rgbd` provides it; otherwise use `obs_mode="rgb+depth+segmentation"`.
show_camera_view(obs['sensor_data']['base_camera'], "Base")

In [None]:
#@markdown Display one of the Pointcloud observations. Make sure you are using `obs_mode="pointcloud"`
# show_pointcloud reads obs['pointcloud'] and obs['sensor_param']['base_camera'] and shows environment 0 by default
show_pointcloud(obs)

In [None]:
# Import required packages
import gymnasium as gym
import torch
import mani_skill.envs
from tqdm.notebook import tqdm
from mani_skill.utils.wrappers import RecordEpisode
# to make it look a little more realistic, we will enable shadows which make the default lighting cast shadows
env = gym.make("PickCube-v1", num_envs=4, render_mode="rgb_array", enable_shadow=True)
# wrap the environment so that stepping it also records what happens to disk
env = RecordEpisode(
    env,
    "./videos", # the directory to save replay videos and trajectories to
    # on GPU sim we record intervals, not by single episodes as there are multiple envs
    # each 100 steps a new video is saved
    max_steps_per_video=100
)

# step through the environment with random actions
obs, _ = env.reset()
for i in tqdm(range(100)):
    # the sampled action is a NumPy array; it is converted to a torch tensor before stepping
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(torch.from_numpy(action))
    # env.render_human() # will render with a window if possible
# close the wrapped environment before the recorded video is opened below
env.close()
from IPython.display import Video
Video("./videos/0.mp4", embed=True, width=640) # Watch our replay

In [None]:
# Launch the interactive Panda teleoperation example on the StackCube-v1 task (-e selects the environment).
!python -m mani_skill.examples.teleoperation.interactive_panda -e "StackCube-v1"

In [None]:
%%html
<!-- Embedded teleoperation demo video, streamed from the ManiSkill repository. -->
<video controls width=800>
<source src="https://github.com/haosulab/ManiSkill/raw/main/docs/source/_static/videos/teleop-stackcube-demo.mp4">
</video>

# 2 Reinforcement Learning

In [None]:
from IPython.display import Video

In [None]:
# Download the two PPO baseline scripts from the ManiSkill repository into the working directory
# (-O sets the output file name, overwriting an existing copy).
!wget https://raw.githubusercontent.com/haosulab/ManiSkill/main/examples/baselines/ppo/ppo.py -O ppo.py
!wget https://raw.githubusercontent.com/haosulab/ManiSkill/main/examples/baselines/ppo/ppo_rgb.py -O ppo_rgb.py

In [None]:
# Load the TensorBoard notebook extension and open a dashboard that reads logs from the "runs" directory.
%load_ext tensorboard
%tensorboard --logdir runs

In [None]:
# Run the downloaded ppo.py script on PushCube-v1; the experiment name "state-pushcube"
# is the folder under runs/ that the replay cell below reads from.
!python ppo.py --env_id="PushCube-v1" --exp-name="state-pushcube" \
  --num_envs=1024 --update_epochs=8 --num_minibatches=32 \
  --total_timesteps=600_000 --eval_freq=8 --num-steps=20

In [None]:
# Requires the training run above to have written videos/3.mp4 under runs/state-pushcube.
Video("runs/state-pushcube/videos/3.mp4", embed=True, width=1024) # Watch a replay during training evaluation

In [None]:
# Run the downloaded ppo_rgb.py script on PushCube-v1; outputs go under the experiment name "rgb-pushcube".
!python ppo_rgb.py --env_id="PushCube-v1" --exp-name="rgb-pushcube" \
  --num_envs=256 --update_epochs=8 --num_minibatches=16 \
  --total_timesteps=250_000 --eval_freq=10 --num-steps=20

In [None]:
# Evaluate a saved checkpoint instead of training (--evaluate), using a single evaluation environment.
# The checkpoint path assumes the rgb-pushcube training run above produced ckpt_41.pt.
!python ppo_rgb.py --env_id="PushCube-v1" \
  --evaluate --checkpoint=runs/rgb-pushcube/ckpt_41.pt \
  --num_eval_envs=1 --num-eval-steps=100

In [None]:
!python -m mani_skill.trajectory.replay_trajectory \
  --traj-path=/content/runs/rgb-pushcube/test_videos/trajectory.h5 --use-env-states \
  --render-mode="sensors" --save-video --allow-failure

In [None]:
# Plays the video written into runs/rgb-pushcube/test_videos by the replay command above.
Video("runs/rgb-pushcube/test_videos/0.mp4", embed=True, width=256) # Watch our replay from the camera perspective we trained on

# 3 Demonstration Data

In [None]:
from IPython.display import Video

In [None]:
# Download the PegInsertionSide-v1 demonstrations into the local "demos" directory (-o sets the output directory).
!python -m mani_skill.utils.download_demo "PegInsertionSide-v1" -o demos

In [None]:
# Play the sample video that ships with the downloaded motion-planning demonstrations.
Video("demos/PegInsertionSide-v1/motionplanning/sample.mp4", embed=True, width=512)

In [None]:
from mani_skill.trajectory.dataset import ManiSkillTrajectoryDataset
# Open the downloaded demonstration file as a dataset object.
dataset = ManiSkillTrajectoryDataset(dataset_file="demos/PegInsertionSide-v1/motionplanning/trajectory.h5")
# Fetch the item at index 150 and print each of its fields.
data = dataset[150]
for k, v in data.items():
    print(k, v)

In [None]:
import h5py
from mani_skill.utils.io_utils import load_json

import os

# Load the trajectory data from the .h5 file. The path points into the "demos" directory
# that the download cell above wrote to.
traj_path = "demos/PegInsertionSide-v1/motionplanning/trajectory.h5"
# You can also replace the above path with the trajectory you just recorded (./tmp/trajectory.h5)
# The file is intentionally left open: later cells keep reading from `h5_file`.
h5_file = h5py.File(traj_path, "r")

# Load associated json: same file name with the extension swapped. splitext only touches
# the final extension, so a ".h5" appearing elsewhere in the path is left alone.
json_path = os.path.splitext(traj_path)[0] + ".json"
json_data = load_json(json_path)

episodes = json_data["episodes"] # meta data of each episode
env_info = json_data["env_info"]
env_id = env_info["env_id"]
env_kwargs = env_info["env_kwargs"]

print("env_id:", env_id)
print("env_kwargs:", env_kwargs)
print("#episodes:", len(episodes))
print("Dataset source:", json_data["source_type"])
print("Dataset source description:", json_data["source_desc"])

In [None]:
# Pick one trajectory group out of the open h5 file by its key.
traj_id = "traj_0"
traj_h5 = h5_file[traj_id]
def print_h5py_structure(traj_h5, prefix=""):
    """Recursively print every dataset below an h5py group.

    Args:
        traj_h5: The group to walk.
        prefix: Path of the parent group; each child is printed as ``prefix/name``.

    Prints one line per dataset with its path, shape and dtype. Groups are
    descended into and produce no line of their own.
    """
    for name, node in traj_h5.items():
        path = f"{prefix}/{name}"
        if not isinstance(node, h5py.Group):
            # leaf dataset: report where it lives and what it holds
            print(path, node.shape, node.dtype)
            continue
        print_h5py_structure(node, prefix=path)

# List every dataset stored under the selected trajectory, starting from an empty prefix.
print_h5py_structure(traj_h5)

In [None]:
from mani_skill.trajectory.utils import index_dict, dict_to_list_of_dicts
index_dict(traj_h5["env_states"], 23) # select index 23 (i.e. the 24th entry) of all values

In [None]:
env_states = dict_to_list_of_dicts(traj_h5["env_states"]) # convert to a list of dictionaries
# index the list directly to look at the same entry (index 23) as in the previous cell
env_states[23]

In [None]:
from mani_skill.utils.visualization.misc import images_to_video
import gymnasium as gym
import mani_skill.envs
from tqdm.notebook import tqdm
def replay(episode_idx, h5_file, json_data, render_mode="cameras", fps=20):
    """Re-create one recorded episode and save it as the video ``./replay.mp4``.

    The environment is rebuilt from the metadata in ``json_data``, reset with the
    episode's seed, stepped with the recorded actions and, after every step,
    forced to the recorded environment state before a frame is rendered.

    Args:
        episode_idx: Position of the episode in ``json_data["episodes"]``.
        h5_file: Open trajectory file; the episode is read from ``traj_<episode_id>``.
        json_data: Metadata loaded from the JSON file that accompanies ``h5_file``.
        render_mode: Currently unused; kept so existing calls keep working.
        fps: Frame rate of the written video.
    """
    episodes = json_data["episodes"]
    ep = episodes[episode_idx]
    # episode_id should be the same as episode_idx, unless specified otherwise
    episode_id = ep["episode_id"]
    traj = h5_file[f"traj_{episode_id}"]
    env_states = dict_to_list_of_dicts(traj["env_states"])
    actions = traj["actions"]

    # Create the environment
    env_kwargs = json_data["env_info"]["env_kwargs"]
    env = gym.make(json_data["env_info"]["env_id"], **env_kwargs)
    print(env_kwargs)
    try:
        # Reset the environment with the recorded reset arguments and seed
        reset_kwargs = ep["reset_kwargs"].copy()
        reset_kwargs["seed"] = ep["episode_seed"]
        env.reset(**reset_kwargs)

        # .cpu() before .numpy() matches the other cells and also works for GPU tensors
        frames = [env.render_rgb_array()[0].cpu().numpy()]
        for i in tqdm(range(len(actions))):
            env.step(actions[i])
            # NOTE(review): the state at index i is applied after action i; confirm this is
            # the intended pairing of recorded states and actions.
            env.set_state_dict(env_states[i])
            frames.append(env.render_rgb_array()[0].cpu().numpy())
    finally:
        # always release the environment, even if stepping or rendering fails
        env.close()
    del env
    # honour the caller's fps instead of a hard-coded frame rate
    images_to_video(frames, output_dir=".", video_name="replay", fps=fps)

In [None]:
#@markdown Choose an episode ID here and run this cell to watch a replay of an expert demo. Note that this can be a little slow as this code generates a video. To run faster we recommend watching on a machine with a GUI and running only the replay function.

episode_idx = 4 #@param {type:"integer"}
# replay() writes ./replay.mp4, which is then embedded below
replay(episode_idx, h5_file, json_data)
Video("./replay.mp4", embed=True)

In [None]:
# Replay the downloaded demonstrations and save them again (--save-traj) with rgbd observations
# and the pd_joint_delta_pos controller; the next cell loads the resulting file.
!python -m mani_skill.trajectory.replay_trajectory \
    --traj-path demos/PegInsertionSide-v1/motionplanning/trajectory.h5 --save-traj \
    --obs-mode rgbd -c "pd_joint_delta_pos" --num-procs 1 --count 2 # only generate 2

In [None]:
from mani_skill.trajectory.dataset import ManiSkillTrajectoryDataset
import matplotlib.pyplot as plt
# Load the converted trajectory file (its name encodes the obs mode and controller chosen above).
dataset = ManiSkillTrajectoryDataset(dataset_file="demos/PegInsertionSide-v1/motionplanning/trajectory.rgbd.pd_joint_delta_pos.cpu.h5")
data = dataset[0]
# Show the hand camera's RGB image (left) and depth image (right) of the first item.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
axs[0].imshow(data["obs"]["sensor_data"]["hand_camera"]["rgb"])
axs[1].imshow(data["obs"]["sensor_data"]["hand_camera"]["depth"])

# 4 Heterogeneous Parallel Simulation

In [None]:
# Import required packages
import gymnasium as gym
import torch
import mani_skill.envs
from tqdm.notebook import tqdm
from mani_skill.utils.wrappers import RecordEpisode
from IPython.display import Video

In [None]:
# asset downloads may vary in speed depending on server
# Fetch the assets used by the two environments created in the cells below.
!python -m mani_skill.utils.download_asset -y PickClutterYCB-v1
!python -m mani_skill.utils.download_asset -y partnet_mobility

In [None]:
# See section 1.5 for more details on how we create environments and save videos
env = gym.make("PickClutterYCB-v1", num_envs=4, render_mode="rgb_array", enable_shadow=True)
# record videos into ./videos (one per 100 steps); save_trajectory=False skips saving trajectory data
env = RecordEpisode(env, "./videos", max_steps_per_video=100, save_trajectory=False)

# step through the environment with random actions
obs, _ = env.reset(seed=0)
for i in tqdm(range(100)):
    # the sampled action is a NumPy array; it is converted to a torch tensor before stepping
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(torch.from_numpy(action))
# close the wrapped environment before the recorded video is opened below
env.close()
Video("./videos/0.mp4", embed=True, width=640) # Watch our replay

In [None]:
# Same rollout as the previous cell, now on OpenCabinetDrawer-v1 with the pd_joint_delta_pos controller.
env = gym.make("OpenCabinetDrawer-v1", num_envs=4, control_mode="pd_joint_delta_pos", render_mode="rgb_array", enable_shadow=True)
# record videos into ./videos (one per 100 steps); save_trajectory=False skips saving trajectory data
env = RecordEpisode(env, "./videos", max_steps_per_video=100, save_trajectory=False)

# step through the environment with random actions
obs, _ = env.reset(seed=0)
for i in tqdm(range(100)):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(torch.from_numpy(action))
# close the wrapped environment before the recorded video is opened below
env.close()
# NOTE: this is the same ./videos/0.mp4 path that the previous cells display
Video("./videos/0.mp4", embed=True, width=640) # Watch our replay