# MIKASA-Robo basic usage

In [None]:
import mikasa_robo_suite
from mikasa_robo_suite.utils.wrappers import StateOnlyTensorToDictWrapper
from tqdm.notebook import tqdm
import torch
import gymnasium as gym
from mani_skill.utils.wrappers import RecordEpisode
from IPython.display import Video


# Basic usage: roll out random actions in a MIKASA-Robo environment and
# display the recorded video of the run.
env_name = "RememberColor9-v0"
# Number of random steps to take; also passed to RecordEpisode as
# max_steps_per_video.
# NOTE(review): the predefined-wrappers cell prints an episode timeout of 60
# for this same environment -- confirm that 90 is intended here.
episode_timeout = 90

env = gym.make(env_name, num_envs=4, obs_mode="rgb", render_mode="all")
# The environment id lives in a variable instead of being quoted inside the
# f-string: reusing the enclosing quote character inside a replacement field
# is a SyntaxError on Python versions before 3.12.
# NOTE(review): the other cells apply RecordEpisode last; the original order
# is kept here -- confirm whether the order matters.
env = RecordEpisode(env, f"./videos/{env_name}", max_steps_per_video=episode_timeout)
env = StateOnlyTensorToDictWrapper(env)

try:
    obs, _ = env.reset(seed=42)
    print(obs.keys())
    for _ in tqdm(range(episode_timeout)):
        # The sampled action is converted with torch.from_numpy before stepping.
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(torch.from_numpy(action))
finally:
    # Close the environment even if reset/step raises, so a failed run does
    # not leave it open in the kernel.
    env.close()

Video(f"./videos/{env_name}/0.mp4", embed=True, width=640)

# Run MIKASA-Robo with predefined wrappers (recommended)

**Important!** Always wrap MIKASA-Robo environments with `env = StateOnlyTensorToDictWrapper(env)`. All other wrappers are optional and can be used for debugging.

In [1]:
import mikasa_robo_suite
from mikasa_robo_suite.dataset_collectors.get_mikasa_robo_datasets import env_info
from tqdm.notebook import tqdm
import torch
import gymnasium as gym
from mani_skill.utils.wrappers import RecordEpisode
from IPython.display import Video

# Run an environment with the wrapper stack that env_info() returns for it.
env_name = "RememberColor9-v0"
obs_mode = "rgb" # or "state"
num_envs = 4
seed = 42

env = gym.make(env_name, num_envs=num_envs, obs_mode=obs_mode, render_mode="all")

# env_info() yields (wrapper_class, wrapper_kwargs) pairs plus the episode
# timeout for this environment; the wrappers are applied in the listed order.
state_wrappers_list, episode_timeout = env_info(env_name)
print(f"Episode timeout: {episode_timeout}")
for wrapper_class, wrapper_kwargs in state_wrappers_list:
    env = wrapper_class(env, **wrapper_kwargs)

# RecordEpisode is applied last, on top of the predefined wrappers.
env = RecordEpisode(env, f"./videos/{env_name}", max_steps_per_video=episode_timeout)

try:
    obs, _ = env.reset(seed=seed)
    print(obs.keys())
    for _ in tqdm(range(episode_timeout)):
        # The sampled action is converted with torch.from_numpy before stepping.
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(torch.from_numpy(action))
finally:
    # Close the environment even if reset/step raises, so a failed run does
    # not leave it open in the kernel.
    env.close()

Video(f"./videos/{env_name}/0.mp4", embed=True, width=640)



Episode timeout: 60
dict_keys(['agent', 'extra', 'sensor_param', 'sensor_data', 'prompt', 'oracle_info'])


  0%|          | 0/60 [00:00<?, ?it/s]

# Run MIKASA-Robo with selective wrappers

In [1]:
import mikasa_robo_suite
from mikasa_robo_suite.utils.wrappers import *
from mikasa_robo_suite.memory_envs import *
import gymnasium as gym
# torch is imported explicitly: the rollout below calls torch.from_numpy, and
# the cell should not depend on the name leaking in through a wildcard import.
import torch
from gymnasium.envs.registration import registry
from tqdm.notebook import tqdm
from mani_skill.utils.wrappers import RecordEpisode
from IPython.display import Video

# Run an environment with a hand-picked set of wrappers.
env_name = "ShellGameTouch-v0"
obs_mode = "state"
num_envs = 4
seed = 42

env = gym.make(env_name, num_envs=num_envs, obs_mode=obs_mode, render_mode="all")
# The episode length is read from the gymnasium registry entry of the environment.
max_steps = registry.get(env_name).max_episode_steps
print(f"Episode timeout: {max_steps}")

# StateOnlyTensorToDictWrapper goes first; the remaining wrappers are the
# optional ones selected for this environment.
env = StateOnlyTensorToDictWrapper(env)
env = InitialZeroActionWrapper(env, n_initial_steps=1)
env = ShellGameRenderCupInfoWrapper(env)
env = RenderStepInfoWrapper(env)
env = RenderRewardInfoWrapper(env)
env = DebugRewardWrapper(env)

# RecordEpisode is applied last, on top of all other wrappers.
env = RecordEpisode(env, f"./videos/{env_name}", max_steps_per_video=max_steps)

try:
    obs, _ = env.reset(seed=seed)
    print(obs.keys())
    for _ in tqdm(range(max_steps)):
        # The sampled action is converted with torch.from_numpy before stepping.
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(torch.from_numpy(action))
finally:
    # Close the environment even if reset/step raises, so a failed run does
    # not leave it open in the kernel.
    env.close()

Video(f"./videos/{env_name}/0.mp4", embed=True, width=640)

There are less parallel environments than total available models to sample.
                Not all models will be used during interaction even after resets unless you call env.reset(options=dict(reconfigure=True))
                or set reconfiguration_freq to be >= 1.




Episode timeout: 90
dict_keys(['state', 'prompt', 'oracle_info'])


  0%|          | 0/90 [00:00<?, ?it/s]

In [1]:
from mikasa_robo_suite.utils.wrappers import *
from mikasa_robo_suite.memory_envs import *
import gymnasium as gym
import torch
from gymnasium.envs.registration import registry
from tqdm.notebook import tqdm

from mani_skill.utils.wrappers import RecordEpisode
from IPython.display import Video

In [2]:
# Run TakeItBack-v0 with a hand-picked set of wrappers and record the rollout.
env_name = "TakeItBack-v0"
obs_mode = "state"
num_envs = 4
seed = 42

env = gym.make(env_name, num_envs=num_envs, obs_mode=obs_mode, render_mode="all")
# The episode length is read from the gymnasium registry entry of the environment.
max_steps = registry.get(env_name).max_episode_steps
print(f"max_steps: {max_steps}")

# StateOnlyTensorToDictWrapper goes first; the remaining wrappers are the
# optional ones selected for this environment.
env = StateOnlyTensorToDictWrapper(env)
env = InitialZeroActionWrapper(env, n_initial_steps=1)
env = RenderStepInfoWrapper(env)
env = RenderRewardInfoWrapper(env)
env = DebugRewardWrapper(env)

# RecordEpisode is applied last, on top of all other wrappers.
env = RecordEpisode(env, f"./videos/{env_name}", max_steps_per_video=max_steps)

try:
    obs, _ = env.reset(seed=seed)
    for _ in tqdm(range(max_steps)):
        # The sampled action is converted with torch.from_numpy before
        # stepping; torch must be imported by the imports cell.
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(torch.from_numpy(action))
finally:
    # Close the environment even if reset/step raises, so a failed run does
    # not leave it open in the kernel.
    env.close()

Video(f"./videos/{env_name}/0.mp4", embed=True, width=640)

max_steps: 180




  0%|          | 0/180 [00:00<?, ?it/s]