In [2]:
#@title Run to install MuJoCo and `dm_control`
import distutils.util
import os
import subprocess
if subprocess.run('nvidia-smi').returncode:
  raise RuntimeError(
      'Cannot communicate with GPU. '
      'Make sure you are using a GPU Colab runtime. '
      'Go to the Runtime menu and select Choose runtime type.')

# Add an ICD config so that glvnd can pick up the Nvidia EGL driver.
# This is usually installed as part of an Nvidia driver package, but the Colab
# kernel doesn't install its driver via APT, and as a result the ICD is missing.
# (https://github.com/NVIDIA/libglvnd/blob/master/src/EGL/icd_enumeration.md)
NVIDIA_ICD_CONFIG_PATH = '/usr/share/glvnd/egl_vendor.d/10_nvidia.json'
if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
  with open(NVIDIA_ICD_CONFIG_PATH, 'w') as f:
    f.write("""{
    "file_format_version" : "1.0.0",
    "ICD" : {
        "library_path" : "libEGL_nvidia.so.0"
    }
}
""")

# print('Installing dm_control...')
# !pip install -q dm_control>=1.0.18

# Configure dm_control to use the EGL rendering backend (requires GPU)
%env MUJOCO_GL=egl

print('Checking that the dm_control installation succeeded...')
try:
  from dm_control import suite
  env = suite.load('cartpole', 'swingup')
  pixels = env.physics.render()
except Exception as e:
  raise e from RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.\n'
      'If using a hosted Colab runtime, make sure you enable GPU acceleration '
      'by going to the Runtime menu and selecting "Choose runtime type".')
else:
  del pixels, suite

!echo Installed dm_control $(pip show dm_control | grep -Po "(?<=Version: ).+")

Sun Oct 20 18:17:17 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.90.07              Driver Version: 550.90.07      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        Off |   00000000:4B:00.0 Off |                  Off |
|  0%   30C    P8             20W /  450W |     353MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA GeForce RTX 4090        Off |   00

In [3]:
#@title Other imports and helper functions

# General
import copy
import os
import itertools
from IPython.display import clear_output
import numpy as np

# Graphics-related
import matplotlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
from IPython.display import HTML
import PIL.Image
# Internal loading of video libraries.

# Use svg backend for figure rendering
%config InlineBackend.figure_format = 'svg'

# Font sizes
SMALL_SIZE = 8
MEDIUM_SIZE = 10
BIGGER_SIZE = 12
plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

# Inline video helper function
if os.environ.get('COLAB_NOTEBOOK_TEST', False):
  # We skip video generation during tests, as it is quite expensive.
  display_video = lambda *args, **kwargs: None
else:
  def display_video(frames, framerate=30):
    height, width, _ = frames[0].shape
    dpi = 70

    orig_backend = matplotlib.get_backend()
    matplotlib.use('Agg')  # Switch to headless 'Agg' to inhibit figure rendering.
    fig, ax = plt.subplots(1, 1, figsize=(width / dpi, height / dpi), dpi=dpi)
    # fig, ax = plt.subplots(1, 1, figsize=(scaled_width / dpi, scaled_height / dpi), dpi=dpi)
    matplotlib.use(orig_backend)  # Switch back to the original backend.
    ax.set_axis_off()
    ax.set_aspect('equal')
    ax.set_position([0, 0, 1, 1])
    im = ax.imshow(frames[0])
    def update(frame):
      im.set_data(frame)
      return [im]
    interval = 1000/framerate
    anim = animation.FuncAnimation(fig=fig, func=update, frames=frames,
                                   interval=interval, blit=True, repeat=False)
    return HTML(anim.to_html5_video())

# Seed numpy's global RNG so that cell outputs are deterministic. We also try to
# use RandomState instances that are local to a single cell wherever possible.
np.random.seed(42)

In [14]:
from ray import tune

def env_creator(config):
    from env import mice_env
    import shimmy

    gym_mice_env = shimmy.DmControlCompatibilityV0(mice_env.rodent_maze_forage())
    print("Observation Space in create_custom_env:", gym_mice_env.observation_space)
    
    return gym_mice_env  # 或者 CustomEnv()

tune.register_env("MiceEnv", env_creator)

In [15]:
env_creator(None)

Observation Space in create_custom_env: Dict('walker/actuator_activation': Box([], [], (0,), float64), 'walker/appendages_pos': Box(-inf, inf, (15,), float64), 'walker/body_height': Box(-inf, inf, (), float64), 'walker/egocentric_camera': Box(0, 255, (64, 64, 3), uint8), 'walker/end_effectors_pos': Box(-inf, inf, (12,), float64), 'walker/joints_pos': Box(-inf, inf, (56,), float64), 'walker/joints_vel': Box(-inf, inf, (56,), float64), 'walker/sensors_accelerometer': Box([], [], (0,), float64), 'walker/sensors_force': Box([], [], (0,), float64), 'walker/sensors_gyro': Box([], [], (0,), float64), 'walker/sensors_torque': Box([], [], (0,), float64), 'walker/sensors_touch': Box([], [], (0,), float64), 'walker/sensors_velocimeter': Box([], [], (0,), float64), 'walker/tendons_pos': Box(-inf, inf, (6,), float64), 'walker/tendons_vel': Box(-inf, inf, (6,), float64), 'walker/world_zaxis': Box(-inf, inf, (3,), float64))


<shimmy.dm_control_compatibility.DmControlCompatibilityV0 at 0x7f12305aef90>

In [13]:
from ray import tune
from shimmy.dm_control_compatibility import DmControlCompatibilityV0
from env import mice_env

def create_custom_env(cfg):
    return DmControlCompatibilityV0(mice_env.rodent_maze_forage())

# Register the environment with Ray
tune.register_env("MiceEnv", lambda cfg: create_custom_env(cfg))


In [9]:
env = DmControlCompatibilityV0(mice_env.rodent_maze_forage())
print("Observation Space in create_custom_env:", env.observation_space)

Observation Space in create_custom_env: Dict('walker/actuator_activation': Box([], [], (0,), float64), 'walker/appendages_pos': Box(-inf, inf, (15,), float64), 'walker/body_height': Box(-inf, inf, (), float64), 'walker/egocentric_camera': Box(0, 255, (64, 64, 3), uint8), 'walker/end_effectors_pos': Box(-inf, inf, (12,), float64), 'walker/joints_pos': Box(-inf, inf, (56,), float64), 'walker/joints_vel': Box(-inf, inf, (56,), float64), 'walker/sensors_accelerometer': Box([], [], (0,), float64), 'walker/sensors_force': Box([], [], (0,), float64), 'walker/sensors_gyro': Box([], [], (0,), float64), 'walker/sensors_torque': Box([], [], (0,), float64), 'walker/sensors_touch': Box([], [], (0,), float64), 'walker/sensors_velocimeter': Box([], [], (0,), float64), 'walker/tendons_pos': Box(-inf, inf, (6,), float64), 'walker/tendons_vel': Box(-inf, inf, (6,), float64), 'walker/world_zaxis': Box(-inf, inf, (3,), float64))


In [5]:
from ray.rllib.algorithms.dreamerv3 import DreamerV3Config

config = DreamerV3Config().environment(env="MiceEnv")

  if (distutils.version.LooseVersion(tf.__version__) <
  distutils.version.LooseVersion(required_tensorflow_version)):
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


In [6]:
algo = config.build()

`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
  logger.warn(
  """Calls the given function on each Learner L with the args: (L, \*\*kwargs).


TypeError: object of type 'NoneType' has no len()

In [8]:
print("Observation Space:", self.config.observation_space)

NameError: name 'self' is not defined

# Debug Record

24/10/15 Debug:
1. parser.parse_args() 的问题在于 Jupyter Notebook 本身不通过命令行运行，因此无法传递参数给 argparse。
2. 如果你想在 .ipynb 文件中传递参数，可以手动设置参数，而不是从命令行解析。

24/10/16 Debug:
1. RLlib 不接受用function定义的环境作为.environment的输入，
2. 必须将maze_forazing()改为gym环境或者自定义为class变量输入给env
3. 重点考虑手动定义class的方法

24/10/18 Debug:
1. 仔细看RLlib 定义custom env的方法 https://docs.ray.io/en/latest/rllib/rllib-env.html

In [7]:
import sys
from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config
from ray.rllib.utils.test_utils import add_rllib_example_script_args

sys.argv = ['script_name','--num-gpus','1','--num-env-runners','4']

parser = add_rllib_example_script_args(
    default_iters=1000000,
    default_reward=800.0,
    default_timesteps=1000000
)
# Use `parser` to add your own custom command line options to this script
# and (if needed) use their values toset up `config` below.

args = parser.parse_args()


config = (
    DreamerV3Config()
    # Use image observations.
    # .environment(
    #     env="CyberMice",
    #     env_config={"from_pixels": True},
    # )
    .environment("MiceEnv-v0")
    # .learners(
    #     num_learners=0 if args.num_gpus == 1 else args.num_gpus,
    #     num_gpus_per_learner=1 if args.num_gpus else 0,
    # )
    # .env_runners(
    #     num_env_runners=(args.num_env_runners or 0),
    #     # If we use >1 GPU and increase the batch size accordingly, we should also
    #     # increase the number of envs per worker.
    #     num_envs_per_env_runner=4 * (args.num_gpus or 1),
    #     remote_worker_envs=True,
    # )
    # .reporting(
    #     metrics_num_episodes_for_smoothing=(args.num_gpus or 1),
    #     report_images_and_videos=False,
    #     report_dream_data=False,
    #     report_individual_batch_item_stats=False,
    # )
    # # See Appendix A.
    # .training(
    #     model_size="S",
    #     training_ratio=512,
    #     batch_size_B=16 * (args.num_gpus or 1),
    # )

)

config.remote_worker_envs=False
rllib_algo = config.build(use_copy=False)


  logger.warn(f"Overriding environment {new_spec.id} already in registry.")
  logger.warn(
  """Calls the given function on each Learner L with the args: (L, \*\*kwargs).


TypeError: object of type 'NoneType' has no len()