Skip to content

Commit

Permalink
Add new MuJoCo environments (#87)
Browse files Browse the repository at this point in the history
  • Loading branch information
LucasAlegre committed Mar 11, 2024
1 parent 197108a commit 9af2ed0
Show file tree
Hide file tree
Showing 10 changed files with 187 additions and 1 deletion.
Binary file added docs/_static/videos/mo-ant.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/videos/mo-humanoid.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/videos/mo-swimmer.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/videos/mo-walker2d.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 8 additions & 1 deletion docs/environments/mujoco.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Multi-objective versions of Mujoco environments.
| [`mo-reacher-v4`](https://mo-gymnasium.farama.org/environments/mo-reacher/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-reacher.gif" width="200px"> | Continuous / Discrete | `[target_1, target_2, target_3, target_4]` | Mujoco version of `mo-reacher-v0`, based on `Reacher-v4` [environment](https://gymnasium.farama.org/environments/mujoco/reacher/). |
| [`mo-hopper-v4`](https://mo-gymnasium.farama.org/environments/mo-hopper/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-hopper.gif" width="200px"> | Continuous / Continuous | `[velocity, height, energy]` | Multi-objective version of [Hopper-v4](https://gymnasium.farama.org/environments/mujoco/hopper/) env. |
| [`mo-halfcheetah-v4`](https://mo-gymnasium.farama.org/environments/mo-halfcheetah/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-halfcheetah.gif" width="200px"> | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [HalfCheetah-v4](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) env. Similar to [Xu et al. 2020](https://github.com/mit-gfx/PGMORL). |
| [`mo-walker2d-v4`](https://mo-gymnasium.farama.org/environments/mo-walker2d/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-walker2d.gif" width="200px"> | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Walker2d-v4](https://gymnasium.farama.org/environments/mujoco/walker2d/) env. |
| [`mo-ant-v4`](https://mo-gymnasium.farama.org/environments/mo-ant/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-ant.gif" width="200px"> | Continuous / Continuous | `[x_velocity, y_velocity, energy]` | Multi-objective version of [Ant-v4](https://gymnasium.farama.org/environments/mujoco/ant/) env. |
| [`mo-swimmer-v4`](https://mo-gymnasium.farama.org/environments/mo-swimmer/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-swimmer.gif" width="200px"> | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Swimmer-v4](https://gymnasium.farama.org/environments/mujoco/swimmer/) env. |
| [`mo-humanoid-v4`](https://mo-gymnasium.farama.org/environments/mo-humanoid/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-humanoid.gif" width="200px"> | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Humanoid-v4](https://gymnasium.farama.org/environments/mujoco/humanoid/) env. |


```{toctree}
Expand All @@ -21,5 +25,8 @@ Multi-objective versions of Mujoco environments.
./mo-reacher
./mo-hopper
./mo-halfcheetah
./mo-walker2d
./mo-ant
./mo-swimmer
./mo-humanoid
```
31 changes: 31 additions & 0 deletions mo_gymnasium/envs/mujoco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,37 @@
kwargs={"cost_objective": False},
)

# Multi-objective Walker2d: 2-D reward [forward velocity, control energy].
register(
id="mo-walker2d-v4",
entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv",
max_episode_steps=1000,
)

# Multi-objective Ant: 3-D reward [x-velocity, y-velocity, control cost].
register(
id="mo-ant-v4",
entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
max_episode_steps=1000,
)

# Ant variant with the control cost folded into the velocity objectives
# (2-D reward); same entry point, toggled via the cost_objective kwarg.
register(
id="mo-ant-2d-v4",
entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)

# Multi-objective Swimmer: 2-D reward [forward velocity, control energy].
register(
id="mo-swimmer-v4",
entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
max_episode_steps=1000,
)

# Multi-objective Humanoid: 2-D reward [forward velocity, control cost].
register(
id="mo-humanoid-v4",
entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
max_episode_steps=1000,
)

register(
id="mo-reacher-v4",
entry_point="mo_gymnasium.envs.mujoco.reacher:MOReacherEnv",
Expand Down
46 changes: 46 additions & 0 deletions mo_gymnasium/envs/mujoco/ant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import numpy as np
from gymnasium.envs.mujoco.ant_v4 import AntEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOAntEnv(AntEnv, EzPickle):
    """Multi-objective version of the AntEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/ant/) for more information.

    ## Reward Space
    The reward is 2- or 3-dimensional:
    - 0: x-velocity
    - 1: y-velocity
    - 2: Control cost of the action (only when ``cost_objective=True``)
    If the cost_objective flag is set to False, the reward is 2-dimensional, and the cost is added to other objectives.
    A healthy reward is added to all objectives.
    """

    def __init__(self, cost_objective=True, **kwargs):
        """Initialize the multi-objective Ant environment.

        Args:
            cost_objective: if True, report the control cost as a third
                objective; otherwise fold it into the velocity objectives.
            **kwargs: forwarded unchanged to ``AntEnv``.
        """
        super().__init__(**kwargs)
        EzPickle.__init__(self, cost_objective, **kwargs)
        # Fix: attribute was previously misspelled `cost_objetive`; the
        # misspelled alias is kept so any external reader keeps working.
        self.cost_objective = cost_objective
        self.cost_objetive = cost_objective  # deprecated backward-compat alias
        self.reward_dim = 3 if cost_objective else 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the wrapped env and replace the scalar reward with a vector."""
        observation, reward, terminated, truncated, info = super().step(action)
        x_velocity = info["x_velocity"]
        y_velocity = info["y_velocity"]
        cost = info["reward_ctrl"]
        healthy_reward = info["reward_survive"]

        if self.cost_objective:
            # Undo the scalar env's weighting so the raw control cost is
            # reported as its own objective.
            cost /= self._ctrl_cost_weight
            vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32)
        else:
            vec_reward = np.array([x_velocity, y_velocity], dtype=np.float32)
            vec_reward += cost  # weighted cost folded into both objectives

        # Alive bonus is shared by every objective.
        vec_reward += healthy_reward

        return observation, vec_reward, terminated, truncated, info
34 changes: 34 additions & 0 deletions mo_gymnasium/envs/mujoco/humanoid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import numpy as np
from gymnasium.envs.mujoco.humanoid_v4 import HumanoidEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHumanoidEnv(HumanoidEnv, EzPickle):
    """Multi-objective version of the HumanoidEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/humanoid/) for more information.

    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for running forward (x-velocity)
    - 1: Control cost of the action
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_dim = 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the wrapped env and return a 2-D reward vector."""
        observation, _, terminated, truncated, info = super().step(action)
        forward_speed = info["x_velocity"]
        # Control cost from the base env, scaled up by 10.
        ctrl_penalty = 10 * info["reward_quadctrl"]
        vec_reward = np.array([forward_speed, ctrl_penalty], dtype=np.float32)
        # The alive bonus is shared: all objectives are penalized when the agent falls.
        vec_reward += self.healthy_reward
        return observation, vec_reward, terminated, truncated, info
33 changes: 33 additions & 0 deletions mo_gymnasium/envs/mujoco/swimmer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import numpy as np
from gymnasium.envs.mujoco.swimmer_v4 import SwimmerEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOSwimmerEnv(SwimmerEnv, EzPickle):
    """Multi-objective version of the SwimmerEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/swimmer/) for more information.

    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for moving forward (x-velocity)
    - 1: Control cost of the action
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_dim = 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the wrapped env and return a 2-D reward vector."""
        observation, _, terminated, truncated, info = super().step(action)
        forward_speed = info["x_velocity"]
        # Negative squared-action magnitude: larger actions cost more energy.
        ctrl_energy = -np.sum(np.square(action))
        vec_reward = np.array([forward_speed, ctrl_energy], dtype=np.float32)
        return observation, vec_reward, terminated, truncated, info
35 changes: 35 additions & 0 deletions mo_gymnasium/envs/mujoco/walker2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import numpy as np
from gymnasium.envs.mujoco.walker2d_v4 import Walker2dEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOWalker2dEnv(Walker2dEnv, EzPickle):
    """Multi-objective version of the Walker2dEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/walker2d/) for more information.

    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for running forward (x-velocity)
    - 1: Control cost of the action
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_dim = 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the wrapped env and return a 2-D reward vector."""
        observation, _, terminated, truncated, info = super().step(action)
        forward_speed = info["x_velocity"]
        # Negative squared-action magnitude: larger actions cost more energy.
        ctrl_energy = -np.sum(np.square(action))
        vec_reward = np.array([forward_speed, ctrl_energy], dtype=np.float32)
        # The alive bonus is shared: all objectives are penalized when the agent falls.
        vec_reward += self.healthy_reward
        return observation, vec_reward, terminated, truncated, info

0 comments on commit 9af2ed0

Please sign in to comment.