Skip to content

Commit

Permalink
Add new MuJoCo environments (#87)
Browse files Browse the repository at this point in the history
  • Loading branch information
LucasAlegre committed Mar 11, 2024
1 parent 197108a commit 9af2ed0
Show file tree
Hide file tree
Showing 10 changed files with 187 additions and 1 deletion.
Binary file added docs/_static/videos/mo-ant.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/videos/mo-humanoid.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/videos/mo-swimmer.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/videos/mo-walker2d.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 8 additions & 1 deletion docs/environments/mujoco.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Multi-objective versions of Mujoco environments.
| [`mo-reacher-v4`](https://mo-gymnasium.farama.org/environments/mo-reacher/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-reacher.gif" width="200px"> | Continuous / Discrete | `[target_1, target_2, target_3, target_4]` | Mujoco version of `mo-reacher-v0`, based on `Reacher-v4` [environment](https://gymnasium.farama.org/environments/mujoco/reacher/). |
| [`mo-hopper-v4`](https://mo-gymnasium.farama.org/environments/mo-hopper/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-hopper.gif" width="200px"> | Continuous / Continuous | `[velocity, height, energy]` | Multi-objective version of [Hopper-v4](https://gymnasium.farama.org/environments/mujoco/hopper/) env. |
| [`mo-halfcheetah-v4`](https://mo-gymnasium.farama.org/environments/mo-halfcheetah/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-halfcheetah.gif" width="200px"> | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [HalfCheetah-v4](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) env. Similar to [Xu et al. 2020](https://github.com/mit-gfx/PGMORL). |
| [`mo-walker2d-v4`](https://mo-gymnasium.farama.org/environments/mo-walker2d/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-walker2d.gif" width="200px"> | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Walker2d-v4](https://gymnasium.farama.org/environments/mujoco/walker2d/) env. |
| [`mo-ant-v4`](https://mo-gymnasium.farama.org/environments/mo-ant/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-ant.gif" width="200px"> | Continuous / Continuous | `[x_velocity, y_velocity, energy]` | Multi-objective version of [Ant-v4](https://gymnasium.farama.org/environments/mujoco/ant/) env. |
| [`mo-swimmer-v4`](https://mo-gymnasium.farama.org/environments/mo-swimmer/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-swimmer.gif" width="200px"> | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Swimmer-v4](https://gymnasium.farama.org/environments/mujoco/swimmer/) env. |
| [`mo-humanoid-v4`](https://mo-gymnasium.farama.org/environments/mo-humanoid/) <br><img src="https://raw.githubusercontent.com/Farama-Foundation/MO-Gymnasium/main/docs/_static/videos/mo-humanoid.gif" width="200px"> | Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Humanoid-v4](https://gymnasium.farama.org/environments/mujoco/humanoid/) env. |


```{toctree}
Expand All @@ -21,5 +25,8 @@ Multi-objective versions of Mujoco environments.
./mo-reacher
./mo-hopper
./mo-halfcheetah
./mo-walker2d
./mo-ant
./mo-swimmer
./mo-humanoid
```
31 changes: 31 additions & 0 deletions mo_gymnasium/envs/mujoco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,37 @@
kwargs={"cost_objective": False},
)

# Multi-objective Walker2d: 2-D reward [forward velocity, control energy].
register(
id="mo-walker2d-v4",
entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv",
max_episode_steps=1000,
)

# Multi-objective Ant: 3-D reward [x-velocity, y-velocity, control cost].
register(
id="mo-ant-v4",
entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
max_episode_steps=1000,
)

# Ant variant with the control cost folded into the velocity objectives
# (2-D reward); same entry point, toggled via the cost_objective kwarg.
register(
id="mo-ant-2d-v4",
entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
max_episode_steps=1000,
kwargs={"cost_objective": False},
)

# Multi-objective Swimmer: 2-D reward [forward velocity, control energy].
register(
id="mo-swimmer-v4",
entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
max_episode_steps=1000,
)

# Multi-objective Humanoid: 2-D reward [forward velocity, control cost].
register(
id="mo-humanoid-v4",
entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
max_episode_steps=1000,
)

register(
id="mo-reacher-v4",
entry_point="mo_gymnasium.envs.mujoco.reacher:MOReacherEnv",
Expand Down
46 changes: 46 additions & 0 deletions mo_gymnasium/envs/mujoco/ant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import numpy as np
from gymnasium.envs.mujoco.ant_v4 import AntEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOAntEnv(AntEnv, EzPickle):
    """Multi-objective version of the AntEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/ant/) for more information.

    ## Reward Space
    The reward is 2- or 3-dimensional:
    - 0: x-velocity
    - 1: y-velocity
    - 2: Control cost of the action (only when ``cost_objective=True``)
    If the cost_objective flag is set to False, the reward is 2-dimensional, and the cost is added to other objectives.
    A healthy reward is added to all objectives.
    """

    def __init__(self, cost_objective=True, **kwargs):
        """Initialize the multi-objective Ant environment.

        Args:
            cost_objective: if True, report the control cost as a third
                objective; otherwise fold it into the velocity objectives.
            **kwargs: forwarded unchanged to ``AntEnv``.
        """
        super().__init__(**kwargs)
        EzPickle.__init__(self, cost_objective, **kwargs)
        # Fix: attribute was previously misspelled `cost_objetive`; the
        # misspelled alias is kept so any external reader keeps working.
        self.cost_objective = cost_objective
        self.cost_objetive = cost_objective  # deprecated backward-compat alias
        self.reward_dim = 3 if cost_objective else 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the wrapped env and replace the scalar reward with a vector."""
        observation, reward, terminated, truncated, info = super().step(action)
        x_velocity = info["x_velocity"]
        y_velocity = info["y_velocity"]
        cost = info["reward_ctrl"]
        healthy_reward = info["reward_survive"]

        if self.cost_objective:
            # Undo the scalar env's weighting so the raw control cost is
            # reported as its own objective.
            cost /= self._ctrl_cost_weight
            vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32)
        else:
            vec_reward = np.array([x_velocity, y_velocity], dtype=np.float32)
            vec_reward += cost  # weighted cost folded into both objectives

        # Alive bonus is shared by every objective.
        vec_reward += healthy_reward

        return observation, vec_reward, terminated, truncated, info
34 changes: 34 additions & 0 deletions mo_gymnasium/envs/mujoco/humanoid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import numpy as np
from gymnasium.envs.mujoco.humanoid_v4 import HumanoidEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOHumanoidEnv(HumanoidEnv, EzPickle):
    """Multi-objective version of the HumanoidEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/humanoid/) for more information.

    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for running forward (x-velocity)
    - 1: Control cost of the action
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_dim = 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the wrapped env and return a 2-D reward vector."""
        observation, _, terminated, truncated, info = super().step(action)
        forward_speed = info["x_velocity"]
        # Control cost from the base env, scaled up by 10.
        ctrl_penalty = 10 * info["reward_quadctrl"]
        vec_reward = np.array([forward_speed, ctrl_penalty], dtype=np.float32)
        # The alive bonus is shared: all objectives are penalized when the agent falls.
        vec_reward += self.healthy_reward
        return observation, vec_reward, terminated, truncated, info
33 changes: 33 additions & 0 deletions mo_gymnasium/envs/mujoco/swimmer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import numpy as np
from gymnasium.envs.mujoco.swimmer_v4 import SwimmerEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOSwimmerEnv(SwimmerEnv, EzPickle):
    """Multi-objective version of the SwimmerEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/swimmer/) for more information.

    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for moving forward (x-velocity)
    - 1: Control cost of the action
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_dim = 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the wrapped env and return a 2-D reward vector."""
        observation, _, terminated, truncated, info = super().step(action)
        forward_speed = info["x_velocity"]
        # Negative squared-action magnitude: larger actions cost more energy.
        ctrl_energy = -np.sum(np.square(action))
        vec_reward = np.array([forward_speed, ctrl_energy], dtype=np.float32)
        return observation, vec_reward, terminated, truncated, info
35 changes: 35 additions & 0 deletions mo_gymnasium/envs/mujoco/walker2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import numpy as np
from gymnasium.envs.mujoco.walker2d_v4 import Walker2dEnv
from gymnasium.spaces import Box
from gymnasium.utils import EzPickle


class MOWalker2dEnv(Walker2dEnv, EzPickle):
    """Multi-objective version of the Walker2dEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/walker2d/) for more information.

    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for running forward (x-velocity)
    - 1: Control cost of the action
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_dim = 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the wrapped env and return a 2-D reward vector."""
        observation, _, terminated, truncated, info = super().step(action)
        forward_speed = info["x_velocity"]
        # Negative squared-action magnitude: larger actions cost more energy.
        ctrl_energy = -np.sum(np.square(action))
        vec_reward = np.array([forward_speed, ctrl_energy], dtype=np.float32)
        # The alive bonus is shared: all objectives are penalized when the agent falls.
        vec_reward += self.healthy_reward
        return observation, vec_reward, terminated, truncated, info

0 comments on commit 9af2ed0

Please sign in to comment.