Add mo-lunar-lander-v2 #18

Merged
merged 2 commits on Dec 14, 2022
Changes from 1 commit
3 changes: 2 additions & 1 deletion README.md
@@ -56,7 +56,8 @@ You can also check more examples in this colab notebook!
| `four-room-v0` <br><img src="https://raw.githubusercontent.com/LucasAlegre/mo-gym/main/screenshots/four-room.png" width="200px"> | Discrete / Discrete | `[item1, item2, item3]` | Agent must collect three different types of items in the map and reach the goal. From [Alegre et al. 2022](https://proceedings.mlr.press/v162/alegre22a.html). |
| `water-reservoir-v0` | Continuous / Continuous | `[cost_flooding, deficit_water]` | A water reservoir environment. The agent executes a continuous action, corresponding to the amount of water released by the dam. From [Pianosi et al. 2013](https://iwaponline.com/jh/article/15/2/258/3425/Tree-based-fitted-Q-iteration-for-multi-objective). |
| `mo-mountaincar-v0` <br><img src="https://raw.githubusercontent.com/LucasAlegre/mo-gym/main/screenshots/mo-mountaincar.png" width="200px"> | Continuous / Discrete | `[time_penalty, reverse_penalty, forward_penalty]` | Classic Mountain Car env, but with extra penalties for the forward and reverse actions. From [Vamplew et al. 2011](https://www.researchgate.net/publication/220343783_Empirical_evaluation_methods_for_multiobjective_reinforcement_learning_algorithms). |
| `mo-MountainCarContinuous-v0` <br><img src="https://raw.githubusercontent.com/LucasAlegre/mo-gym/main/screenshots/mo-mountaincar.png" width="200px"> | Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. |
| `mo-MountainCarContinuous-v0` <br><img src="https://raw.githubusercontent.com/LucasAlegre/mo-gym/main/screenshots/mo-mountaincar.png" width="200px"> | Continuous / Continuous | `[time_penalty, fuel_consumption_penalty]` | Continuous Mountain Car env, but with penalties for fuel consumption. |
| `mo-lunar-lander-v2` <br><img src="https://raw.githubusercontent.com/LucasAlegre/mo-gym/main/screenshots/lunarlander.png" width="200px"> | Continuous / Discrete or Continuous | `[landed, shaped_reward, main_engine_fuel, side_engine_fuel]` | MO version of the "LunarLander-v2" environment. Objectives are defined similarly to those in [Hung et al. 2022](https://openreview.net/forum?id=AwWaBXLIJE). A short usage sketch follows the table. |
| `mo-reacher-v0` <br><img src="https://raw.githubusercontent.com/LucasAlegre/mo-gym/main/screenshots/reacher.png" width="200px"> | Continuous / Discrete | `[target_1, target_2, target_3, target_4]` | Reacher robot from [PyBullet](https://github.com/benelot/pybullet-gym/blob/ec9e87459dd76d92fe3e59ee4417e5a665504f62/pybulletgym/envs/roboschool/robots/manipulators/reacher.py), but there are 4 different target positions. From [Alegre et al. 2022](https://proceedings.mlr.press/v162/alegre22a.html). |
| `minecart-v0` <br><img src="https://raw.githubusercontent.com/LucasAlegre/mo-gym/main/screenshots/minecart.png" width="200px"> | Continuous or Image / Discrete | `[ore1, ore2, fuel]` | Agent must collect two types of ores and minimize fuel consumption. From [Abels et al. 2019](https://arxiv.org/abs/1809.07803v2). |
| `mo-highway-v0` and `mo-highway-fast-v0` <br><img src="https://raw.githubusercontent.com/LucasAlegre/mo-gym/main/screenshots/highway.png" width="200px"> | Continuous / Discrete | `[speed, right_lane, collision]` | The agent's objective is to reach a high speed while avoiding collisions with neighbouring vehicles and staying in the rightmost lane. From [highway-env](https://github.com/eleurent/highway-env). |
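
Not part of the diff, but a minimal usage sketch for the new `mo-lunar-lander-v2` entry. It assumes only what this PR itself provides: after `import mo_gym`, the id is registered with `gym`, the `continuous=True` keyword is forwarded to the base `LunarLander`, and the vector reward follows the `[landed, shaped_reward, main_engine_fuel, side_engine_fuel]` layout listed above.

```python
import gym
import numpy as np

import mo_gym  # importing mo_gym registers "mo-lunar-lander-v2"

# Discrete-action variant; pass continuous=True for the continuous one.
env = gym.make("mo-lunar-lander-v2")

obs, info = env.reset(seed=42)

# The reward is vector-valued:
# [landed, shaped_reward, main_engine_fuel, side_engine_fuel]
print(env.reward_space)  # Box(-inf, inf, (4,), float32)

total = np.zeros(4, dtype=np.float32)
terminated = truncated = False
while not (terminated or truncated):
    obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())
    total += vec_reward

print("accumulated vector return:", total)
```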
1 change: 1 addition & 0 deletions mo_gym/__init__.py
@@ -12,5 +12,6 @@
import mo_gym.resource_gathering
import mo_gym.water_reservoir
import mo_gym.continuous_mountain_car
import mo_gym.lunar_lander
from mo_gym.evaluation import *
from mo_gym.utils import *
7 changes: 7 additions & 0 deletions mo_gym/lunar_lander/__init__.py
@@ -0,0 +1,7 @@
from gym.envs.registration import register


register(id="mo-lunar-lander-v2",
         entry_point="mo_gym.lunar_lander.lunar_lander:MOLunarLander",
         max_episode_steps=1000,
         )
178 changes: 178 additions & 0 deletions mo_gym/lunar_lander/lunar_lander.py
@@ -0,0 +1,178 @@
import numpy as np
import math
from gym.envs.box2d.lunar_lander import LunarLander, SCALE, FPS, MAIN_ENGINE_POWER, SIDE_ENGINE_POWER, SIDE_ENGINE_AWAY, SIDE_ENGINE_HEIGHT, VIEWPORT_H, VIEWPORT_W, LEG_DOWN
from gym import spaces


class MOLunarLander(LunarLander):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Result reward, shaping reward, main engine cost, side engine cost
        self.reward_space = spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32)

    def step(self, action):
        assert self.lander is not None

        # Update wind
        assert self.lander is not None, "You forgot to call reset()"
        if self.enable_wind and not (
            self.legs[0].ground_contact or self.legs[1].ground_contact
        ):
            # the function used for wind is tanh(sin(2 k x) + sin(pi k x)),
            # which is proven to never be periodic, k = 0.01
            wind_mag = (
                math.tanh(
                    math.sin(0.02 * self.wind_idx)
                    + (math.sin(math.pi * 0.01 * self.wind_idx))
                )
                * self.wind_power
            )
            self.wind_idx += 1
            self.lander.ApplyForceToCenter(
                (wind_mag, 0.0),
                True,
            )

            # the function used for torque is tanh(sin(2 k x) + sin(pi k x)),
            # which is proven to never be periodic, k = 0.01
            torque_mag = math.tanh(
                math.sin(0.02 * self.torque_idx)
                + (math.sin(math.pi * 0.01 * self.torque_idx))
            ) * (self.turbulence_power)
            self.torque_idx += 1
            self.lander.ApplyTorque(
                (torque_mag),
                True,
            )

        if self.continuous:
            action = np.clip(action, -1, +1).astype(np.float32)
        else:
            assert self.action_space.contains(
                action
            ), f"{action!r} ({type(action)}) invalid "

        # Engines
        tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
        side = (-tip[1], tip[0])
        dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]

        m_power = 0.0
        if (self.continuous and action[0] > 0.0) or (
            not self.continuous and action == 2
        ):
            # Main engine
            if self.continuous:
                m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5  # 0.5..1.0
                assert m_power >= 0.5 and m_power <= 1.0
            else:
                m_power = 1.0
            # 4 is move a bit downwards, +-2 for randomness
            ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
            oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
            impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
            p = self._create_particle(
                3.5,  # 3.5 is here to make particle speed adequate
                impulse_pos[0],
                impulse_pos[1],
                m_power,
            )  # particles are just a decoration
            p.ApplyLinearImpulse(
                (ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
                impulse_pos,
                True,
            )
            self.lander.ApplyLinearImpulse(
                (-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
                impulse_pos,
                True,
            )

        s_power = 0.0
        if (self.continuous and np.abs(action[1]) > 0.5) or (
            not self.continuous and action in [1, 3]
        ):
            # Orientation engines
            if self.continuous:
                direction = np.sign(action[1])
                s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
                assert s_power >= 0.5 and s_power <= 1.0
            else:
                direction = action - 2
                s_power = 1.0
            ox = tip[0] * dispersion[0] + side[0] * (
                3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
            )
            oy = -tip[1] * dispersion[0] - side[1] * (
                3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
            )
            impulse_pos = (
                self.lander.position[0] + ox - tip[0] * 17 / SCALE,
                self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
            )
            p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
            p.ApplyLinearImpulse(
                (ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
                impulse_pos,
                True,
            )
            self.lander.ApplyLinearImpulse(
                (-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
                impulse_pos,
                True,
            )

        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)

        pos = self.lander.position
        vel = self.lander.linearVelocity
        state = [
            (pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
            (pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
            vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
            vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
            self.lander.angle,
            20.0 * self.lander.angularVelocity / FPS,
            1.0 if self.legs[0].ground_contact else 0.0,
            1.0 if self.legs[1].ground_contact else 0.0,
        ]
        assert len(state) == 8

        reward = 0
        vector_reward = np.zeros(4, dtype=np.float32)
        shaping = (
            -100 * np.sqrt(state[0] * state[0] + state[1] * state[1])
            - 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])
            - 100 * abs(state[4])
            + 10 * state[6]
            + 10 * state[7]
        )  # And ten points for legs contact, the idea is if you
        # lose contact again after landing, you get negative reward
        if self.prev_shaping is not None:
            reward = shaping - self.prev_shaping
            vector_reward[1] = shaping - self.prev_shaping
        self.prev_shaping = shaping

        reward -= (
            m_power * 0.30
        )  # less fuel spent is better, about -30 for heuristic landing
        vector_reward[2] = -m_power * 0.30
        reward -= s_power * 0.03
        vector_reward[3] = -s_power * 0.03

        terminated = False
        if self.game_over or abs(state[0]) >= 1.0:
            terminated = True
            reward = -100
            vector_reward[0] = -100
        if not self.lander.awake:
            terminated = True
            reward = +100
            vector_reward[0] = +100

        if self.render_mode == "human":
            self.render()

        return np.array(state, dtype=np.float32), vector_reward, terminated, False, {'original_reward': reward}
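
A note on the return value: `step` returns the 4-dimensional vector reward in place of the usual scalar and keeps the original `LunarLander-v2` scalar in `info['original_reward']`. The sketch below is illustrative only; the equal weights are an arbitrary choice, not something prescribed by this PR.

```python
import gym
import numpy as np

import mo_gym  # registers "mo-lunar-lander-v2"

# Arbitrary, equal preference over the four objectives
# [landed, shaped_reward, main_engine_fuel, side_engine_fuel].
weights = np.ones(4, dtype=np.float32)

env = gym.make("mo-lunar-lander-v2")
obs, info = env.reset(seed=0)

obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())

# Linear scalarization of the vector reward.
scalarized = float(np.dot(weights, vec_reward))
print("scalarized:", scalarized)
print("original scalar reward:", info["original_reward"])
```

With unit weights the scalarization matches `original_reward` on non-terminal steps; on terminal steps the original scalar is overwritten by the ±100 outcome while the vector keeps the shaping and fuel components, so the two values differ there.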
Binary file added screenshots/lunarlander.png