From 0f3bf9db5d81a3a0912b197df9a97ce81ecc24c7 Mon Sep 17 00:00:00 2001 From: pseudo-rnd-thoughts Date: Fri, 22 Mar 2024 15:47:45 +0000 Subject: [PATCH 1/2] Fix lunar lander determinism --- gymnasium/envs/box2d/lunar_lander.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py index d71fa9150..69ff8a5f6 100644 --- a/gymnasium/envs/box2d/lunar_lander.py +++ b/gymnasium/envs/box2d/lunar_lander.py @@ -328,6 +328,11 @@ def reset( ): super().reset(seed=seed) self._destroy() + + # Workaround for https://github.com/Farama-Foundation/Gymnasium/issues/728 + # It is unclear why self._destroy() alone does not fully clean up (reset) the world's elements; the root cause needs more investigation. + # Until then, a completely new world must be created on every self.reset(), otherwise bug #728 will occur. + self.world = Box2D.b2World(gravity=(0, self.gravity)) self.world.contactListener_keepref = ContactDetector(self) self.world.contactListener = self.world.contactListener_keepref self.game_over = False From 06b97ce21b549ea0c5531826979286e2da1f19f6 Mon Sep 17 00:00:00 2001 From: pseudo-rnd-thoughts Date: Fri, 22 Mar 2024 15:50:05 +0000 Subject: [PATCH 2/2] Add comment --- gymnasium/envs/box2d/lunar_lander.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py index 69ff8a5f6..fac0fade3 100644 --- a/gymnasium/envs/box2d/lunar_lander.py +++ b/gymnasium/envs/box2d/lunar_lander.py @@ -178,7 +178,9 @@ class LunarLander(gym.Env, EzPickle): The recommended value for `turbulence_power` is between 0.0 and 2.0. ## Version History - - v3: Reset wind and turbulence offset (`C`) whenever the environment is reset to ensure statistical independence between consecutive episodes (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium/issues/954)). 
+ - v3: Reset wind and turbulence offset (`C`) whenever the environment is reset to ensure statistical independence + between consecutive episodes (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium/issues/954)). + Fix non-deterministic behaviour due to not fully destroying the world (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium/issues/728)). - v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters - v1: Legs contact with ground added in state vector; contact with ground give +10 reward points, and -10 if then lose contact; reward renormalized to 200; harder initial random push.