bug fix: compute_reward for batch input

Farama-Foundation · May 19, 2023 · c4700cd
1 parent da809b8
commit c4700cd
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/gymnasium_robotics/envs/maze/maze.py b/gymnasium_robotics/envs/maze/maze.py
@@ -349,10 +349,11 @@ def add_xy_position_noise(self, xy_pos: np.ndarray) -> np.ndarray:
     def compute_reward(
         self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info
     ) -> float:
+        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
         if self.reward_type == "dense":
-            return np.exp(-np.linalg.norm(desired_goal - achieved_goal))
+            return np.exp(-d)
         elif self.reward_type == "sparse":
-            return 1.0 if np.linalg.norm(achieved_goal - desired_goal) <= 0.45 else 0.0
+            return - (d > 0.45).astype(np.float32)
 
     def compute_terminated(
         self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info