From 65bfa85c13dbb6bdf047b3a9eb684521d564afe9 Mon Sep 17 00:00:00 2001
From: Fangyuan
Date: Wed, 21 Jun 2023 07:07:58 +0800
Subject: [PATCH] bug fix: compute_reward for batch input (#153)

Compute the goal distance once with np.linalg.norm(..., axis=-1) so that
compute_reward reduces over the last axis and can be called with a batch
of goals (one reward per row) as well as with a single achieved/desired
goal pair.

The reward values are the same as in the scalar implementation:
  * dense:  exp(-distance)
  * sparse: 1.0 when distance <= 0.45, otherwise 0.0

The result is now a NumPy value instead of a Python float; the "-> float"
annotation is left untouched by this patch.
---
Review note (ignored by git am): the sparse branch was changed from
-(d > 0.45) to (d <= 0.45) so the batch version keeps the 1.0 / 0.0
rewards of the removed line. The blob id after ".." on the index line
below was generated before that change.

 gymnasium_robotics/envs/maze/maze.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gymnasium_robotics/envs/maze/maze.py b/gymnasium_robotics/envs/maze/maze.py
index b9d56742..a8bac704 100644
--- a/gymnasium_robotics/envs/maze/maze.py
+++ b/gymnasium_robotics/envs/maze/maze.py
@@ -349,10 +349,11 @@ def add_xy_position_noise(self, xy_pos: np.ndarray) -> np.ndarray:
     def compute_reward(
         self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info
     ) -> float:
+        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
         if self.reward_type == "dense":
-            return np.exp(-np.linalg.norm(desired_goal - achieved_goal))
+            return np.exp(-d)
         elif self.reward_type == "sparse":
-            return 1.0 if np.linalg.norm(achieved_goal - desired_goal) <= 0.45 else 0.0
+            return (d <= 0.45).astype(np.float32)
 
     def compute_terminated(
         self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info