def compute_reward(
    self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info
) -> float:
    """Compute the reward from the distance between achieved and desired goal.

    The Euclidean distance is taken over the last axis, so the inputs may be
    a single goal vector or a batch of goal vectors. For a batch, an array
    holding one reward per row is returned instead of a scalar.

    Args:
        achieved_goal: Goal value(s) currently reached.
        desired_goal: Target goal value(s); must broadcast against
            ``achieved_goal``.
        info: Not used by this method.

    Returns:
        ``exp(-distance)`` when ``self.reward_type`` is ``"dense"``. When it
        is ``"sparse"``, ``-1.0`` where the distance exceeds 0.45 and ``0.0``
        elsewhere, as ``np.float32``.

    Raises:
        ValueError: If ``self.reward_type`` is neither ``"dense"`` nor
            ``"sparse"``.
    """
    # asarray lets plain sequences (lists/tuples) be passed as goals too.
    distance = np.linalg.norm(
        np.asarray(achieved_goal) - np.asarray(desired_goal), axis=-1
    )
    if self.reward_type == "dense":
        return np.exp(-distance)
    if self.reward_type == "sparse":
        # Subtract from +0.0 instead of negating: negating a float zero gives
        # -0.0 for a reached goal, which is confusing in logs and sign checks.
        return np.float32(0.0) - (distance > 0.45).astype(np.float32)
    # Fail loudly rather than implicitly returning None as the reward.
    raise ValueError(
        f"Unsupported reward_type {self.reward_type!r}; "
        "expected 'dense' or 'sparse'."
    )