diff --git a/mo_gymnasium/envs/lunar_lander/lunar_lander.py b/mo_gymnasium/envs/lunar_lander/lunar_lander.py
index caeab3aa..091190a9 100644
--- a/mo_gymnasium/envs/lunar_lander/lunar_lander.py
+++ b/mo_gymnasium/envs/lunar_lander/lunar_lander.py
@@ -35,7 +35,12 @@
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         # Result reward, shaping reward, main engine cost, side engine cost
-        self.reward_space = spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32)
+        self.reward_space = spaces.Box(
+            low=np.array([-100, -np.inf, -1, -1]),
+            high=np.array([100, np.inf, 0, 0]),
+            shape=(4,),
+            dtype=np.float32,
+        )
         self.reward_dim = 4
 
     def step(self, action):
diff --git a/mo_gymnasium/envs/mario/mario.py b/mo_gymnasium/envs/mario/mario.py
index 21842ed9..bb7949f4 100644
--- a/mo_gymnasium/envs/mario/mario.py
+++ b/mo_gymnasium/envs/mario/mario.py
@@ -51,7 +51,11 @@ def __init__(
         self.render_mode = render_mode
         self.objectives = set(objectives)
 
-        self.reward_space = gym.spaces.Box(high=np.inf, low=-np.inf, shape=(len(objectives),))
+        self.reward_space = gym.spaces.Box(
+            low=np.array([-np.inf, -np.inf, -25, 0, 0]),
+            high=np.array([np.inf, 0, 0, 100, np.inf]),
+            shape=(len(objectives),),
+        )
         self.reward_dim = len(objectives)
 
         # observation space for the environment is static across all instances
diff --git a/mo_gymnasium/envs/minecart/minecart.py b/mo_gymnasium/envs/minecart/minecart.py
index e6851f9a..a19fb34a 100644
--- a/mo_gymnasium/envs/minecart/minecart.py
+++ b/mo_gymnasium/envs/minecart/minecart.py
@@ -192,7 +192,11 @@ def __init__(
         self.observation_space = Box(-np.ones(7), np.ones(7), dtype=np.float32)
         self.action_space = Discrete(6)
 
-        self.reward_space = Box(low=-1, high=self.capacity, shape=(self.ore_cnt + 1,))
+        self.reward_space = Box(
+            low=np.append(np.zeros(self.ore_cnt), -1.0),
+            high=np.append(np.ones(self.ore_cnt) * self.capacity, 0.0),
+            shape=(self.ore_cnt + 1,),
+        )
         self.reward_dim = self.ore_cnt + 1
 
     def convex_coverage_set(self, gamma: float, symmetric: bool = True) -> List[np.ndarray]:
diff --git a/mo_gymnasium/envs/mountain_car/mountain_car.py b/mo_gymnasium/envs/mountain_car/mountain_car.py
index 54669588..6e88acca 100644
--- a/mo_gymnasium/envs/mountain_car/mountain_car.py
+++ b/mo_gymnasium/envs/mountain_car/mountain_car.py
@@ -24,7 +24,7 @@ def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
         super().__init__(render_mode, goal_velocity)
         EzPickle.__init__(self, render_mode, goal_velocity)
 
-        self.reward_space = spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32)
+        self.reward_space = spaces.Box(low=np.array([-1, -1, -1]), high=np.array([-1, 0, 0]), shape=(3,), dtype=np.float32)
         self.reward_dim = 3
 
     def step(self, action: int):
diff --git a/mo_gymnasium/envs/resource_gathering/resource_gathering.py b/mo_gymnasium/envs/resource_gathering/resource_gathering.py
index 464c4a6d..1c4e2b04 100644
--- a/mo_gymnasium/envs/resource_gathering/resource_gathering.py
+++ b/mo_gymnasium/envs/resource_gathering/resource_gathering.py
@@ -76,7 +76,7 @@ def __init__(self, render_mode: Optional[str] = None):
         # action space specification: 1 dimension, 0 up, 1 down, 2 left, 3 right
         self.action_space = Discrete(4)
         # reward space:
-        self.reward_space = Box(low=-1, high=1, shape=(3,), dtype=np.float32)
+        self.reward_space = Box(low=np.array([0.0, 0.0, -1.0]), high=np.array([1.0, 1.0, 0]), shape=(3,), dtype=np.float32)
         self.reward_dim = 3
 
     # pygame
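
Not part of the diff, but a quick way to sanity-check tightened bounds like these is to roll out a random policy and assert that every vector reward is contained in `reward_space`. A minimal sketch, assuming the registered env id `"mo-lunar-lander-v2"` (substitute whichever id your installed mo-gymnasium version registers) and an arbitrary step budget:

```python
import numpy as np
import mo_gymnasium as mo_gym

# Assumed env id; check the mo-gymnasium registry for your version.
env = mo_gym.make("mo-lunar-lander-v2")

obs, info = env.reset(seed=0)
for _ in range(1000):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    # Box.contains rejects dtypes that cannot be safely cast to the space's
    # dtype, so cast the vector reward to float32 before checking.
    assert env.reward_space.contains(reward.astype(np.float32)), reward
    if terminated or truncated:
        obs, info = env.reset()
env.close()
```

Running this for each touched environment would catch a bound that is too tight (e.g. a shaping term exceeding the declared range); it cannot prove the bounds are as tight as possible, only that observed rewards fall inside them.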