Merge pull request #59 from Farama-Foundation/doc/improve-reward-bounds
Improve reward bounds on some environments
ffelten committed Apr 17, 2023
2 parents ca67943 + 53b7240 commit 0b2fed2
Showing 5 changed files with 18 additions and 5 deletions.
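The change is the same in every file: a reward Box declared with blanket bounds (low=-np.inf, high=np.inf, or a scalar -1/1 broadcast to every dimension) is replaced by per-component arrays reflecting what each objective can actually return. A minimal before/after sketch of the pattern (the three components and their numbers are illustrative, not taken from any one of these environments):

import numpy as np
from gymnasium import spaces

# Before: nominally unbounded, so consumers learn nothing from the space.
loose = spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32)

# After: per-component bounds that downstream code (reward normalization,
# reference/utopian points in multi-objective RL, sanity checks) can use.
tight = spaces.Box(
    low=np.array([-100.0, -1.0, -1.0]),
    high=np.array([100.0, 0.0, 0.0]),
    dtype=np.float32,
)
assert tight.contains(np.array([50.0, -0.5, 0.0], dtype=np.float32))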
7 changes: 6 additions & 1 deletion mo_gymnasium/envs/lunar_lander/lunar_lander.py
@@ -35,7 +35,12 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

         # Result reward, shaping reward, main engine cost, side engine cost
-        self.reward_space = spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32)
+        self.reward_space = spaces.Box(
+            low=np.array([-100, -np.inf, -1, -1]),
+            high=np.array([100, np.inf, 0, 0]),
+            shape=(4,),
+            dtype=np.float32,
+        )
         self.reward_dim = 4

     def step(self, action):
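A quick runtime check of the new lunar-lander bounds (a sketch, not part of the commit; mo-lunar-lander-v2 is assumed to be the id registered by mo-gymnasium at this point in the repository's history):

import numpy as np
import mo_gymnasium as mo_gym

env = mo_gym.make("mo-lunar-lander-v2")  # env id is an assumption
obs, info = env.reset(seed=0)
for _ in range(200):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    # Each vector reward should now land inside the per-component bounds.
    assert env.unwrapped.reward_space.contains(np.asarray(reward, dtype=np.float32))
    if terminated or truncated:
        obs, info = env.reset()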
6 changes: 5 additions & 1 deletion mo_gymnasium/envs/mario/mario.py
@@ -51,7 +51,11 @@ def __init__(
         self.render_mode = render_mode

         self.objectives = set(objectives)
-        self.reward_space = gym.spaces.Box(high=np.inf, low=-np.inf, shape=(len(objectives),))
+        self.reward_space = gym.spaces.Box(
+            low=np.array([-np.inf, -np.inf, -25, 0, 0]),
+            high=np.array([np.inf, 0, 0, 100, np.inf]),
+            shape=(len(objectives),),
+        )
         self.reward_dim = len(objectives)

         # observation space for the environment is static across all instances
6 changes: 5 additions & 1 deletion mo_gymnasium/envs/minecart/minecart.py
@@ -192,7 +192,11 @@ def __init__(
         self.observation_space = Box(-np.ones(7), np.ones(7), dtype=np.float32)

         self.action_space = Discrete(6)
-        self.reward_space = Box(low=-1, high=self.capacity, shape=(self.ore_cnt + 1,))
+        self.reward_space = Box(
+            low=np.append(np.zeros(self.ore_cnt), -1.0),
+            high=np.append(np.ones(self.ore_cnt) * self.capacity, 0.0),
+            shape=(self.ore_cnt + 1,),
+        )
         self.reward_dim = self.ore_cnt + 1

     def convex_coverage_set(self, gamma: float, symmetric: bool = True) -> List[np.ndarray]:
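The minecart bounds are parametric, which is easy to misread; with hypothetical values ore_cnt = 2 and capacity = 1.5 they evaluate as follows (a worked sketch):

import numpy as np

ore_cnt, capacity = 2, 1.5  # hypothetical values, for illustration only

low = np.append(np.zeros(ore_cnt), -1.0)            # array([ 0.,  0., -1.])
high = np.append(np.ones(ore_cnt) * capacity, 0.0)  # array([1.5, 1.5, 0. ])
# i.e. one non-negative reward per ore, capped at the cart capacity,
# plus a final component confined to [-1, 0] (the fuel penalty).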
2 changes: 1 addition & 1 deletion mo_gymnasium/envs/mountain_car/mountain_car.py
@@ -24,7 +24,7 @@ def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
         super().__init__(render_mode, goal_velocity)
         EzPickle.__init__(self, render_mode, goal_velocity)

-        self.reward_space = spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32)
+        self.reward_space = spaces.Box(low=np.array([-1, -1, -1]), high=np.array([-1, 0, 0]), shape=(3,), dtype=np.float32)
         self.reward_dim = 3

     def step(self, action: int):
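Note that the first dimension now has low == high == -1, a degenerate interval: that component (the per-step time penalty, by our reading) is constant. Box accepts equal bounds, as this small sketch shows:

import numpy as np
from gymnasium import spaces

space = spaces.Box(low=np.array([-1, -1, -1]), high=np.array([-1, 0, 0]), dtype=np.float32)
assert space.low[0] == space.high[0] == -1.0  # component 0 is always exactly -1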
2 changes: 1 addition & 1 deletion mo_gymnasium/envs/resource_gathering/resource_gathering.py
@@ -76,7 +76,7 @@ def __init__(self, render_mode: Optional[str] = None):
         # action space specification: 1 dimension, 0 up, 1 down, 2 left, 3 right
         self.action_space = Discrete(4)
         # reward space:
-        self.reward_space = Box(low=-1, high=1, shape=(3,), dtype=np.float32)
+        self.reward_space = Box(low=np.array([0.0, 0.0, -1.0]), high=np.array([1.0, 1.0, 0]), shape=(3,), dtype=np.float32)
         self.reward_dim = 3

         # pygame
