Add known/approximate pareto front to Minecart and Resource Gathering #45

Merged · 2 commits · Feb 28, 2023
121 changes: 58 additions & 63 deletions mo_gymnasium/envs/minecart/minecart.py
@@ -4,7 +4,7 @@
from copy import deepcopy
from math import ceil
from pathlib import Path
from typing import Optional
from typing import List, Optional

import gymnasium as gym
import numpy as np
@@ -122,6 +122,13 @@ class Minecart(gym.Env, EzPickle):
## Episode Termination
The episode ends when the cart returns to the base.

## Arguments
- render_mode: The render mode to use. Can be "rgb_array" or "human".
- image_observation: If True, the observation is an RGB image of the environment.
- frame_skip: How many times each action is repeated. Default: 4
- incremental_frame_skip: If True, repeated actions are performed in succession; if False, they are applied simultaneously (e.g., 4 accelerations are applied and then the cart moves). Default: True
- config: Path to the .json configuration file. See the default configuration file for more information: https://github.com/Farama-Foundation/MO-Gymnasium/blob/main/mo_gymnasium/envs/minecart/mine_config.json

## Credits
The code was refactored from [Axel Abels' source](https://github.com/axelabels/DynMORL).
"""
@@ -131,15 +138,20 @@ class Minecart(gym.Env, EzPickle):
def __init__(
self,
render_mode: Optional[str] = None,
image_observation=False,
image_observation: bool = False,
frame_skip: int = 4,
incremental_frame_skip: bool = True,
config=str(Path(__file__).parent.absolute()) + "/mine_config.json",
):
EzPickle.__init__(self, render_mode, image_observation, config)
EzPickle.__init__(self, render_mode, image_observation, frame_skip, incremental_frame_skip, config)

self.render_mode = render_mode
self.screen = None
self.last_render_mode_used = None
self.config = config
self.frame_skip = frame_skip
assert self.frame_skip > 0, "Frame skip must be greater than 0."
self.incremental_frame_skip = incremental_frame_skip

with open(self.config) as f:
data = json.load(f)
@@ -185,33 +197,29 @@ def __init__(
def obj_cnt(self):
return self.ore_cnt + 1

def convex_coverage_set(self, frame_skip=1, discount=0.98, incremental_frame_skip=True, symmetric=True):
def convex_coverage_set(self, gamma: float, symmetric: bool = True) -> List[np.ndarray]:
"""
Computes an approximate convex coverage set
Computes an approximate convex coverage set (CCS).

Keyword Arguments:
frame_skip {int} -- How many times each action is repeated (default: {1})
discount {float} -- Discount factor to apply to rewards (default: {1})
incremental_frame_skip {bool} -- Whether actions are repeated incrementally (default: {1})
symmetric {bool} -- If true, we assume the pattern of accelerations from the base to the mine is the same as from the mine to the base (default: {True})
Args:
gamma (float): Discount factor to apply to rewards.
symmetric (bool): If true, we assume the pattern of accelerations from the base to the mine is the same as from the mine to the base. Default: True

Returns:
The convex coverage set
"""
policies = self.pareto_coverage_set(frame_skip, discount, incremental_frame_skip, symmetric)
policies = self.pareto_front(gamma, symmetric)
origin = np.min(policies, axis=0)
extended_policies = [origin] + policies
return [policies[idx - 1] for idx in ConvexHull(extended_policies).vertices if idx != 0]
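To make the hull step concrete, a self-contained sketch on a toy two-objective front: prepending an origin that every policy dominates keeps the hull bounded, and Pareto-optimal points that are not hull vertices (i.e., optimal for no linear scalarization) are filtered out.

```python
import numpy as np
from scipy.spatial import ConvexHull

front = [np.array([1.0, 0.0]), np.array([0.4, 0.4]), np.array([0.0, 1.0])]
origin = np.min(front, axis=0)  # [0., 0.], dominated by every point on the front
extended = [origin] + front
ccs = [front[i - 1] for i in ConvexHull(extended).vertices if i != 0]
print(ccs)  # [0.4, 0.4] is Pareto-optimal but interior to the hull, so it is dropped
```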

def pareto_coverage_set(self, frame_skip=1, discount=0.98, incremental_frame_skip=True, symmetric=True):
def pareto_front(self, gamma: float, symmetric: bool = True) -> List[np.ndarray]:
"""
Computes an approximate pareto coverage set
Computes an approximate pareto front.

Keyword Arguments:
frame_skip {int} -- How many times each action is repeated (default: {1})
discount {float} -- Discount factor to apply to rewards (default: {1})
incremental_frame_skip {bool} -- Whether actions are repeated incrementally (default: {1})
symmetric {bool} -- If true, we assume the pattern of accelerations from the base to the mine is the same as from the mine to the base (default: {True})
Args:
gamma (float): Discount factor to apply to rewards.
symmetric (bool): If true, we assume the pattern of accelerations from the base to the mine is the same as from the mine to the base. Default: True

Returns:
The pareto coverage set
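For orientation, the intended call pattern for the renamed method (the `minecart-v0` id and the `unwrapped` access are assumptions, not part of the diff):

```python
import mo_gymnasium as mo_gym

env = mo_gym.make("minecart-v0")
front = env.unwrapped.pareto_front(gamma=0.98)       # list of vector returns
ccs = env.unwrapped.convex_coverage_set(gamma=0.98)  # convex subset of the front
```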
@@ -231,16 +239,16 @@

# Number of rotations required to face the mine
angle = compute_angle(mine.pos, HOME_POS, [1, 1])
rotations = int(ceil(abs(angle) / (ROTATION * frame_skip)))
rotations = int(ceil(abs(angle) / (ROTATION * self.frame_skip)))

# Build pattern of accelerations/nops to reach the mine
# initialize with single acceleration
queue = [
{
"speed": ACCELERATION * frame_skip,
"dist": mine_distance - frame_skip * (frame_skip + 1) / 2 * ACCELERATION
if incremental_frame_skip
else mine_distance - ACCELERATION * frame_skip * frame_skip,
"speed": ACCELERATION * self.frame_skip,
"dist": mine_distance - self.frame_skip * (self.frame_skip + 1) / 2 * ACCELERATION
if self.incremental_frame_skip
else mine_distance - ACCELERATION * self.frame_skip * self.frame_skip,
"seq": [ACT_ACCEL],
}
]
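The distance terms in the two branches above come from the frame-skip kinematics; a small self-contained check (the ACCELERATION value is illustrative, the real one lives in mine_config.json):

```python
ACCELERATION, frame_skip = 0.5, 4  # illustrative values, not from mine_config.json

# Incremental frame skip: speed grows by ACCELERATION on each of the frame_skip
# sub-steps, so one accelerate action covers (1 + 2 + ... + k) * ACCELERATION.
incremental = sum(ACCELERATION * i for i in range(1, frame_skip + 1))
assert incremental == frame_skip * (frame_skip + 1) / 2 * ACCELERATION

# Non-incremental: all repeats are applied at once, then the cart moves
# frame_skip sub-steps at the resulting speed.
simultaneous = (ACCELERATION * frame_skip) * frame_skip
assert simultaneous == ACCELERATION * frame_skip * frame_skip
```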
@@ -249,11 +257,11 @@
while len(queue) > 0:
seq = queue.pop()
# accelerate
new_speed = seq["speed"] + ACCELERATION * frame_skip
new_speed = seq["speed"] + ACCELERATION * self.frame_skip
accelerations = new_speed / ACCELERATION
movement = (
accelerations * (accelerations + 1) / 2 * ACCELERATION
- (accelerations - frame_skip) * ((accelerations - frame_skip) + 1) / 2 * ACCELERATION
- (accelerations - self.frame_skip) * ((accelerations - self.frame_skip) + 1) / 2 * ACCELERATION
)
dist = seq["dist"] - movement
speed = new_speed
@@ -262,7 +270,7 @@ def pareto_coverage_set(self, frame_skip=1, discount=0.98, incremental_frame_skip=True, symmetric=True):
else:
queue.append({"speed": speed, "dist": dist, "seq": seq["seq"] + [ACT_ACCEL]})
# idle
dist = seq["dist"] - seq["speed"] * frame_skip
dist = seq["dist"] - seq["speed"] * self.frame_skip

if dist <= 0:
trimmed_sequences.append(seq["seq"] + [ACT_NONE])
@@ -276,7 +284,7 @@ def pareto_coverage_set(self, frame_skip=1, discount=0.98, incremental_frame_skip=True, symmetric=True):
)

# Build rational mining sequences
mine_means = mine.distribution_means() * frame_skip
mine_means = mine.distribution_means() * self.frame_skip
mn_sum = np.sum(mine_means)
# on average it takes up to this many actions to fill cart
max_mine_actions = 0 if mn_sum == 0 else int(ceil(self.capacity / mn_sum))
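A toy instance of the bound just computed (the numbers are invented; the real means come from each mine's ore distributions):

```python
import numpy as np
from math import ceil

capacity = 1.5                     # assumed cart capacity
mine_means = np.array([0.3, 0.2])  # assumed expected ore per Mine action (already scaled by frame_skip)
mn_sum = float(np.sum(mine_means))
max_mine_actions = 0 if mn_sum == 0 else int(ceil(capacity / mn_sum))
print(max_mine_actions)  # ceil(1.5 / 0.5) == 3 Mine actions to fill the cart on average
```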
@@ -296,7 +304,7 @@ def pareto_coverage_set(self, frame_skip=1, discount=0.98, incremental_frame_skip=True, symmetric=True):
itertools.product(
[[ACT_LEFT] * rotations],
trimmed_sequences,
[[ACT_BRAKE] + [ACT_LEFT] * (180 // (ROTATION * frame_skip))],
[[ACT_BRAKE] + [ACT_LEFT] * (180 // (ROTATION * self.frame_skip))],
mine_sequences,
trimmed_sequences,
),
@@ -312,7 +320,7 @@ def pareto_coverage_set(self, frame_skip=1, discount=0.98, incremental_frame_skip=True, symmetric=True):
itertools.product(
[[ACT_LEFT] * rotations],
trimmed_sequences,
[[ACT_BRAKE] + [ACT_LEFT] * (180 // (ROTATION * frame_skip))],
[[ACT_BRAKE] + [ACT_LEFT] * (180 // (ROTATION * self.frame_skip))],
mine_sequences,
),
)
@@ -332,7 +340,7 @@ def pareto_coverage_set(self, frame_skip=1, discount=0.98, incremental_frame_skip=True, symmetric=True):
itertools.product(
[[ACT_LEFT] * rotations],
trimmed_sequences,
[[ACT_LEFT] * (180 // (ROTATION * frame_skip))],
[[ACT_LEFT] * (180 // (ROTATION * self.frame_skip))],
trimmed_sequences,
),
)
@@ -347,12 +355,12 @@ def pareto_coverage_set(self, frame_skip=1, discount=0.98, incremental_frame_skip=True, symmetric=True):
itertools.product(
[[ACT_LEFT] * rotations],
trimmed_sequences,
[[ACT_LEFT] * (180 // (ROTATION * frame_skip))],
[[ACT_LEFT] * (180 // (ROTATION * self.frame_skip))],
),
)
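The products above assemble full trips; schematically, with invented short patterns, each trip concatenates one choice per leg (the action ids match the env's, the patterns do not):

```python
import itertools

ACT_MINE, ACT_LEFT, ACT_ACCEL, ACT_BRAKE, ACT_NONE = 0, 1, 3, 4, 5  # ids from the env
face_mine = [ACT_LEFT, ACT_LEFT]                                 # rotate toward the mine
turn_home = [ACT_BRAKE, ACT_LEFT, ACT_LEFT]                      # brake, then rotate 180 degrees
go = [[ACT_ACCEL, ACT_NONE], [ACT_ACCEL, ACT_ACCEL, ACT_NONE]]   # invented travel patterns
dig = [[ACT_MINE], [ACT_MINE, ACT_MINE]]                         # invented mining patterns

all_sequences = [
    sum(parts, [])  # flatten one choice per leg into a single action sequence
    for parts in itertools.product([face_mine], go, [turn_home], dig, go)
]
print(len(all_sequences))  # 1 * 2 * 1 * 2 * 2 = 8 candidate trips
```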

# Compute rewards for each sequence
fuel_costs = np.array([f * frame_skip for f in FUEL_LIST])
fuel_costs = np.array([f * self.frame_skip for f in FUEL_LIST])

def maxlen(l):
if len(l) == 0:
@@ -361,9 +369,14 @@ def maxlen(l):

longest_pattern = maxlen(trimmed_sequences)
max_len = (
rotations + longest_pattern + 1 + (180 // (ROTATION * frame_skip)) + maxlen(mine_sequences) + longest_pattern
rotations
+ longest_pattern
+ 1
+ (180 // (ROTATION * self.frame_skip))
+ maxlen(mine_sequences)
+ longest_pattern
)
discount_map = discount ** np.arange(max_len)
discount_map = gamma ** np.arange(max_len)
for s in all_sequences:
reward = np.zeros((len(s), self.obj_cnt()))
reward[:, -1] = fuel_costs[s]
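The diff truncates the scoring loop here; as a sketch of the arithmetic it sets up (the fuel costs and ore payoff below are invented), every step t of a sequence contributes reward[t] * gamma**t, summed per objective:

```python
import numpy as np

gamma, n_obj = 0.98, 3
# Assumed per-action fuel costs, indexed by action id (Mine, Left, Right, Accel, Brake, None):
fuel_costs = np.array([-0.05, -0.005, -0.005, -0.025, -0.005, -0.005])

s = [3, 5, 5, 0]                           # accelerate, idle, idle, mine
reward = np.zeros((len(s), n_obj))
reward[:, -1] = fuel_costs[s]              # fuel objective: one cost per step
reward[-1, 0] = 1.0                        # say the final step banks one unit of ore

discount_map = gamma ** np.arange(len(s))  # gamma**t for each step t
discounted_return = np.sum(reward * discount_map[:, None], axis=0)
print(discounted_return)                   # one discounted value per objective
```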
@@ -417,55 +430,37 @@ def initialize_mines(self):
mine_sprite.rect.centery = (mine.pos[1] * (1 - 2 * MARGIN)) * HEIGHT + MARGIN * HEIGHT
self.mine_rects.append(mine_sprite.rect)

def step(self, action, frame_skip=4, incremental_frame_skip=True):
"""Perform the given action `frame_skip` times
["Mine", "Left", "Right", "Accelerate", "Brake", "None"]
Arguments:
action {int} -- Action to perform, ACT_MINE (0), ACT_LEFT (1), ACT_RIGHT (2), ACT_ACCEL (3), ACT_BRAKE (4) or ACT_NONE (5)

Keyword Arguments:
frame_skip {int} -- Repeat the action this many times (default: {1})
incremental_frame_skip {int} -- If True, frame_skip actions are performed in succession, otherwise the repeated actions are performed simultaneously (e.g., 4 accelerations are performed and then the cart moves).

Returns:
tuple -- (state, reward, terminal) tuple
"""
def step(self, action):
change = False # Keep track of whether the state has changed

if action < 0 or action >= ACTION_COUNT:
action = ACT_NONE

reward = np.zeros(self.ore_cnt + 1)
if frame_skip < 1:
frame_skip = 1

reward[-1] = FUEL_IDLE * frame_skip
reward[-1] = FUEL_IDLE * self.frame_skip

if action == ACT_ACCEL:
reward[-1] += FUEL_ACC * frame_skip
reward[-1] += FUEL_ACC * self.frame_skip
elif action == ACT_MINE:
reward[-1] += FUEL_MINE * frame_skip
reward[-1] += FUEL_MINE * self.frame_skip

for _ in range(frame_skip if incremental_frame_skip else 1):
for _ in range(self.frame_skip if self.incremental_frame_skip else 1):

if action == ACT_LEFT:
self.cart.rotate(-ROTATION * (1 if incremental_frame_skip else frame_skip))
self.cart.rotate(-ROTATION * (1 if self.incremental_frame_skip else self.frame_skip))
change = True
elif action == ACT_RIGHT:
self.cart.rotate(ROTATION * (1 if incremental_frame_skip else frame_skip))
self.cart.rotate(ROTATION * (1 if self.incremental_frame_skip else self.frame_skip))
change = True
elif action == ACT_ACCEL:
self.cart.accelerate(ACCELERATION * (1 if incremental_frame_skip else frame_skip))
self.cart.accelerate(ACCELERATION * (1 if self.incremental_frame_skip else self.frame_skip))
elif action == ACT_BRAKE:
self.cart.accelerate(-DECELERATION * (1 if incremental_frame_skip else frame_skip))
self.cart.accelerate(-DECELERATION * (1 if self.incremental_frame_skip else self.frame_skip))
elif action == ACT_MINE:
for _ in range(1 if incremental_frame_skip else frame_skip):
for _ in range(1 if self.incremental_frame_skip else self.frame_skip):
change = self.mine() or change

if self.end:
break

for _ in range(1 if incremental_frame_skip else frame_skip):
for _ in range(1 if self.incremental_frame_skip else self.frame_skip):
change = self.cart.step() or change

distanceFromBase = mag(self.cart.pos - HOME_POS)
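Net effect of this hunk: frame skipping is no longer passed to step() but fixed at construction time. A hedged before/after sketch (env id assumed):

```python
import mo_gymnasium as mo_gym

# Before this PR: env.step(action, frame_skip=4, incremental_frame_skip=True)
# After: the same behavior is configured once, on the constructor.
env = mo_gym.make("minecart-v0", frame_skip=4, incremental_frame_skip=True)
obs, info = env.reset(seed=0)
obs, vec_reward, terminated, truncated, info = env.step(1)  # ACT_LEFT, applied 4x incrementally
```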
69 changes: 68 additions & 1 deletion mo_gymnasium/envs/resource_gathering/resource_gathering.py
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Optional
from typing import List, Optional

import gymnasium as gym
import numpy as np
@@ -78,6 +78,73 @@ def __init__(self, render_mode: Optional[str] = None):
# reward space:
self.reward_space = Box(low=-1, high=1, shape=(3,), dtype=np.float32)

def pareto_front(self, gamma: float) -> List[np.ndarray]:
"""This function returns the pareto front of the resource gathering environment.

Args:
gamma (float): The discount factor.

Returns:
The pareto front of the resource gathering environment.
"""

def get_non_dominated(candidates: List[np.ndarray]) -> List[np.ndarray]:
"""This function returns the non-dominated subset of elements.

Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
The code provided in the Stack Overflow answers is incorrect; important changes have been made in this function.

Args:
candidates: The input set of candidate vectors.

Returns:
The non-dominated subset of this input set.
"""
candidates = np.array(candidates) # Turn the input set into a numpy array.
candidates = candidates[candidates.sum(1).argsort()[::-1]] # Sort candidates by decreasing sum of coordinates.
for i in range(candidates.shape[0]): # Process each point in turn.
n = candidates.shape[0] # Check current size of the candidates.
if i >= n: # Stop once i runs past the end of the shrinking candidate set.
break
non_dominated = np.ones(candidates.shape[0], dtype=bool) # Initialize a boolean mask for undominated points.
# find all points not dominated by i
# since points are sorted by coordinate sum
# i cannot dominate any points in 1,...,i-1
non_dominated[i + 1 :] = np.any(candidates[i + 1 :] > candidates[i], axis=1)
candidates = candidates[non_dominated] # Grab only the non-dominated vectors using the generated bitmask.

non_dominated = set()
for candidate in candidates:
non_dominated.add(tuple(candidate)) # Collect the non-dominated vectors in a set to deduplicate them.

return [np.array(point) for point in non_dominated]

# Go directly to the diamond (R2) in 10 steps
ret1 = np.array([0.0, 0.0, 1.0]) * gamma**10

# Go to both resources, through both E's
ret2 = 0.9 * 0.9 * np.array([0.0, 1.0, 1.0]) * gamma**12 # Didn't die
ret2 += 0.1 * np.array([-1.0, 0.0, 0.0]) * gamma**7 # Died to E2
ret2 += 0.9 * 0.1 * np.array([-1.0, 0.0, 0.0]) * gamma**9 # Died to E1

# Go to gold (R1), through E1 both ways
ret3 = 0.9 * 0.9 * np.array([0.0, 1.0, 0.0]) * gamma**8 # Didn't die
ret3 += 0.1 * np.array([-1.0, 0.0, 0.0]) * gamma**3 # Died to E1
ret3 += 0.9 * 0.1 * np.array([-1.0, 0.0, 0.0]) * gamma**5 # Died to E1 on the way back

# Go to both resources, dodging E1 but through E2
ret4 = 0.9 * np.array([0.0, 1.0, 1.0]) * gamma**14 # Didn't die
ret4 += 0.1 * np.array([-1.0, 0.0, 0.0]) * gamma**7 # Died to E2

# Go to gold (R1), dodging all E's in 12 steps
ret5 = np.array([0.0, 1.0, 0.0]) * gamma**12 # Didn't die

# Go to gold (R1), going through E1 only once
ret6 = 0.9 * np.array([0.0, 1.0, 0.0]) * gamma**10 # Didn't die
ret6 += 0.1 * np.array([-1.0, 0.0, 0.0]) * gamma**7 # Died to E1

return get_non_dominated([ret1, ret2, ret3, ret4, ret5, ret6])
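As a sanity check on the decomposition above, each candidate return weights every outcome branch by its probability and discounts by the step on which the payoff arrives, and the branch probabilities of each policy sum to one. A quick numeric check for ret3 with an arbitrary gamma:

```python
import numpy as np

gamma = 0.96
ret3 = 0.9 * 0.9 * np.array([0.0, 1.0, 0.0]) * gamma**8    # survived E1 both ways
ret3 += 0.1 * np.array([-1.0, 0.0, 0.0]) * gamma**3        # died to E1 on the way out
ret3 += 0.9 * 0.1 * np.array([-1.0, 0.0, 0.0]) * gamma**5  # died to E1 on the way back
assert abs(0.9 * 0.9 + 0.1 + 0.9 * 0.1 - 1.0) < 1e-12      # branches cover all outcomes
print(ret3)
```

And a standalone copy of the nested filter run on a toy set (it is not exposed as a method, so this is purely illustrative): the dominated vector is dropped while mutually incomparable ones survive.

```python
import numpy as np
from typing import List

def get_non_dominated(candidates: List[np.ndarray]) -> List[np.ndarray]:
    candidates = np.array(candidates)
    candidates = candidates[candidates.sum(1).argsort()[::-1]]  # decreasing coordinate sum
    for i in range(candidates.shape[0]):
        if i >= candidates.shape[0]:  # the array shrinks as points are eliminated
            break
        keep = np.ones(candidates.shape[0], dtype=bool)
        keep[i + 1:] = np.any(candidates[i + 1:] > candidates[i], axis=1)
        candidates = candidates[keep]
    return [np.array(p) for p in {tuple(c) for c in candidates}]

pts = [np.array([1.0, 0.0]), np.array([0.5, 0.5]), np.array([0.6, 0.6]), np.array([0.0, 1.0])]
print(get_non_dominated(pts))  # (0.5, 0.5) is dominated by (0.6, 0.6) and removed
```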

def get_map_value(self, pos):
return self.map[pos[0]][pos[1]]
