Merge branch 'main' into update-vector-env-docs

# Conflicts: # .github/workflows/build-publish.yml
Farama-Foundation · May 21, 2024 · b0cfeb4 · b0cfeb4
2 parents 79b6b74 + 6d5a0f3
commit b0cfeb4
Show file tree

Hide file tree

Showing 61 changed files with 887 additions and 368 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,14 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+  # Enable version updates for GitHub Actions
+  - package-ecosystem: "github-actions"
+    # Look for GitHub Actions workflows in the `root` directory
+    directory: "/"
+    # Check for updates once a week
+    schedule:
+      interval: "weekly"
diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
@@ -15,9 +15,9 @@ jobs:
       SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         with:
             python-version: '3.9'
 

diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml
@@ -16,38 +16,24 @@ on:
 
 jobs:
   build-wheels:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        include:
-        - os: ubuntu-latest
-          python: 38
-          platform: manylinux_x86_64
-        - os: ubuntu-latest
-          python: 39
-          platform: manylinux_x86_64
-        - os: ubuntu-latest
-          python: 310
-          platform: manylinux_x86_64
-        - os: ubuntu-latest
-          python: 311
-          platform: manylinux_x86_64
-        - os: ubuntu-latest
-          python: 312
-          platform: manylinux_x86_64
+    runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
+
     - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
-        python-version: '3.x'
+        python-version: '3.8'
+
     - name: Install dependencies
       run: python -m pip install --upgrade pip setuptools build
+
     - name: Build sdist and wheels
       run: python -m build
+
     - name: Store wheels
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         path: dist
 
@@ -58,10 +44,11 @@ jobs:
     if: github.event_name == 'release' && github.event.action == 'published'
     steps:
     - name: Download dists
-      uses: actions/download-artifact@v3
+      uses: actions/download-artifact@v4
       with:
         name: artifact
         path: dist
+
     - name: Publish
       uses: pypa/gh-action-pypi-publish@release/v1
       with:

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -11,7 +11,7 @@ jobs:
       matrix:
         python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - run: |
            docker build -f bin/all-py.Dockerfile \
              --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
@@ -25,7 +25,7 @@ jobs:
     runs-on:
       ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - run: |
            docker build -f bin/necessary-py.Dockerfile \
              --build-arg PYTHON_VERSION='3.10' \

diff --git a/.github/workflows/docs-manual-versioning.yml b/.github/workflows/docs-manual-versioning.yml
@@ -23,15 +23,15 @@ jobs:
     env:
       SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         if: inputs.commit == ''
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         if: inputs.commit != ''
         with:
           ref: ${{ inputs.commit }}
 
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         with:
             python-version: '3.9'
 

diff --git a/.github/workflows/docs-versioning.yml b/.github/workflows/docs-versioning.yml
@@ -15,9 +15,9 @@ jobs:
     env:
       SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         with:
             python-version: '3.9'
 

diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -13,8 +13,8 @@ jobs:
   pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v4
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
       - run: python -m pip install pre-commit
       - run: python -m pre_commit --version
       - run: python -m pre_commit install

diff --git a/README.md b/README.md
@@ -61,7 +61,7 @@ Please note that this is an incomplete list, and just includes libraries that th
 
 ## Environment Versioning
 
-Gymnasium keeps strict versioning for reproducibility reasons. All environments end in a suffix like "-v0".  When changes are made to environments that might impact learning results, the number is increased by one to prevent potential confusion. These inherent from Gym.
+Gymnasium keeps strict versioning for reproducibility reasons. All environments end in a suffix like "-v0".  When changes are made to environments that might impact learning results, the number is increased by one to prevent potential confusion. These inherit from Gym.
 
 ## Development Roadmap
 

diff --git a/docs/api/vector.md b/docs/api/vector.md
@@ -53,6 +53,10 @@ vector/utils
 
     The ``EnvSpec`` of the environment normally set during :py:meth:`gymnasium.make_vec`
 
+.. autoattribute:: gymnasium.vector.VectorEnv.metadata
+
+    The metadata of the environment containing rendering modes, rendering fps, etc
+
 .. autoattribute:: gymnasium.vector.VectorEnv.render_mode
 
     The render mode of the environment which should follow similar specifications to `Env.render_mode`.

diff --git a/docs/api/vector/async_vector_env.md b/docs/api/vector/async_vector_env.md
@@ -12,9 +12,9 @@
     .. automethod:: gymnasium.vector.AsyncVectorEnv.set_attr
 ```
 
-### Additional Methods
+## Additional Methods
 
 ```{eval-rst}
-.. autoproperty:: gymnasium.vector.VectorEnv.np_random
-.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed
+.. autoproperty:: gymnasium.vector.AsyncVectorEnv.np_random
+.. autoproperty:: gymnasium.vector.AsyncVectorEnv.np_random_seed
 ```
diff --git a/docs/api/vector/sync_vector_env.md b/docs/api/vector/sync_vector_env.md
@@ -12,9 +12,9 @@
     .. automethod:: gymnasium.vector.SyncVectorEnv.set_attr
 ```
 
-### Additional Methods
+## Additional Methods
 
 ```{eval-rst}
-.. autoproperty:: gymnasium.vector.VectorEnv.np_random
-.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed
+.. autoproperty:: gymnasium.vector.SyncVectorEnv.np_random
+.. autoproperty:: gymnasium.vector.SyncVectorEnv.np_random_seed
 ```
diff --git a/docs/environments/mujoco.md b/docs/environments/mujoco.md
@@ -44,19 +44,19 @@ There are eleven MuJoCo environments (in roughly increasing complexity):
 | Robot                  | Short Description                                                    |
 | ---------------------- | -------------------------------------------------------------------- |
 | **CartPoles**          |                                                                      |
-| InvertedPendulum       | MuJuCo version of the CartPole Environment (with Continuous actions) |
+| InvertedPendulum       | MuJoCo version of the CartPole Environment (with Continuous actions) |
 | InvertedDoublePendulum | 2 Pole variation of the CartPole Environment                         |
 | **Arms**               |                                                                      |
 | Reacher                | 2d arm with the goal of reaching an object                           |
 | Pusher                 | 3d arm with the goal of pushing an object to a target location       |
 | **2D Runners**         |                                                                      |
 | HalfCheetah            | 2d quadruped with the goal of running                                |
-| Hopper                 | 2d monoped with the goal of goal of hopping                          |
-| Walker2d               | 2d bidped with the goal of walking                                   |
+| Hopper                 | 2d monoped with the goal of hopping                          |
+| Walker2d               | 2d biped with the goal of walking                                   |
 | **Swimmers**           |                                                                      |
 | Swimmer                | 3d robot with the goal of swimming                                   |
 | **Quadruped**          |                                                                      |
-| Ant                    | 3d quadurped with the goal of running                                |
+| Ant                    | 3d quadruped with the goal of running                                |
 | **Humanoid Bipeds**    |                                                                      |
 | Humanoid               | 3d humanoid with the goal of running                                 |
 | HumanoidStandup        | 3d humanoid with the goal of standing up                             |
@@ -89,15 +89,15 @@ For more information, see the section "Version History" for each environment.
 ### Comparing training performance across versions
 The training performance of `v2` and `v3` is identical assuming the same/default arguments were used.
 
-The training performance of `v2`/`v3` and `v4` is not directly comparable because of the change to the newer simulator, but the results for not Ant and not Humanoids are comperable (for more information see [GitHub Comment #1](https://github.com/openai/gym/pull/2595#issuecomment-1099152505) and [GitHub Comment #2](https://github.com/openai/gym/pull/2762#issuecomment-1135362092)).
+The training performance of `v2`/`v3` and `v4` are not directly comparable because of the change to the newer simulator, but the results for environments other than Ant and the Humanoids are comparable (for more information see [GitHub Comment #1](https://github.com/openai/gym/pull/2595#issuecomment-1099152505) and [GitHub Comment #2](https://github.com/openai/gym/pull/2762#issuecomment-1135362092)).
 
-The Training performance of `v4` and `v5` is different because of the many changes in the environments, but the Half Cheetah and Swimmer exchibits identical behaviour, Pusher and Swimmer are close (for more information see [GitHub Issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
+The training performance of `v4` and `v5` is different because of the many changes in the environments, but the Half Cheetah and Swimmer exhibit identical behaviour, Pusher and Swimmer are close (for more information see [GitHub Issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
 
 ### Exact reproducibility
-Note: The exact behavior of the MuJoCo simulator changes slightly between `mujoco`  versions due to floating point operation ordering (more information of their [Documentation]( https://mujoco.readthedocs.io/en/stable/computation/index.html#reproducibility)), if exact reproducibility is need besides using the `seed` for expirements the same simulator version should be used.
+Note: The exact behavior of the MuJoCo simulator changes slightly between `mujoco` versions due to floating point operation ordering (more information in their [Documentation](https://mujoco.readthedocs.io/en/stable/computation/index.html#reproducibility)), so if exact reproducibility is needed, besides using the `seed` for experiments, the same simulator version should be used.
 
 ## Rendering Arguments
-The all MuJoCo Environments besides the general Gymnasium arguments, and environment specific arguments they also take the following arguments for configuring the renderer:
+Besides the general Gymnasium arguments and environment-specific arguments, all of the MuJoCo environments also take the following arguments for configuring the renderer:
 
 ```python
 env = gymnasium.make("Ant-v5", render_mode="rgb_array", width=1280, height=720)

diff --git a/docs/environments/third_party_environments.md b/docs/environments/third_party_environments.md
@@ -2,16 +2,25 @@
 :tocdepth: 2
 ```
 
-# Third-Party Environments
+# External Environments
 
-The Farama Foundation maintains a number of other [projects](https://farama.org/projects), most of which use Gymnasium. Topics include:
-multi-agent RL ([PettingZoo](https://pettingzoo.farama.org/)),
-offline-RL ([Minari](https://minari.farama.org/)),
+## First-Party Environments
+The Farama Foundation maintains a number of other [projects](https://farama.org/projects) that use the Gymnasium API. Environments include:
 gridworlds ([Minigrid](https://minigrid.farama.org/)),
 robotics ([Gymnasium-Robotics](https://robotics.farama.org/)),
+3D navigation ([Miniworld](https://miniworld.farama.org/)),
+web interaction ([MiniWoB++](https://miniwob.farama.org/)),
+arcade games ([Arcade Learning Environment](https://github.com/Farama-Foundation/Arcade-Learning-Environment)),
+Doom ([ViZDoom](https://vizdoom.farama.org/)),
+Meta-objective robotics ([Metaworld](https://metaworld.farama.org/)),
+autonomous driving ([HighwayEnv](https://highway-env.farama.org/)),
+Retro Games ([stable-retro](https://github.com/Farama-Foundation/stable-retro)), and many more.
+
+The Farama Foundation also maintains alternate APIs for RL, including:
+multi-agent RL ([PettingZoo](https://pettingzoo.farama.org/)),
+offline-RL ([Minari](https://minari.farama.org/)),
 multi-objective RL ([MO-Gymnasium](https://mo-gymnasium.farama.org/))
-many-agent RL ([MAgent2](https://magent2.farama.org/)),
-3D navigation ([Miniworld](https://miniworld.farama.org/)), and many more.
+goal-RL ([Gymnasium-Robotics](https://robotics.farama.org/)).
 
 ## Third-party environments with Gymnasium
 
@@ -82,13 +91,6 @@ An environment for guiding automated theorem provers based on saturation algorit
 
 Gym Trading Env simulates stock (or crypto) market from historical data. It was designed to be fast and easily customizable.
 
-### [highway-env: Autonomous driving and tactical decision-making tasks](https://github.com/eleurent/highway-env)
-
-![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.27.1-blue)
-![GitHub stars](https://img.shields.io/github/stars/eleurent/highway-env)
-
-An environment for behavioral planning in autonomous driving, with an emphasis on high-level perception and decision rather than low-level sensing and control.
-
 ### [matrix-mdp: Easily create discrete MDPs](https://github.com/Paul-543NA/matrix-mdp-gym)
 
 ![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.26.2-blue)
@@ -145,13 +147,6 @@ spark-sched-sim simulates Spark clusters for RL-based job scheduling algorithms.
 ![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.29.1-blue)
 ![GitHub stars](https://img.shields.io/github/stars/ArchieGertsman/spark-sched-sim)
 
-### [stable-retro: Classic retro games, a maintained version of OpenAI Retro](https://github.com/Farama-Foundation/stable-retro)
-
-![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.27.1-blue)
-![GitHub stars](https://img.shields.io/github/stars/Farama-Foundation/stable-retro)
-
-Supported fork of [gym-retro](https://openai.com/research/gym-retro): turn classic video games into Gymnasium environments.
-
 ### [sumo-rl: Reinforcement Learning using SUMO traffic simulator](https://github.com/LucasAlegre/sumo-rl)
 
 ![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.26.3-blue)
@@ -166,6 +161,19 @@ Gymnasium wrapper for various environments in the SUMO traffic simulator. Suppor
 
 tmrl is a distributed framework for training Deep Reinforcement Learning AIs in real-time applications. It is demonstrated on the TrackMania 2020 video game.
 
+### [EV2Gym: A Realistic EV-V2G-Gym Simulator for EV Smart Charging](https://github.com/StavrosOrf/EV2Gym)
+
+![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.27.1-blue)
+![GitHub stars](https://img.shields.io/github/stars/StavrosOrf/EV2Gym)
+
+EV2Gym is a fully customizable and easily configurable environment for Electric Vehicle (EV) smart charging simulations on a small and large scale. Also, includes non-RL baseline implementations such as mathematical programming, model predictive control, and heuristics.
+
+### [Buffalo-Gym: Multi-Armed Bandit Gymnasium](https://github.com/foreverska/buffalo-gym)
+
+![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.29.1-blue)
+![GitHub stars](https://img.shields.io/github/stars/foreverska/buffalo-gym)
+
+Buffalo-Gym is a Multi-Armed Bandit (MAB) gymnasium built primarily to assist in debugging RL implementations. MABs are often easy to reason about what the agent is learning and whether it is correct. Buffalo-gym encompasses Bandits, Contextual bandits, and contextual bandits with aliasing.
 ## Third-Party Environments using Gym
 
 There are a large number of third-party environments using various versions of [Gym](https://github.com/openai/gym).

diff --git a/docs/tutorials/gymnasium_basics/environment_creation.py b/docs/tutorials/gymnasium_basics/environment_creation.py
@@ -471,7 +471,7 @@ def close(self):
 # contains the registration code. For the GridWorld env, the registration
 # code is run by importing ``gymnasium_env`` so if it were not possible to
 # import gymnasium_env explicitly, you could register while making by
-# ``env = gymnasium.make('gymnasium_env:gymnasium_env/GridWorld-v0)``. This
+# ``env = gymnasium.make('gymnasium_env:gymnasium_env/GridWorld-v0')``. This
 # is especially useful when you’re allowed to pass only the environment ID
 # into a third-party codebase (eg. learning library). This lets you
 # register your environment without needing to edit the library’s source

diff --git a/docs/tutorials/training_agents/FrozenLake_tuto.py b/docs/tutorials/training_agents/FrozenLake_tuto.py
@@ -161,12 +161,10 @@ def choose_action(self, action_space, state, qtable):
         # Exploitation (taking the biggest Q-value for this state)
         else:
             # Break ties randomly
-            # If all actions are the same for this state we choose a random one
-            # (otherwise `np.argmax()` would always take the first one)
-            if np.all(qtable[state, :]) == qtable[state, 0]:
-                action = action_space.sample()
-            else:
-                action = np.argmax(qtable[state, :])
+            # Find the indices where the Q-value equals the maximum value
+            # Choose a random action from the indices where the Q-value is maximum
+            max_ids = np.where(qtable[state, :] == max(qtable[state, :]))[0]
+            action = rng.choice(max_ids)
         return action
 
 

diff --git a/gymnasium/core.py b/gymnasium/core.py
@@ -332,7 +332,7 @@ def close(self):
 
     @property
     def np_random_seed(self) -> int | None:
-        """Returns the base enviroment's :attr:`np_random_seed`."""
+        """Returns the base environment's :attr:`np_random_seed`."""
         return self.env.np_random_seed
 
     @property

diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py
@@ -23,7 +23,7 @@
     )
 except ImportError as e:
     raise DependencyNotInstalled(
-        'Box2D is not installed, run `pip install "gymnasium[box2d]"`'
+        'Box2D is not installed, you can install it by run `pip install swig` followed by `pip install "gymnasium[box2d]"`'
     ) from e
 
 

diff --git a/gymnasium/envs/box2d/car_dynamics.py b/gymnasium/envs/box2d/car_dynamics.py
@@ -19,7 +19,7 @@
     from Box2D.b2 import fixtureDef, polygonShape, revoluteJointDef
 except ImportError as e:
     raise DependencyNotInstalled(
-        'Box2D is not installed, run `pip install "gymnasium[box2d]"`'
+        'Box2D is not installed, you can install it by run `pip install swig` followed by `pip install "gymnasium[box2d]"`'
     ) from e
 
 

diff --git a/gymnasium/envs/box2d/car_racing.py b/gymnasium/envs/box2d/car_racing.py
@@ -17,7 +17,7 @@
     from Box2D.b2 import contactListener, fixtureDef, polygonShape
 except ImportError as e:
     raise DependencyNotInstalled(
-        'Box2D is not installed, run `pip install "gymnasium[box2d]"`'
+        'Box2D is not installed, you can install it by run `pip install swig` followed by `pip install "gymnasium[box2d]"`'
     ) from e
 
 try:

diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py
@@ -24,7 +24,7 @@
     )
 except ImportError as e:
     raise DependencyNotInstalled(
-        'Box2D is not installed, run `pip install "gymnasium[box2d]"`'
+        'Box2D is not installed, you can install it by run `pip install swig` followed by `pip install "gymnasium[box2d]"`'
     ) from e