Skip to content

Commit

Permalink
Merge branch 'main' into update-vector-env-docs
Browse files Browse the repository at this point in the history
# Conflicts:
#	.github/workflows/build-publish.yml
  • Loading branch information
pseudo-rnd-thoughts committed May 21, 2024
2 parents 79b6b74 + 6d5a0f3 commit b0cfeb4
Show file tree
Hide file tree
Showing 61 changed files with 887 additions and 368 deletions.
14 changes: 14 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
# Enable version updates for GitHub Actions
- package-ecosystem: "github-actions"
# Look for GitHub Actions workflows in the `root` directory
directory: "/"
# Check for updates once a week
schedule:
interval: "weekly"
4 changes: 2 additions & 2 deletions .github/workflows/build-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ jobs:
SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: '3.9'

Expand Down
35 changes: 11 additions & 24 deletions .github/workflows/build-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,38 +16,24 @@ on:

jobs:
build-wheels:
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: ubuntu-latest
python: 38
platform: manylinux_x86_64
- os: ubuntu-latest
python: 39
platform: manylinux_x86_64
- os: ubuntu-latest
python: 310
platform: manylinux_x86_64
- os: ubuntu-latest
python: 311
platform: manylinux_x86_64
- os: ubuntu-latest
python: 312
platform: manylinux_x86_64
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.x'
python-version: '3.8'

- name: Install dependencies
run: python -m pip install --upgrade pip setuptools build

- name: Build sdist and wheels
run: python -m build

- name: Store wheels
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
path: dist

Expand All @@ -58,10 +44,11 @@ jobs:
if: github.event_name == 'release' && github.event.action == 'published'
steps:
- name: Download dists
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: artifact
path: dist

- name: Publish
uses: pypa/gh-action-pypi-publish@release/v1
with:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- run: |
docker build -f bin/all-py.Dockerfile \
--build-arg PYTHON_VERSION=${{ matrix.python-version }} \
Expand All @@ -25,7 +25,7 @@ jobs:
runs-on:
ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- run: |
docker build -f bin/necessary-py.Dockerfile \
--build-arg PYTHON_VERSION='3.10' \
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/docs-manual-versioning.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ jobs:
env:
SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
if: inputs.commit == ''

- uses: actions/checkout@v3
- uses: actions/checkout@v4
if: inputs.commit != ''
with:
ref: ${{ inputs.commit }}

- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: '3.9'

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/docs-versioning.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ jobs:
env:
SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: '3.9'

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- run: python -m pip install pre-commit
- run: python -m pre_commit --version
- run: python -m pre_commit install
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Please note that this is an incomplete list, and just includes libraries that th

## Environment Versioning

Gymnasium keeps strict versioning for reproducibility reasons. All environments end in a suffix like "-v0". When changes are made to environments that might impact learning results, the number is increased by one to prevent potential confusion. These inherent from Gym.
Gymnasium keeps strict versioning for reproducibility reasons. All environments end in a suffix like "-v0". When changes are made to environments that might impact learning results, the number is increased by one to prevent potential confusion. This versioning convention is inherited from Gym.

## Development Roadmap

Expand Down
4 changes: 4 additions & 0 deletions docs/api/vector.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ vector/utils
The ``EnvSpec`` of the environment normally set during :py:meth:`gymnasium.make_vec`
.. autoattribute:: gymnasium.vector.VectorEnv.metadata
The metadata of the environment containing rendering modes, rendering fps, etc
.. autoattribute:: gymnasium.vector.VectorEnv.render_mode
The render mode of the environment which should follow similar specifications to `Env.render_mode`.
Expand Down
6 changes: 3 additions & 3 deletions docs/api/vector/async_vector_env.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
.. automethod:: gymnasium.vector.AsyncVectorEnv.set_attr
```

### Additional Methods
## Additional Methods

```{eval-rst}
.. autoproperty:: gymnasium.vector.VectorEnv.np_random
.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed
.. autoproperty:: gymnasium.vector.AsyncVectorEnv.np_random
.. autoproperty:: gymnasium.vector.AsyncVectorEnv.np_random_seed
```
6 changes: 3 additions & 3 deletions docs/api/vector/sync_vector_env.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
.. automethod:: gymnasium.vector.SyncVectorEnv.set_attr
```

### Additional Methods
## Additional Methods

```{eval-rst}
.. autoproperty:: gymnasium.vector.VectorEnv.np_random
.. autoproperty:: gymnasium.vector.VectorEnv.np_random_seed
.. autoproperty:: gymnasium.vector.SyncVectorEnv.np_random
.. autoproperty:: gymnasium.vector.SyncVectorEnv.np_random_seed
```
16 changes: 8 additions & 8 deletions docs/environments/mujoco.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,19 @@ There are eleven MuJoCo environments (in roughly increasing complexity):
| Robot | Short Description |
| ---------------------- | -------------------------------------------------------------------- |
| **CartPoles** | |
| InvertedPendulum | MuJuCo version of the CartPole Environment (with Continuous actions) |
| InvertedPendulum | MuJoCo version of the CartPole Environment (with Continuous actions) |
| InvertedDoublePendulum | 2 Pole variation of the CartPole Environment |
| **Arms** | |
| Reacher | 2d arm with the goal of reaching an object |
| Pusher | 3d arm with the goal of pushing an object to a target location |
| **2D Runners** | |
| HalfCheetah | 2d quadruped with the goal of running |
| Hopper | 2d monoped with the goal of goal of hopping |
| Walker2d | 2d bidped with the goal of walking |
| Hopper | 2d monoped with the goal of hopping |
| Walker2d | 2d biped with the goal of walking |
| **Swimmers** | |
| Swimmer | 3d robot with the goal of swimming |
| **Quadruped**          |                                                                      |
| Ant | 3d quadurped with the goal of running |
| Ant | 3d quadruped with the goal of running |
| **Humanoid Bipeds** | |
| Humanoid | 3d humanoid with the goal of running |
| HumanoidStandup | 3d humanoid with the goal of standing up |
Expand Down Expand Up @@ -89,15 +89,15 @@ For more information, see the section "Version History" for each environment.
### Comparing training performance across versions
The training performance of `v2` and `v3` is identical assuming the same/default arguments were used.

The training performance of `v2`/`v3` and `v4` is not directly comparable because of the change to the newer simulator, but the results for not Ant and not Humanoids are comperable (for more information see [GitHub Comment #1](https://github.com/openai/gym/pull/2595#issuecomment-1099152505) and [GitHub Comment #2](https://github.com/openai/gym/pull/2762#issuecomment-1135362092)).
The training performance of `v2`/`v3` and `v4` is not directly comparable because of the change to the newer simulator, but the results for all environments other than Ant and the Humanoids are comparable (for more information see [GitHub Comment #1](https://github.com/openai/gym/pull/2595#issuecomment-1099152505) and [GitHub Comment #2](https://github.com/openai/gym/pull/2762#issuecomment-1135362092)).

The Training performance of `v4` and `v5` is different because of the many changes in the environments, but the Half Cheetah and Swimmer exchibits identical behaviour, Pusher and Swimmer are close (for more information see [GitHub Issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
The training performance of `v4` and `v5` is different because of the many changes in the environments, but the Half Cheetah and Swimmer exhibit identical behaviour, and Pusher and Swimmer are close (for more information see [GitHub Issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).

### Exact reproducibility
Note: The exact behavior of the MuJoCo simulator changes slightly between `mujoco` versions due to floating point operation ordering (more information of their [Documentation]( https://mujoco.readthedocs.io/en/stable/computation/index.html#reproducibility)), if exact reproducibility is need besides using the `seed` for expirements the same simulator version should be used.
Note: The exact behavior of the MuJoCo simulator changes slightly between `mujoco` versions due to floating point operation ordering (more information in their [Documentation](https://mujoco.readthedocs.io/en/stable/computation/index.html#reproducibility)). If exact reproducibility is needed, besides using the `seed` for experiments, the same simulator version should be used.

## Rendering Arguments
The all MuJoCo Environments besides the general Gymnasium arguments, and environment specific arguments they also take the following arguments for configuring the renderer:
Besides the general Gymnasium arguments and environment-specific arguments, all MuJoCo environments also take the following arguments for configuring the renderer:

```python
env = gymnasium.make("Ant-v5", render_mode="rgb_array", width=1280, height=720)
Expand Down
48 changes: 28 additions & 20 deletions docs/environments/third_party_environments.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,25 @@
:tocdepth: 2
```

# Third-Party Environments
# External Environments

The Farama Foundation maintains a number of other [projects](https://farama.org/projects), most of which use Gymnasium. Topics include:
multi-agent RL ([PettingZoo](https://pettingzoo.farama.org/)),
offline-RL ([Minari](https://minari.farama.org/)),
## First-Party Environments
The Farama Foundation maintains a number of other [projects](https://farama.org/projects) that use the Gymnasium API. Environments include:
gridworlds ([Minigrid](https://minigrid.farama.org/)),
robotics ([Gymnasium-Robotics](https://robotics.farama.org/)),
3D navigation ([Miniworld](https://miniworld.farama.org/)),
web interaction ([MiniWoB++](https://miniwob.farama.org/)),
arcade games ([Arcade Learning Environment](https://github.com/Farama-Foundation/Arcade-Learning-Environment)),
Doom ([ViZDoom](https://vizdoom.farama.org/)),
meta-objective robotics ([Metaworld](https://metaworld.farama.org/)),
autonomous driving ([HighwayEnv](https://highway-env.farama.org/)),
retro games ([stable-retro](https://github.com/Farama-Foundation/stable-retro)), and many more.

The Farama Foundation also maintains alternate APIs for RL, including:
multi-agent RL ([PettingZoo](https://pettingzoo.farama.org/)),
offline-RL ([Minari](https://minari.farama.org/)),
multi-objective RL ([MO-Gymnasium](https://mo-gymnasium.farama.org/)),
many-agent RL ([MAgent2](https://magent2.farama.org/)), and
goal-RL ([Gymnasium-Robotics](https://robotics.farama.org/)).

## Third-party environments with Gymnasium

Expand Down Expand Up @@ -82,13 +91,6 @@ An environment for guiding automated theorem provers based on saturation algorit

Gym Trading Env simulates stock (or crypto) market from historical data. It was designed to be fast and easily customizable.

### [highway-env: Autonomous driving and tactical decision-making tasks](https://github.com/eleurent/highway-env)

![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.27.1-blue)
![GitHub stars](https://img.shields.io/github/stars/eleurent/highway-env)

An environment for behavioral planning in autonomous driving, with an emphasis on high-level perception and decision rather than low-level sensing and control.

### [matrix-mdp: Easily create discrete MDPs](https://github.com/Paul-543NA/matrix-mdp-gym)

![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.26.2-blue)
Expand Down Expand Up @@ -145,13 +147,6 @@ spark-sched-sim simulates Spark clusters for RL-based job scheduling algorithms.
![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.29.1-blue)
![GitHub stars](https://img.shields.io/github/stars/ArchieGertsman/spark-sched-sim)

### [stable-retro: Classic retro games, a maintained version of OpenAI Retro](https://github.com/Farama-Foundation/stable-retro)

![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.27.1-blue)
![GitHub stars](https://img.shields.io/github/stars/Farama-Foundation/stable-retro)

Supported fork of [gym-retro](https://openai.com/research/gym-retro): turn classic video games into Gymnasium environments.

### [sumo-rl: Reinforcement Learning using SUMO traffic simulator](https://github.com/LucasAlegre/sumo-rl)

![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.26.3-blue)
Expand All @@ -166,6 +161,19 @@ Gymnasium wrapper for various environments in the SUMO traffic simulator. Suppor

tmrl is a distributed framework for training Deep Reinforcement Learning AIs in real-time applications. It is demonstrated on the TrackMania 2020 video game.

### [EV2Gym: A Realistic EV-V2G-Gym Simulator for EV Smart Charging](https://github.com/StavrosOrf/EV2Gym)

![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.27.1-blue)
![GitHub stars](https://img.shields.io/github/stars/StavrosOrf/EV2Gym)

EV2Gym is a fully customizable and easily configurable environment for Electric Vehicle (EV) smart charging simulations on a small and large scale. It also includes non-RL baseline implementations such as mathematical programming, model predictive control, and heuristics.

### [Buffalo-Gym: Multi-Armed Bandit Gymnasium](https://github.com/foreverska/buffalo-gym)

![Gymnasium version dependency](https://img.shields.io/badge/Gymnasium-v0.29.1-blue)
![GitHub stars](https://img.shields.io/github/stars/foreverska/buffalo-gym)

Buffalo-Gym is a Multi-Armed Bandit (MAB) gymnasium built primarily to assist in debugging RL implementations. With MABs, it is often easy to reason about what the agent is learning and whether it is correct. Buffalo-Gym encompasses bandits, contextual bandits, and contextual bandits with aliasing.
## Third-Party Environments using Gym

There are a large number of third-party environments using various versions of [Gym](https://github.com/openai/gym).
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/gymnasium_basics/environment_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def close(self):
# contains the registration code. For the GridWorld env, the registration
# code is run by importing ``gymnasium_env`` so if it were not possible to
# import gymnasium_env explicitly, you could register while making by
# ``env = gymnasium.make('gymnasium_env:gymnasium_env/GridWorld-v0)``. This
# ``env = gymnasium.make('gymnasium_env:gymnasium_env/GridWorld-v0')``. This
# is especially useful when you’re allowed to pass only the environment ID
# into a third-party codebase (eg. learning library). This lets you
# register your environment without needing to edit the library’s source
Expand Down
10 changes: 4 additions & 6 deletions docs/tutorials/training_agents/FrozenLake_tuto.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,10 @@ def choose_action(self, action_space, state, qtable):
# Exploitation (taking the biggest Q-value for this state)
else:
# Break ties randomly
# If all actions are the same for this state we choose a random one
# (otherwise `np.argmax()` would always take the first one)
if np.all(qtable[state, :]) == qtable[state, 0]:
action = action_space.sample()
else:
action = np.argmax(qtable[state, :])
# Find the indices where the Q-value equals the maximum value
# Choose a random action from the indices where the Q-value is maximum
max_ids = np.where(qtable[state, :] == max(qtable[state, :]))[0]
action = rng.choice(max_ids)
return action


Expand Down
2 changes: 1 addition & 1 deletion gymnasium/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def close(self):

@property
def np_random_seed(self) -> int | None:
"""Returns the base enviroment's :attr:`np_random_seed`."""
"""Returns the base environment's :attr:`np_random_seed`."""
return self.env.np_random_seed

@property
Expand Down
2 changes: 1 addition & 1 deletion gymnasium/envs/box2d/bipedal_walker.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
)
except ImportError as e:
raise DependencyNotInstalled(
'Box2D is not installed, run `pip install "gymnasium[box2d]"`'
'Box2D is not installed, you can install it by run `pip install swig` followed by `pip install "gymnasium[box2d]"`'
) from e


Expand Down
2 changes: 1 addition & 1 deletion gymnasium/envs/box2d/car_dynamics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from Box2D.b2 import fixtureDef, polygonShape, revoluteJointDef
except ImportError as e:
raise DependencyNotInstalled(
'Box2D is not installed, run `pip install "gymnasium[box2d]"`'
'Box2D is not installed, you can install it by run `pip install swig` followed by `pip install "gymnasium[box2d]"`'
) from e


Expand Down
2 changes: 1 addition & 1 deletion gymnasium/envs/box2d/car_racing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from Box2D.b2 import contactListener, fixtureDef, polygonShape
except ImportError as e:
raise DependencyNotInstalled(
'Box2D is not installed, run `pip install "gymnasium[box2d]"`'
'Box2D is not installed, you can install it by run `pip install swig` followed by `pip install "gymnasium[box2d]"`'
) from e

try:
Expand Down
2 changes: 1 addition & 1 deletion gymnasium/envs/box2d/lunar_lander.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
)
except ImportError as e:
raise DependencyNotInstalled(
'Box2D is not installed, run `pip install "gymnasium[box2d]"`'
'Box2D is not installed, you can install it by run `pip install swig` followed by `pip install "gymnasium[box2d]"`'
) from e


Expand Down
Loading

0 comments on commit b0cfeb4

Please sign in to comment.