From de909da30a1fe40288275da331fa7ad8e219a2bc Mon Sep 17 00:00:00 2001
From: Mark Towers
Date: Mon, 29 Jan 2024 15:58:21 +0000
Subject: [PATCH] Update the classic control arguments doc sections (#898)

---
 gymnasium/envs/box2d/bipedal_walker.py        | 12 ++-
 gymnasium/envs/box2d/car_racing.py            | 48 +++++-----
 gymnasium/envs/box2d/lunar_lander.py          | 88 ++++++++-----------
 gymnasium/envs/classic_control/acrobot.py     | 35 ++++----
 gymnasium/envs/classic_control/cartpole.py    | 36 ++++----
 .../continuous_mountain_car.py                | 17 ++--
 .../envs/classic_control/mountain_car.py      | 18 ++--
 gymnasium/envs/classic_control/pendulum.py    | 26 +++---
 gymnasium/envs/mujoco/ant_v5.py               |  2 +-
 gymnasium/envs/mujoco/half_cheetah_v5.py      |  2 +-
 gymnasium/envs/mujoco/hopper_v5.py            |  2 +-
 gymnasium/envs/mujoco/humanoid_v5.py          |  2 +-
 gymnasium/envs/mujoco/humanoidstandup_v5.py   |  2 +-
 .../mujoco/inverted_double_pendulum_v5.py     |  2 +-
 gymnasium/envs/mujoco/inverted_pendulum_v5.py |  2 +-
 gymnasium/envs/mujoco/pusher_v5.py            |  2 +-
 gymnasium/envs/mujoco/reacher_v5.py           |  2 +-
 gymnasium/envs/mujoco/swimmer_v5.py           |  2 +-
 gymnasium/envs/mujoco/walker2d_v5.py          |  2 +-
 19 files changed, 159 insertions(+), 143 deletions(-)

diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py
index 14ad83164..95974990b 100644
--- a/gymnasium/envs/box2d/bipedal_walker.py
+++ b/gymnasium/envs/box2d/bipedal_walker.py
@@ -142,11 +142,15 @@ class BipedalWalker(gym.Env, EzPickle):
     if the walker exceeds the right end of the terrain length.
 
     ## Arguments
-    To use the _hardcore_ environment, you need to specify the
-    `hardcore=True` argument like below:
+
+    To use the _hardcore_ environment, you need to specify the `hardcore=True`:
+
     ```python
-    import gymnasium as gym
-    env = gym.make("BipedalWalker-v3", hardcore=True)
+    >>> import gymnasium as gym
+    >>> env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<BipedalWalker<BipedalWalker-v3>>>>>
+
     ```
 
     ## Version History
diff --git a/gymnasium/envs/box2d/car_racing.py b/gymnasium/envs/box2d/car_racing.py
index 5044bb400..6563b872f 100644
--- a/gymnasium/envs/box2d/car_racing.py
+++ b/gymnasium/envs/box2d/car_racing.py
@@ -115,7 +115,7 @@ class CarRacing(gym.Env, EzPickle):
     state RGB buffer. From left to right: true speed, four ABS sensors,
     steering wheel position, and gyroscope.
     To play yourself (it's rather fast for humans), type:
-    ```
+    ```shell
    python gymnasium/envs/box2d/car_racing.py
    ```
    Remember: it's a powerful rear-wheel drive car - don't press the accelerator
@@ -139,46 +139,54 @@ class CarRacing(gym.Env, EzPickle):
     A top-down 96x96 RGB image of the car and race track.
 
     ## Rewards
-    The reward is -0.1 every frame and +1000/N for every track tile visited,
-    where N is the total number of tiles visited in the track. For example,
-    if you have finished in 732 frames, your reward is
-    1000 - 0.1*732 = 926.8 points.
+    The reward is -0.1 every frame and +1000/N for every track tile visited, where N is the total number of tiles
+    visited in the track. For example, if you have finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points.
 
     ## Starting State
     The car starts at rest in the center of the road.
 
     ## Episode Termination
-    The episode finishes when all the tiles are visited. The car can also go
-    outside the playfield - that is, far off the track, in which case it will
-    receive -100 reward and die.
+    The episode finishes when all the tiles are visited. The car can also go outside the playfield -
+    that is, far off the track, in which case it will receive -100 reward and die.
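As a quick sanity check on the reward arithmetic quoted in the Rewards section above, a minimal sketch (the helper below is illustrative only and is not part of the environment's code; the tile count is an arbitrary assumption that cancels out when every tile is visited):

```python
# Illustrative restatement of the documented CarRacing reward: -0.1 per frame,
# plus 1000/N for each visited tile, where N is the number of tiles in the track.
def episode_return(tiles_visited: int, total_tiles: int, frames: int) -> float:
    return 1000.0 * tiles_visited / total_tiles - 0.1 * frames

# Visiting every tile and finishing in 732 frames reproduces the quoted 926.8 points.
print(episode_return(tiles_visited=300, total_tiles=300, frames=732))  # 926.8
```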
     ## Arguments
-    `lap_complete_percent` dictates the percentage of tiles that must be visited by
-    the agent before a lap is considered complete.
 
-    Passing `domain_randomize=True` enables the domain randomized variant of the environment.
-    In this scenario, the background and track colours are different on every reset.
+    ```python
+    >>> import gymnasium as gym
+    >>> env = gym.make("CarRacing-v2", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=False)
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v2>>>>>
+
+    ```
+
+    * `lap_complete_percent=0.95` dictates the percentage of tiles that must be visited by
+    the agent before a lap is considered complete.
 
-    Passing `continuous=False` converts the environment to use discrete action space.
-    The discrete action space has 5 actions: [do nothing, left, right, gas, brake].
+    * `domain_randomize=True` enables the domain randomized variant of the environment.
+    In this scenario, the background and track colours are different on every reset.
+
+    * `continuous=False` converts the environment to use a discrete action space.
+    The discrete action space has 5 actions: [do nothing, left, right, gas, brake].
 
     ## Reset Arguments
+
     Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
     Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the
     environment. `domain_randomize` must be `True` on init for this argument to work.
-    Example usage:
+
     ```python
-    import gymnasium as gym
-    env = gym.make("CarRacing-v1", domain_randomize=True)
+    >>> import gymnasium as gym
+    >>> env = gym.make("CarRacing-v2", domain_randomize=True)
 
     # normal reset, this changes the colour scheme by default
-    env.reset()
+    >>> obs, _ = env.reset()
 
     # reset with colour scheme change
-    env.reset(options={"randomize": True})
+    >>> randomize_obs, _ = env.reset(options={"randomize": True})
 
     # reset with no colour scheme change
-    env.reset(options={"randomize": False})
+    >>> non_random_obs, _ = env.reset(options={"randomize": False})
+
     ```
 
     ## Version History
diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py
index 0d6682443..4e3790bac 100644
--- a/gymnasium/envs/box2d/lunar_lander.py
+++ b/gymnasium/envs/box2d/lunar_lander.py
@@ -93,7 +93,7 @@ class LunarLander(gym.Env, EzPickle):
     can learn to fly and then land on its first attempt.
 
     To see a heuristic landing, run:
-    ```
+    ```shell
     python gymnasium/envs/box2d/lunar_lander.py
     ```
 
@@ -145,74 +145,60 @@ class LunarLander(gym.Env, EzPickle):
     > them is destroyed.
 
     ## Arguments
-    To use the _continuous_ environment, you need to specify the
-    `continuous=True` argument like below:
+
+    Lunar Lander has a large number of arguments:
+
     ```python
-    import gymnasium as gym
-    env = gym.make(
-        "LunarLander-v2",
-        continuous: bool = False,
-        gravity: float = -10.0,
-        enable_wind: bool = False,
-        wind_power: float = 15.0,
-        turbulence_power: float = 1.5,
-    )
+    >>> import gymnasium as gym
+    >>> env = gym.make("LunarLander-v2", continuous=False, gravity=-10.0,
+    ...                enable_wind=False, wind_power=15.0, turbulence_power=1.5)
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<LunarLander<LunarLander-v2>>>>>
+
     ```
 
-    If `continuous=True` is passed, continuous actions (corresponding to the throttle of the engines) will be used and the
-    action space will be `Box(-1, +1, (2,), dtype=np.float32)`.
-    The first coordinate of an action determines the throttle of the main engine, while the second
-    coordinate specifies the throttle of the lateral boosters.
-    Given an action `np.array([main, lateral])`, the main engine will be turned off completely if
-    `main < 0` and the throttle scales affinely from 50% to 100% for `0 <= main <= 1` (in particular, the
-    main engine doesn't work with less than 50% power).
-    Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
-    booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
-    from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively).
-
-    `gravity` dictates the gravitational constant, this is bounded to be within 0 and -12.
-
-    If `enable_wind=True` is passed, there will be wind effects applied to the lander.
-    The wind is generated using the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))`.
-    `k` is set to 0.01.
-    `C` is sampled randomly between -9999 and 9999.
-
-    `wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for `wind_power` is between 0.0 and 20.0.
-    `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft. The recommended value for `turbulence_power` is between 0.0 and 2.0.
+
+    * `continuous` determines if discrete or continuous actions (corresponding to the throttle of the engines) will be used with the
+    action space being `Discrete(4)` or `Box(-1, +1, (2,), dtype=np.float32)` respectively.
+    For continuous actions, the first coordinate of an action determines the throttle of the main engine, while the second
+    coordinate specifies the throttle of the lateral boosters. Given an action `np.array([main, lateral])`, the main
+    engine will be turned off completely if `main < 0` and the throttle scales affinely from 50% to 100% for
+    `0 <= main <= 1` (in particular, the main engine doesn't work with less than 50% power).
+    Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
+    booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
+    from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively).
+
+    * `gravity` dictates the gravitational constant; this is bounded to be between 0 and -12. Default is -10.0.
+
+    * `enable_wind` determines if there will be wind effects applied to the lander. The wind is generated using
+    the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))` where `k` is set to 0.01 and `C` is sampled randomly between -9999 and 9999.
+
+    * `wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for
+    `wind_power` is between 0.0 and 20.0.
+
+    * `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft.
+    The recommended value for `turbulence_power` is between 0.0 and 2.0.
 
     ## Version History
     - v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters
-    - v1: Legs contact with ground added in state vector; contact with ground
-    give +10 reward points, and -10 if then lose contact; reward
-    renormalized to 200; harder initial random push.
+    - v1: Legs contact with ground added in state vector; contact with ground give +10 reward points,
+    and -10 if then lose contact; reward renormalized to 200; harder initial random push.
     - v0: Initial version
-
 
     ## Notes
 
     There are several unexpected bugs with the implementation of the environment.
 
-    1. The position of the side thursters on the body of the lander changes, depending on the orientation of the lander.
-    This in turn results in an orientation depentant torque being applied to the lander.
+    1. The position of the side thrusters on the body of the lander changes, depending on the orientation of the lander.
+    This in turn results in an orientation dependent torque being applied to the lander.
 
     2. The units of the state are not consistent. I.e.
     * The angular velocity is in units of 0.4 radians per second. In order to convert to radians per second, the value needs to be multiplied by a factor of 2.5.
 
     For the default values of VIEWPORT_W, VIEWPORT_H, SCALE, and FPS, the scale factors equal:
-    'x': 10
-    'y': 6.666
-    'vx': 5
-    'vy': 7.5
-    'angle': 1
-    'angular velocity': 2.5
+    'x': 10, 'y': 6.666, 'vx': 5, 'vy': 7.5, 'angle': 1, 'angular velocity': 2.5
 
     After the correction has been made, the units of the state are as follows:
-    'x': (units)
-    'y': (units)
-    'vx': (units/second)
-    'vy': (units/second)
-    'angle': (radians)
-    'angular velocity': (radians/second)
-
+    'x': (units), 'y': (units), 'vx': (units/second), 'vy': (units/second), 'angle': (radians), 'angular velocity': (radians/second)
diff --git a/gymnasium/envs/classic_control/acrobot.py b/gymnasium/envs/classic_control/acrobot.py
index 75f9cd465..1700b93ab 100644
--- a/gymnasium/envs/classic_control/acrobot.py
+++ b/gymnasium/envs/classic_control/acrobot.py
@@ -96,15 +96,19 @@ class AcrobotEnv(Env):
 
     ## Arguments
 
-    No additional arguments are currently supported during construction.
+    Acrobot only has `render_mode` as a keyword for `gymnasium.make`.
+    On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
 
     ```python
-    import gymnasium as gym
-    env = gym.make('Acrobot-v1')
-    ```
+    >>> import gymnasium as gym
+    >>> env = gym.make('Acrobot-v1', render_mode="rgb_array")
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<AcrobotEnv<Acrobot-v1>>>>>
+    >>> env.reset(seed=123, options={"low": -0.2, "high": 0.2})  # default low=-0.1, high=0.1
+    (array([ 0.997341  ,  0.07287608,  0.9841162 , -0.17752565, -0.11185605,
+           -0.12625128], dtype=float32), {})
 
-    On reset, the `options` parameter allows the user to change the bounds used to determine
-    the new random state.
+    ```
 
     By default, the dynamics of the acrobot follow those described in Sutton and Barto's book
     [Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html).
@@ -118,20 +122,17 @@ class AcrobotEnv(Env):
 
     See the following note for details:
 
-    > The dynamics equations were missing some terms in the NIPS paper which
-    are present in the book. R. Sutton confirmed in personal correspondence
-    that the experimental results shown in the paper and the book were
-    generated with the equations shown in the book.
-    However, there is the option to run the domain with the paper equations
-    by setting `book_or_nips = 'nips'`
-
+    > The dynamics equations were missing some terms in the NIPS paper which are present in the book.
+    R. Sutton confirmed in personal correspondence that the experimental results shown in the paper and the book were
+    generated with the equations shown in the book. However, there is the option to run the domain with the paper equations
+    by setting `book_or_nips = 'nips'`
 
     ## Version History
 
     - v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of
       `theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the
       sine and cosine of each angle instead.
-    - v0: Initial versions release (1.0.0) (removed from gymnasium for v1)
+    - v0: Initial versions release
 
     ## References
     - Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
@@ -383,8 +384,8 @@ def close(self):
 
 
 def wrap(x, m, M):
-    """Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
-    truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
+    """Wraps `x` so m <= x <= M; but unlike `bound()` which
+    truncates, `wrap()` wraps x around the coordinate system defined by m,M.\n
     For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
 
     Args:
@@ -439,7 +440,7 @@ def rk4(derivs, y0, t):
         >>> yout = rk4(derivs, y0, t)
 
     Args:
-        derivs: the derivative of the system and has the signature ``dy = derivs(yi)``
+        derivs: the derivative of the system and has the signature `dy = derivs(yi)`
         y0: initial state vector
         t: sample times
 
diff --git a/gymnasium/envs/classic_control/cartpole.py b/gymnasium/envs/classic_control/cartpole.py
index d941c1de5..f1e82054f 100644
--- a/gymnasium/envs/classic_control/cartpole.py
+++ b/gymnasium/envs/classic_control/cartpole.py
@@ -74,29 +74,33 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
 
     ## Arguments
 
-    Cartpole only has ``render_mode`` as a keyword for ``gymnasium.make``.
+    Cartpole only has `render_mode` as a keyword for `gymnasium.make`.
     On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
 
-    Examples:
-        >>> import gymnasium as gym
-        >>> env = gym.make("CartPole-v1", render_mode="rgb_array")
-        >>> env
-        <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
-        >>> env.reset(seed=123, options={"low": 0, "high": 1})
-        (array([0.6823519 , 0.05382102, 0.22035988, 0.18437181], dtype=float32), {})
+    ```python
+    >>> import gymnasium as gym
+    >>> env = gym.make("CartPole-v1", render_mode="rgb_array")
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
+    >>> env.reset(seed=123, options={"low": -0.1, "high": 0.1})  # default low=-0.05, high=0.05
+    (array([ 0.03647037, -0.0892358 , -0.05592803, -0.06312564], dtype=float32), {})
+
+    ```
 
     ## Vectorized environment
 
     To increase steps per seconds, users can use a custom vector environment or with an environment vectorizor.
 
-    Examples:
-        >>> import gymnasium as gym
-        >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="vector_entry_point")
-        >>> envs
-        CartPoleVectorEnv(CartPole-v1, num_envs=3)
-        >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
-        >>> envs
-        SyncVectorEnv(CartPole-v1, num_envs=3)
+    ```python
+    >>> import gymnasium as gym
+    >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="vector_entry_point")
+    >>> envs
+    CartPoleVectorEnv(CartPole-v1, num_envs=3)
+    >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
+    >>> envs
+    SyncVectorEnv(CartPole-v1, num_envs=3)
+
+    ```
     """
 
     metadata = {
diff --git a/gymnasium/envs/classic_control/continuous_mountain_car.py b/gymnasium/envs/classic_control/continuous_mountain_car.py
index 6397f7e97..f27577fed 100644
--- a/gymnasium/envs/classic_control/continuous_mountain_car.py
+++ b/gymnasium/envs/classic_control/continuous_mountain_car.py
@@ -91,17 +91,22 @@ class Continuous_MountainCarEnv(gym.Env):
 
     ## Arguments
 
+    Continuous Mountain Car has two parameters for `gymnasium.make` with `render_mode` and `goal_velocity`.
+    On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
+
     ```python
-    import gymnasium as gym
-    gym.make('MountainCarContinuous-v0')
-    ```
+    >>> import gymnasium as gym
+    >>> env = gym.make("MountainCarContinuous-v0", render_mode="rgb_array", goal_velocity=0.1)  # default goal_velocity=0
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<Continuous_MountainCarEnv<MountainCarContinuous-v0>>>>>
+    >>> env.reset(seed=123, options={"low": -0.7, "high": -0.5})  # default low=-0.6, high=-0.4
+    (array([-0.5635296,  0.       ], dtype=float32), {})
 
-    On reset, the `options` parameter allows the user to change the bounds used to determine
-    the new random state.
+    ```
 
     ## Version History
 
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/classic_control/mountain_car.py b/gymnasium/envs/classic_control/mountain_car.py
index dfc06070a..a6157377c 100644
--- a/gymnasium/envs/classic_control/mountain_car.py
+++ b/gymnasium/envs/classic_control/mountain_car.py
@@ -80,20 +80,24 @@ class MountainCarEnv(gym.Env):
     1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on top of the right hill)
     2. Truncation: The length of the episode is 200.
 
-
     ## Arguments
 
+    Mountain Car has two parameters for `gymnasium.make` with `render_mode` and `goal_velocity`.
+    On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
+
     ```python
-    import gymnasium as gym
-    gym.make('MountainCar-v0')
-    ```
+    >>> import gymnasium as gym
+    >>> env = gym.make("MountainCar-v0", render_mode="rgb_array", goal_velocity=0.1)  # default goal_velocity=0
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<MountainCarEnv<MountainCar-v0>>>>>
+    >>> env.reset(seed=123, options={"x_init": np.pi/2, "y_init": 0.5})  # default x_init=np.pi, y_init=1.0
+    (array([-0.46352962,  0.        ], dtype=float32), {})
 
-    On reset, the `options` parameter allows the user to change the bounds used to determine
-    the new random state.
+    ```
 
     ## Version History
 
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/classic_control/pendulum.py b/gymnasium/envs/classic_control/pendulum.py
index 0c1516680..64866ecaa 100644
--- a/gymnasium/envs/classic_control/pendulum.py
+++ b/gymnasium/envs/classic_control/pendulum.py
@@ -29,7 +29,7 @@ class PendulumEnv(gym.Env):
 
     ![Pendulum Coordinate System](/_static/diagrams/pendulum.png)
 
-    - `x-y`: cartesian coordinates of the pendulum's end in meters. 
+    - `x-y`: cartesian coordinates of the pendulum's end in meters.
     - `theta` : angle in radians.
     - `tau`: torque in `N m`. Defined as positive _counter-clockwise_.
 
@@ -41,7 +41,6 @@ class PendulumEnv(gym.Env):
     |-----|--------|------|-----|
     | 0 | Torque | -2.0 | 2.0 |
 
-
     ## Observation Space
 
     The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free
@@ -74,22 +73,27 @@ class PendulumEnv(gym.Env):
 
     ## Arguments
 
-    - `g`: acceleration of gravity measured in *(m s-2)* used to calculate the pendulum dynamics.
-      The default value is g = 10.0 .
+
+    Pendulum has two parameters for `gymnasium.make` with `render_mode` and `g` representing
+    the acceleration of gravity measured in *(m s-2)* used to calculate the pendulum dynamics.
+    The default value is `g = 10.0`.
+
+    On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
 
     ```python
-    import gymnasium as gym
-    gym.make('Pendulum-v1', g=9.81)
-    ```
+    >>> import gymnasium as gym
+    >>> env = gym.make("Pendulum-v1", render_mode="rgb_array", g=9.81)  # default g=10.0
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<PendulumEnv<Pendulum-v1>>>>>
+    >>> env.reset(seed=123, options={"low": -0.7, "high": 0.5})  # default low=-0.6, high=-0.5
+    (array([ 0.4123625 ,  0.91101986, -0.89235795], dtype=float32), {})
 
-    On reset, the `options` parameter allows the user to change the bounds used to determine
-    the new random state.
+    ```
 
     ## Version History
     * v1: Simplify the math equations, no difference in behavior.
-    * v0: Initial versions release (1.0.0)
-
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/ant_v5.py b/gymnasium/envs/mujoco/ant_v5.py
index 768cd5f9f..4dccc6756 100644
--- a/gymnasium/envs/mujoco/ant_v5.py
+++ b/gymnasium/envs/mujoco/ant_v5.py
@@ -215,7 +215,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen).
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/half_cheetah_v5.py b/gymnasium/envs/mujoco/half_cheetah_v5.py
index 6f9aba173..30000851e 100644
--- a/gymnasium/envs/mujoco/half_cheetah_v5.py
+++ b/gymnasium/envs/mujoco/half_cheetah_v5.py
@@ -140,7 +140,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen).
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/hopper_v5.py b/gymnasium/envs/mujoco/hopper_v5.py
index fe8fede85..860d5288d 100644
--- a/gymnasium/envs/mujoco/hopper_v5.py
+++ b/gymnasium/envs/mujoco/hopper_v5.py
@@ -151,7 +151,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/humanoid_v5.py b/gymnasium/envs/mujoco/humanoid_v5.py
index bd7d9f3b3..c40656c35 100644
--- a/gymnasium/envs/mujoco/humanoid_v5.py
+++ b/gymnasium/envs/mujoco/humanoid_v5.py
@@ -292,7 +292,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
     * v2: All continuous control environments now use mujoco-py >= 1.50
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/humanoidstandup_v5.py b/gymnasium/envs/mujoco/humanoidstandup_v5.py
index 9632a24d7..3dd1e77fe 100644
--- a/gymnasium/envs/mujoco/humanoidstandup_v5.py
+++ b/gymnasium/envs/mujoco/humanoidstandup_v5.py
@@ -274,7 +274,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py
index 1981f692e..e1d4cd059 100644
--- a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py
+++ b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py
@@ -133,7 +133,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum).
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/inverted_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_pendulum_v5.py
index 02916001f..9e1e3bd53 100644
--- a/gymnasium/envs/mujoco/inverted_pendulum_v5.py
+++ b/gymnasium/envs/mujoco/inverted_pendulum_v5.py
@@ -108,7 +108,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.5.
     * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum).
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py
index 2fb8f4d8b..d3824480d 100644
--- a/gymnasium/envs/mujoco/pusher_v5.py
+++ b/gymnasium/envs/mujoco/pusher_v5.py
@@ -154,7 +154,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks (not including pusher, which has a max_time_steps of 100). Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py
index a586ff2e1..5902cd16f 100644
--- a/gymnasium/envs/mujoco/reacher_v5.py
+++ b/gymnasium/envs/mujoco/reacher_v5.py
@@ -133,7 +133,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.50
     * v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/swimmer_v5.py b/gymnasium/envs/mujoco/swimmer_v5.py
index 4515c5012..2d87d9bbd 100644
--- a/gymnasium/envs/mujoco/swimmer_v5.py
+++ b/gymnasium/envs/mujoco/swimmer_v5.py
@@ -140,7 +140,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen).
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/walker2d_v5.py b/gymnasium/envs/mujoco/walker2d_v5.py
index f4f57ef1e..45c196ba3 100644
--- a/gymnasium/envs/mujoco/walker2d_v5.py
+++ b/gymnasium/envs/mujoco/walker2d_v5.py
@@ -160,7 +160,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
     * v2: All continuous control environments now use mujoco-py >= 1.50
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
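For readers who want to exercise the documented calling patterns outside of the docstrings, a minimal sketch follows; it assumes a local Gymnasium installation and only repeats arguments that already appear in the docstrings above:

```python
# Construct a classic-control environment with the documented keyword arguments,
# then reset it with the documented seed/options pattern.
import gymnasium as gym

env = gym.make("CartPole-v1", render_mode="rgb_array")
obs, info = env.reset(seed=123, options={"low": -0.1, "high": 0.1})  # bounds of the random initial state
print(obs.shape)  # (4,)
env.close()

# Vectorised construction, as shown in CartPole's "Vectorized environment" section.
envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
obs, info = envs.reset(seed=123)
print(obs.shape)  # (3, 4)
envs.close()
```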