From de909da30a1fe40288275da331fa7ad8e219a2bc Mon Sep 17 00:00:00 2001
From: Mark Towers
Date: Mon, 29 Jan 2024 15:58:21 +0000
Subject: [PATCH] Update the classic control arguments doc sections (#898)

---
 gymnasium/envs/box2d/bipedal_walker.py        | 12 ++-
 gymnasium/envs/box2d/car_racing.py            | 48 +++++-----
 gymnasium/envs/box2d/lunar_lander.py          | 88 ++++++++-----------
 gymnasium/envs/classic_control/acrobot.py     | 35 ++++----
 gymnasium/envs/classic_control/cartpole.py    | 36 ++++----
 .../continuous_mountain_car.py                | 17 ++--
 .../envs/classic_control/mountain_car.py      | 18 ++--
 gymnasium/envs/classic_control/pendulum.py    | 26 +++---
 gymnasium/envs/mujoco/ant_v5.py               |  2 +-
 gymnasium/envs/mujoco/half_cheetah_v5.py      |  2 +-
 gymnasium/envs/mujoco/hopper_v5.py            |  2 +-
 gymnasium/envs/mujoco/humanoid_v5.py          |  2 +-
 gymnasium/envs/mujoco/humanoidstandup_v5.py   |  2 +-
 .../mujoco/inverted_double_pendulum_v5.py     |  2 +-
 gymnasium/envs/mujoco/inverted_pendulum_v5.py |  2 +-
 gymnasium/envs/mujoco/pusher_v5.py            |  2 +-
 gymnasium/envs/mujoco/reacher_v5.py           |  2 +-
 gymnasium/envs/mujoco/swimmer_v5.py           |  2 +-
 gymnasium/envs/mujoco/walker2d_v5.py          |  2 +-
 19 files changed, 159 insertions(+), 143 deletions(-)

diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py
index 14ad83164..95974990b 100644
--- a/gymnasium/envs/box2d/bipedal_walker.py
+++ b/gymnasium/envs/box2d/bipedal_walker.py
@@ -142,11 +142,15 @@ class BipedalWalker(gym.Env, EzPickle):
     if the walker exceeds the right end of the terrain length.
 
     ## Arguments
-    To use the _hardcore_ environment, you need to specify the
-    `hardcore=True` argument like below:
+
+    To use the _hardcore_ environment, you need to specify the `hardcore=True`:
+
     ```python
-    import gymnasium as gym
-    env = gym.make("BipedalWalker-v3", hardcore=True)
+    >>> import gymnasium as gym
+    >>> env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<BipedalWalker<BipedalWalker-v3>>>>>
+
     ```
 
     ## Version History
diff --git a/gymnasium/envs/box2d/car_racing.py b/gymnasium/envs/box2d/car_racing.py
index 5044bb400..6563b872f 100644
--- a/gymnasium/envs/box2d/car_racing.py
+++ b/gymnasium/envs/box2d/car_racing.py
@@ -115,7 +115,7 @@ class CarRacing(gym.Env, EzPickle):
     state RGB buffer. From left to right: true speed, four ABS sensors,
     steering wheel position, and gyroscope.
     To play yourself (it's rather fast for humans), type:
-    ```
+    ```shell
    python gymnasium/envs/box2d/car_racing.py
    ```
    Remember: it's a powerful rear-wheel drive car - don't press the accelerator
@@ -139,46 +139,54 @@ class CarRacing(gym.Env, EzPickle):
     A top-down 96x96 RGB image of the car and race track.
 
     ## Rewards
-    The reward is -0.1 every frame and +1000/N for every track tile visited,
-    where N is the total number of tiles visited in the track. For example,
-    if you have finished in 732 frames, your reward is
-    1000 - 0.1*732 = 926.8 points.
+    The reward is -0.1 every frame and +1000/N for every track tile visited, where N is the total number of tiles
+    visited in the track. For example, if you have finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points.
 
     ## Starting State
     The car starts at rest in the center of the road.
 
     ## Episode Termination
-    The episode finishes when all the tiles are visited. The car can also go
-    outside the playfield - that is, far off the track, in which case it will
-    receive -100 reward and die.
+    The episode finishes when all the tiles are visited. The car can also go outside the playfield -
+    that is, far off the track, in which case it will receive -100 reward and die.
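As a quick sanity check on the reward arithmetic quoted in the Rewards section above, a minimal sketch (the helper below is illustrative only and is not part of the environment's code; the tile count is an arbitrary assumption that cancels out when every tile is visited):

```python
# Illustrative restatement of the documented CarRacing reward: -0.1 per frame,
# plus 1000/N for each visited tile, where N is the number of tiles in the track.
def episode_return(tiles_visited: int, total_tiles: int, frames: int) -> float:
    return 1000.0 * tiles_visited / total_tiles - 0.1 * frames

# Visiting every tile and finishing in 732 frames reproduces the quoted 926.8 points.
print(episode_return(tiles_visited=300, total_tiles=300, frames=732))  # 926.8
```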
     ## Arguments
-    `lap_complete_percent` dictates the percentage of tiles that must be visited by
-    the agent before a lap is considered complete.
 
-    Passing `domain_randomize=True` enables the domain randomized variant of the environment.
-    In this scenario, the background and track colours are different on every reset.
+    ```python
+    >>> import gymnasium as gym
+    >>> env = gym.make("CarRacing-v2", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=False)
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v2>>>>>
+
+    ```
+
+    * `lap_complete_percent=0.95` dictates the percentage of tiles that must be visited by
+    the agent before a lap is considered complete.
 
-    Passing `continuous=False` converts the environment to use discrete action space.
-    The discrete action space has 5 actions: [do nothing, left, right, gas, brake].
+    * `domain_randomize=True` enables the domain randomized variant of the environment.
+    In this scenario, the background and track colours are different on every reset.
+
+    * `continuous=False` converts the environment to use a discrete action space.
+    The discrete action space has 5 actions: [do nothing, left, right, gas, brake].
 
     ## Reset Arguments
+
     Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
     Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the
     environment. `domain_randomize` must be `True` on init for this argument to work.
-    Example usage:
+
     ```python
-    import gymnasium as gym
-    env = gym.make("CarRacing-v1", domain_randomize=True)
+    >>> import gymnasium as gym
+    >>> env = gym.make("CarRacing-v2", domain_randomize=True)
 
     # normal reset, this changes the colour scheme by default
-    env.reset()
+    >>> obs, _ = env.reset()
 
     # reset with colour scheme change
-    env.reset(options={"randomize": True})
+    >>> randomize_obs, _ = env.reset(options={"randomize": True})
 
     # reset with no colour scheme change
-    env.reset(options={"randomize": False})
+    >>> non_random_obs, _ = env.reset(options={"randomize": False})
+
     ```
 
     ## Version History
diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py
index 0d6682443..4e3790bac 100644
--- a/gymnasium/envs/box2d/lunar_lander.py
+++ b/gymnasium/envs/box2d/lunar_lander.py
@@ -93,7 +93,7 @@ class LunarLander(gym.Env, EzPickle):
     can learn to fly and then land on its first attempt.
 
     To see a heuristic landing, run:
-    ```
+    ```shell
     python gymnasium/envs/box2d/lunar_lander.py
     ```
 
@@ -145,74 +145,60 @@ class LunarLander(gym.Env, EzPickle):
     > them is destroyed.
 
     ## Arguments
-    To use the _continuous_ environment, you need to specify the
-    `continuous=True` argument like below:
+
+    Lunar Lander has a large number of arguments:
+
     ```python
-    import gymnasium as gym
-    env = gym.make(
-        "LunarLander-v2",
-        continuous: bool = False,
-        gravity: float = -10.0,
-        enable_wind: bool = False,
-        wind_power: float = 15.0,
-        turbulence_power: float = 1.5,
-    )
+    >>> import gymnasium as gym
+    >>> env = gym.make("LunarLander-v2", continuous=False, gravity=-10.0,
+    ...                enable_wind=False, wind_power=15.0, turbulence_power=1.5)
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<LunarLander<LunarLander-v2>>>>>
+
     ```
 
-    If `continuous=True` is passed, continuous actions (corresponding to the throttle of the engines) will be used and the
-    action space will be `Box(-1, +1, (2,), dtype=np.float32)`.
-    The first coordinate of an action determines the throttle of the main engine, while the second
-    coordinate specifies the throttle of the lateral boosters.
-    Given an action `np.array([main, lateral])`, the main engine will be turned off completely if
-    `main < 0` and the throttle scales affinely from 50% to 100% for `0 <= main <= 1` (in particular, the
-    main engine doesn't work with less than 50% power).
-    Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
-    booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
-    from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively).
-
-    `gravity` dictates the gravitational constant, this is bounded to be within 0 and -12.
-
-    If `enable_wind=True` is passed, there will be wind effects applied to the lander.
-    The wind is generated using the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))`.
-    `k` is set to 0.01.
-    `C` is sampled randomly between -9999 and 9999.
-
-    `wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for `wind_power` is between 0.0 and 20.0.
-    `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft. The recommended value for `turbulence_power` is between 0.0 and 2.0.
+
+    * `continuous` determines if discrete or continuous actions (corresponding to the throttle of the engines) will be used with the
+    action space being `Discrete(4)` or `Box(-1, +1, (2,), dtype=np.float32)` respectively.
+    For continuous actions, the first coordinate of an action determines the throttle of the main engine, while the second
+    coordinate specifies the throttle of the lateral boosters. Given an action `np.array([main, lateral])`, the main
+    engine will be turned off completely if `main < 0` and the throttle scales affinely from 50% to 100% for
+    `0 <= main <= 1` (in particular, the main engine doesn't work with less than 50% power).
+    Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
+    booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
+    from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively).
+
+    * `gravity` dictates the gravitational constant; this is bounded to be between 0 and -12. Default is -10.0.
+
+    * `enable_wind` determines if there will be wind effects applied to the lander. The wind is generated using
+    the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))` where `k` is set to 0.01 and `C` is sampled randomly between -9999 and 9999.
+
+    * `wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for
+    `wind_power` is between 0.0 and 20.0.
+
+    * `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft.
+    The recommended value for `turbulence_power` is between 0.0 and 2.0.
 
     ## Version History
     - v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters
-    - v1: Legs contact with ground added in state vector; contact with ground
-    give +10 reward points, and -10 if then lose contact; reward
-    renormalized to 200; harder initial random push.
+    - v1: Legs contact with ground added in state vector; contact with ground give +10 reward points,
+    and -10 if then lose contact; reward renormalized to 200; harder initial random push.
     - v0: Initial version
-
 
     ## Notes
 
     There are several unexpected bugs with the implementation of the environment.
 
-    1. The position of the side thursters on the body of the lander changes, depending on the orientation of the lander.
-    This in turn results in an orientation depentant torque being applied to the lander.
+    1. The position of the side thrusters on the body of the lander changes, depending on the orientation of the lander.
+    This in turn results in an orientation dependent torque being applied to the lander.
 
     2. The units of the state are not consistent. I.e.
     * The angular velocity is in units of 0.4 radians per second. In order to convert to radians per second, the value needs to be multiplied by a factor of 2.5.
 
     For the default values of VIEWPORT_W, VIEWPORT_H, SCALE, and FPS, the scale factors equal:
-    'x': 10
-    'y': 6.666
-    'vx': 5
-    'vy': 7.5
-    'angle': 1
-    'angular velocity': 2.5
+    'x': 10, 'y': 6.666, 'vx': 5, 'vy': 7.5, 'angle': 1, 'angular velocity': 2.5
 
     After the correction has been made, the units of the state are as follows:
-    'x': (units)
-    'y': (units)
-    'vx': (units/second)
-    'vy': (units/second)
-    'angle': (radians)
-    'angular velocity': (radians/second)
-
+    'x': (units), 'y': (units), 'vx': (units/second), 'vy': (units/second), 'angle': (radians), 'angular velocity': (radians/second)
diff --git a/gymnasium/envs/classic_control/acrobot.py b/gymnasium/envs/classic_control/acrobot.py
index 75f9cd465..1700b93ab 100644
--- a/gymnasium/envs/classic_control/acrobot.py
+++ b/gymnasium/envs/classic_control/acrobot.py
@@ -96,15 +96,19 @@ class AcrobotEnv(Env):
 
     ## Arguments
 
-    No additional arguments are currently supported during construction.
+    Acrobot only has `render_mode` as a keyword for `gymnasium.make`.
+    On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
 
     ```python
-    import gymnasium as gym
-    env = gym.make('Acrobot-v1')
-    ```
+    >>> import gymnasium as gym
+    >>> env = gym.make('Acrobot-v1', render_mode="rgb_array")
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<AcrobotEnv<Acrobot-v1>>>>>
+    >>> env.reset(seed=123, options={"low": -0.2, "high": 0.2})  # default low=-0.1, high=0.1
+    (array([ 0.997341  ,  0.07287608,  0.9841162 , -0.17752565, -0.11185605,
+           -0.12625128], dtype=float32), {})
 
-    On reset, the `options` parameter allows the user to change the bounds used to determine
-    the new random state.
+    ```
 
     By default, the dynamics of the acrobot follow those described in Sutton and Barto's book
     [Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html).
@@ -118,20 +122,17 @@ class AcrobotEnv(Env):
 
     See the following note for details:
 
-    > The dynamics equations were missing some terms in the NIPS paper which
-    are present in the book. R. Sutton confirmed in personal correspondence
-    that the experimental results shown in the paper and the book were
-    generated with the equations shown in the book.
-    However, there is the option to run the domain with the paper equations
-    by setting `book_or_nips = 'nips'`
-
+    > The dynamics equations were missing some terms in the NIPS paper which are present in the book.
+    R. Sutton confirmed in personal correspondence that the experimental results shown in the paper and the book were
+    generated with the equations shown in the book. However, there is the option to run the domain with the paper equations
+    by setting `book_or_nips = 'nips'`
 
     ## Version History
 
     - v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of
       `theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the
       sine and cosine of each angle instead.
-    - v0: Initial versions release (1.0.0) (removed from gymnasium for v1)
+    - v0: Initial versions release
 
     ## References
     - Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
@@ -383,8 +384,8 @@ def close(self):
 
 
 def wrap(x, m, M):
-    """Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
-    truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
+    """Wraps `x` so m <= x <= M; but unlike `bound()` which
+    truncates, `wrap()` wraps x around the coordinate system defined by m,M.\n
     For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
 
     Args:
@@ -439,7 +440,7 @@ def rk4(derivs, y0, t):
         >>> yout = rk4(derivs, y0, t)
 
     Args:
-        derivs: the derivative of the system and has the signature ``dy = derivs(yi)``
+        derivs: the derivative of the system and has the signature `dy = derivs(yi)`
         y0: initial state vector
         t: sample times
 
diff --git a/gymnasium/envs/classic_control/cartpole.py b/gymnasium/envs/classic_control/cartpole.py
index d941c1de5..f1e82054f 100644
--- a/gymnasium/envs/classic_control/cartpole.py
+++ b/gymnasium/envs/classic_control/cartpole.py
@@ -74,29 +74,33 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
 
     ## Arguments
 
-    Cartpole only has ``render_mode`` as a keyword for ``gymnasium.make``.
+    Cartpole only has `render_mode` as a keyword for `gymnasium.make`.
     On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
 
-    Examples:
-        >>> import gymnasium as gym
-        >>> env = gym.make("CartPole-v1", render_mode="rgb_array")
-        >>> env
-        <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
-        >>> env.reset(seed=123, options={"low": 0, "high": 1})
-        (array([0.6823519 , 0.05382102, 0.22035988, 0.18437181], dtype=float32), {})
+    ```python
+    >>> import gymnasium as gym
+    >>> env = gym.make("CartPole-v1", render_mode="rgb_array")
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
+    >>> env.reset(seed=123, options={"low": -0.1, "high": 0.1})  # default low=-0.05, high=0.05
+    (array([ 0.03647037, -0.0892358 , -0.05592803, -0.06312564], dtype=float32), {})
+
+    ```
 
     ## Vectorized environment
 
     To increase steps per seconds, users can use a custom vector environment or with an environment vectorizor.
 
-    Examples:
-        >>> import gymnasium as gym
-        >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="vector_entry_point")
-        >>> envs
-        CartPoleVectorEnv(CartPole-v1, num_envs=3)
-        >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
-        >>> envs
-        SyncVectorEnv(CartPole-v1, num_envs=3)
+    ```python
+    >>> import gymnasium as gym
+    >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="vector_entry_point")
+    >>> envs
+    CartPoleVectorEnv(CartPole-v1, num_envs=3)
+    >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
+    >>> envs
+    SyncVectorEnv(CartPole-v1, num_envs=3)
+
+    ```
     """
 
     metadata = {
diff --git a/gymnasium/envs/classic_control/continuous_mountain_car.py b/gymnasium/envs/classic_control/continuous_mountain_car.py
index 6397f7e97..f27577fed 100644
--- a/gymnasium/envs/classic_control/continuous_mountain_car.py
+++ b/gymnasium/envs/classic_control/continuous_mountain_car.py
@@ -91,17 +91,22 @@ class Continuous_MountainCarEnv(gym.Env):
 
     ## Arguments
 
+    Continuous Mountain Car has two parameters for `gymnasium.make` with `render_mode` and `goal_velocity`.
+    On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
+
     ```python
-    import gymnasium as gym
-    gym.make('MountainCarContinuous-v0')
-    ```
+    >>> import gymnasium as gym
+    >>> env = gym.make("MountainCarContinuous-v0", render_mode="rgb_array", goal_velocity=0.1)  # default goal_velocity=0
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<Continuous_MountainCarEnv<MountainCarContinuous-v0>>>>>
+    >>> env.reset(seed=123, options={"low": -0.7, "high": -0.5})  # default low=-0.6, high=-0.4
+    (array([-0.5635296,  0.       ], dtype=float32), {})
 
-    On reset, the `options` parameter allows the user to change the bounds used to determine
-    the new random state.
+    ```
 
     ## Version History
 
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/classic_control/mountain_car.py b/gymnasium/envs/classic_control/mountain_car.py
index dfc06070a..a6157377c 100644
--- a/gymnasium/envs/classic_control/mountain_car.py
+++ b/gymnasium/envs/classic_control/mountain_car.py
@@ -80,20 +80,24 @@ class MountainCarEnv(gym.Env):
     1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on top of the right hill)
     2. Truncation: The length of the episode is 200.
 
-
     ## Arguments
 
+    Mountain Car has two parameters for `gymnasium.make` with `render_mode` and `goal_velocity`.
+    On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
+
     ```python
-    import gymnasium as gym
-    gym.make('MountainCar-v0')
-    ```
+    >>> import gymnasium as gym
+    >>> env = gym.make("MountainCar-v0", render_mode="rgb_array", goal_velocity=0.1)  # default goal_velocity=0
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<MountainCarEnv<MountainCar-v0>>>>>
+    >>> env.reset(seed=123, options={"x_init": np.pi/2, "y_init": 0.5})  # default x_init=np.pi, y_init=1.0
+    (array([-0.46352962,  0.        ], dtype=float32), {})
 
-    On reset, the `options` parameter allows the user to change the bounds used to determine
-    the new random state.
+    ```
 
     ## Version History
 
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/classic_control/pendulum.py b/gymnasium/envs/classic_control/pendulum.py
index 0c1516680..64866ecaa 100644
--- a/gymnasium/envs/classic_control/pendulum.py
+++ b/gymnasium/envs/classic_control/pendulum.py
@@ -29,7 +29,7 @@ class PendulumEnv(gym.Env):
 
     ![Pendulum Coordinate System](/_static/diagrams/pendulum.png)
 
-    - `x-y`: cartesian coordinates of the pendulum's end in meters. 
+    - `x-y`: cartesian coordinates of the pendulum's end in meters.
     - `theta` : angle in radians.
     - `tau`: torque in `N m`. Defined as positive _counter-clockwise_.
 
@@ -41,7 +41,6 @@ class PendulumEnv(gym.Env):
     |-----|--------|------|-----|
     | 0 | Torque | -2.0 | 2.0 |
 
-
     ## Observation Space
 
     The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free
@@ -74,22 +73,27 @@ class PendulumEnv(gym.Env):
 
     ## Arguments
 
-    - `g`: acceleration of gravity measured in *(m s-2)* used to calculate the pendulum dynamics.
-      The default value is g = 10.0 .
+
+    Pendulum has two parameters for `gymnasium.make` with `render_mode` and `g` representing
+    the acceleration of gravity measured in *(m s-2)* used to calculate the pendulum dynamics.
+    The default value is `g = 10.0`.
+
+    On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
 
     ```python
-    import gymnasium as gym
-    gym.make('Pendulum-v1', g=9.81)
-    ```
+    >>> import gymnasium as gym
+    >>> env = gym.make("Pendulum-v1", render_mode="rgb_array", g=9.81)  # default g=10.0
+    >>> env
+    <TimeLimit<OrderEnforcing<PassiveEnvChecker<PendulumEnv<Pendulum-v1>>>>>
+    >>> env.reset(seed=123, options={"low": -0.7, "high": 0.5})  # default low=-0.6, high=-0.5
+    (array([ 0.4123625 ,  0.91101986, -0.89235795], dtype=float32), {})
 
-    On reset, the `options` parameter allows the user to change the bounds used to determine
-    the new random state.
+    ```
 
     ## Version History
     * v1: Simplify the math equations, no difference in behavior.
-    * v0: Initial versions release (1.0.0)
-
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/ant_v5.py b/gymnasium/envs/mujoco/ant_v5.py
index 768cd5f9f..4dccc6756 100644
--- a/gymnasium/envs/mujoco/ant_v5.py
+++ b/gymnasium/envs/mujoco/ant_v5.py
@@ -215,7 +215,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen).
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/half_cheetah_v5.py b/gymnasium/envs/mujoco/half_cheetah_v5.py
index 6f9aba173..30000851e 100644
--- a/gymnasium/envs/mujoco/half_cheetah_v5.py
+++ b/gymnasium/envs/mujoco/half_cheetah_v5.py
@@ -140,7 +140,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen).
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/hopper_v5.py b/gymnasium/envs/mujoco/hopper_v5.py
index fe8fede85..860d5288d 100644
--- a/gymnasium/envs/mujoco/hopper_v5.py
+++ b/gymnasium/envs/mujoco/hopper_v5.py
@@ -151,7 +151,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/humanoid_v5.py b/gymnasium/envs/mujoco/humanoid_v5.py
index bd7d9f3b3..c40656c35 100644
--- a/gymnasium/envs/mujoco/humanoid_v5.py
+++ b/gymnasium/envs/mujoco/humanoid_v5.py
@@ -292,7 +292,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
     * v2: All continuous control environments now use mujoco-py >= 1.50
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/humanoidstandup_v5.py b/gymnasium/envs/mujoco/humanoidstandup_v5.py
index 9632a24d7..3dd1e77fe 100644
--- a/gymnasium/envs/mujoco/humanoidstandup_v5.py
+++ b/gymnasium/envs/mujoco/humanoidstandup_v5.py
@@ -274,7 +274,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py
index 1981f692e..e1d4cd059 100644
--- a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py
+++ b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py
@@ -133,7 +133,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum).
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/inverted_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_pendulum_v5.py
index 02916001f..9e1e3bd53 100644
--- a/gymnasium/envs/mujoco/inverted_pendulum_v5.py
+++ b/gymnasium/envs/mujoco/inverted_pendulum_v5.py
@@ -108,7 +108,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.5.
     * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum).
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py
index 2fb8f4d8b..d3824480d 100644
--- a/gymnasium/envs/mujoco/pusher_v5.py
+++ b/gymnasium/envs/mujoco/pusher_v5.py
@@ -154,7 +154,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks (not including pusher, which has a max_time_steps of 100). Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py
index a586ff2e1..5902cd16f 100644
--- a/gymnasium/envs/mujoco/reacher_v5.py
+++ b/gymnasium/envs/mujoco/reacher_v5.py
@@ -133,7 +133,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
     * v3: This environment does not have a v3 release.
     * v2: All continuous control environments now use mujoco-py >= 1.50
     * v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/swimmer_v5.py b/gymnasium/envs/mujoco/swimmer_v5.py
index 4515c5012..2d87d9bbd 100644
--- a/gymnasium/envs/mujoco/swimmer_v5.py
+++ b/gymnasium/envs/mujoco/swimmer_v5.py
@@ -140,7 +140,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen).
     * v2: All continuous control environments now use mujoco-py >= 1.50.
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0).
+    * v0: Initial versions release.
     """
 
     metadata = {
diff --git a/gymnasium/envs/mujoco/walker2d_v5.py b/gymnasium/envs/mujoco/walker2d_v5.py
index f4f57ef1e..45c196ba3 100644
--- a/gymnasium/envs/mujoco/walker2d_v5.py
+++ b/gymnasium/envs/mujoco/walker2d_v5.py
@@ -160,7 +160,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
     * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
     * v2: All continuous control environments now use mujoco-py >= 1.50
     * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
-    * v0: Initial versions release (1.0.0)
+    * v0: Initial versions release
     """
 
     metadata = {
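For readers who want to exercise the documented calling patterns outside of the docstrings, a minimal sketch follows; it assumes a local Gymnasium installation and only repeats arguments that already appear in the docstrings above:

```python
# Construct a classic-control environment with the documented keyword arguments,
# then reset it with the documented seed/options pattern.
import gymnasium as gym

env = gym.make("CartPole-v1", render_mode="rgb_array")
obs, info = env.reset(seed=123, options={"low": -0.1, "high": 0.1})  # bounds of the random initial state
print(obs.shape)  # (4,)
env.close()

# Vectorised construction, as shown in CartPole's "Vectorized environment" section.
envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
obs, info = envs.reset(seed=123)
print(obs.shape)  # (3, 4)
envs.close()
```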