Update the classic control arguments doc sections (#898)
pseudo-rnd-thoughts committed Jan 29, 2024
1 parent af3d6d7 commit de909da
Showing 19 changed files with 159 additions and 143 deletions.
12 changes: 8 additions & 4 deletions gymnasium/envs/box2d/bipedal_walker.py
@@ -142,11 +142,15 @@ class BipedalWalker(gym.Env, EzPickle):
if the walker exceeds the right end of the terrain length.
## Arguments
To use the _hardcore_ environment, you need to specify `hardcore=True`:
```python
>>> import gymnasium as gym
>>> env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
>>> env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<BipedalWalker<BipedalWalker-v3>>>>>
```
## Version History
48 changes: 28 additions & 20 deletions gymnasium/envs/box2d/car_racing.py
@@ -115,7 +115,7 @@ class CarRacing(gym.Env, EzPickle):
state RGB buffer. From left to right: true speed, four ABS sensors,
steering wheel position, and gyroscope.
To play yourself (it's rather fast for humans), type:
```shell
python gymnasium/envs/box2d/car_racing.py
```
Remember: it's a powerful rear-wheel drive car - don't press the accelerator
Expand All @@ -139,46 +139,54 @@ class CarRacing(gym.Env, EzPickle):
A top-down 96x96 RGB image of the car and race track.
## Rewards
The reward is -0.1 every frame and +1000/N for every track tile visited, where N is the total number of tiles
visited in the track. For example, if you have finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points.
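This arithmetic can be sketched directly (an illustrative snippet only, not environment code; the 300-tile track below is a made-up figure):
```python
def episode_reward(frames: int, tiles_visited: int, total_tiles: int) -> float:
    """-0.1 per frame, plus 1000/N per visited tile, where N is the track's tile count."""
    return -0.1 * frames + (1000.0 / total_tiles) * tiles_visited

# Visiting every tile of a hypothetical 300-tile track in 732 frames reproduces the example above:
print(episode_reward(frames=732, tiles_visited=300, total_tiles=300))  # ~926.8
```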
## Starting State
The car starts at rest in the center of the road.
## Episode Termination
The episode finishes when all the tiles are visited. The car can also go outside the playfield -
that is, far off the track, in which case it will receive -100 reward and die.
## Arguments
```python
>>> import gymnasium as gym
>>> env = gym.make("CarRacing-v2", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=False)
>>> env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v2>>>>>
```
* `lap_complete_percent=0.95` dictates the percentage of tiles that must be visited by
the agent before a lap is considered complete.
* `domain_randomize=False` controls the domain randomized variant of the environment (disabled by default).
When enabled, the background and track colours are different on every reset.
* `continuous=True` determines whether continuous actions are used (the default). Passing `continuous=False`
converts the environment to a discrete action space with 5 actions: [do nothing, left, right, gas, brake].
A short sketch of both modes follows this list.
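A hedged illustration of the two action modes (a sketch, not from the documentation above; it assumes the usual `[steering, gas, brake]` layout for the continuous space and that the box2d extra is installed):
```python
import gymnasium as gym

# Discrete mode: integer actions 0-4 in the order [do nothing, left, right, gas, brake].
env = gym.make("CarRacing-v2", continuous=False)
obs, _ = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(3)  # 3 = gas
env.close()

# Continuous mode (default): a Box action assumed to be [steering, gas, brake].
env = gym.make("CarRacing-v2", continuous=True)
obs, _ = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()
```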
## Reset Arguments
Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment.
`domain_randomize` must be `True` on init for this argument to work.
Example usage:
```python
>>> import gymnasium as gym
>>> env = gym.make("CarRacing-v2", domain_randomize=True)
# normal reset, this changes the colour scheme by default
>>> obs, _ = env.reset()
# reset with colour scheme change
>>> randomize_obs, _ = env.reset(options={"randomize": True})
# reset with no colour scheme change
>>> non_random_obs, _ = env.reset(options={"randomize": False})
```
## Version History
88 changes: 37 additions & 51 deletions gymnasium/envs/box2d/lunar_lander.py
@@ -93,7 +93,7 @@ class LunarLander(gym.Env, EzPickle):
can learn to fly and then land on its first attempt.
To see a heuristic landing, run:
```shell
python gymnasium/envs/box2d/lunar_lander.py
```
<!-- To play yourself, run: -->
@@ -145,74 +145,60 @@ class LunarLander(gym.Env, EzPickle):
> them is destroyed.
## Arguments
Lunar Lander has a large number of arguments:
```python
>>> import gymnasium as gym
>>> env = gym.make("LunarLander-v2", continuous=False, gravity=-10.0,
... enable_wind=False, wind_power=15.0, turbulence_power=1.5)
>>> env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<LunarLander<LunarLander-v2>>>>>
```
* `continuous` determines if discrete or continuous actions (corresponding to the throttle of the engines) will be used with the
action space being `Discrete(4)` or `Box(-1, +1, (2,), dtype=np.float32)` respectively.
For continuous actions, the first coordinate of an action determines the throttle of the main engine, while the second
coordinate specifies the throttle of the lateral boosters. Given an action `np.array([main, lateral])`, the main
engine will be turned off completely if `main < 0` and the throttle scales affinely from 50% to 100% for
`0 <= main <= 1` (in particular, the main engine doesn't work with less than 50% power).
Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively). A small sketch of this mapping is given after this list.
* `gravity` dictates the gravitational constant; this is bounded to be within 0 and -12. Default is -10.0.
* `enable_wind` determines if there will be wind effects applied to the lander. The wind is generated using
the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))` where `k` is set to 0.01 and `C` is sampled randomly between -9999 and 9999.
* `wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for
`wind_power` is between 0.0 and 20.0.
* `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft.
The recommended value for `turbulence_power` is between 0.0 and 2.0.
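A minimal sketch of the continuous throttle mapping described in the `continuous` bullet (illustration only; the environment performs this mapping internally):
```python
def engine_throttles(main: float, lateral: float):
    """Map a continuous action [main, lateral] in [-1, 1] to engine throttles per the rules above."""
    # Main engine: off for main < 0, otherwise throttle scales affinely from 50% to 100%.
    main_throttle = 0.0 if main < 0 else 0.5 + 0.5 * main
    # Lateral boosters: dead zone for -0.5 < lateral < 0.5, then 50%-100% on the firing side.
    if lateral < -0.5:
        side, side_throttle = "left", 0.5 + 0.5 * (-lateral - 0.5) / 0.5
    elif lateral > 0.5:
        side, side_throttle = "right", 0.5 + 0.5 * (lateral - 0.5) / 0.5
    else:
        side, side_throttle = None, 0.0
    return main_throttle, side, side_throttle

print(engine_throttles(0.0, 0.75))  # (0.5, 'right', 0.75)
```
Likewise, the wind model quoted in the `enable_wind` bullet can be written out directly; scaling the result by `wind_power` is an assumption about how the magnitude bound is applied:
```python
import math

def wind(t: float, C: float, wind_power: float = 15.0, k: float = 0.01) -> float:
    """tanh(sin(2 k (t+C)) + sin(pi k (t+C))), bounded in magnitude by wind_power (assumed scaling)."""
    return wind_power * math.tanh(math.sin(2 * k * (t + C)) + math.sin(math.pi * k * (t + C)))
```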
## Version History
- v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters
- v1: Legs contact with ground added in state vector; contact with ground give +10 reward points,
and -10 if then lose contact; reward renormalized to 200; harder initial random push.
- v0: Initial version
## Notes
There are several unexpected bugs with the implementation of the environment.
1. The position of the side thrusters on the body of the lander changes, depending on the orientation of the lander.
This in turn results in an orientation dependent torque being applied to the lander.
2. The units of the state are not consistent. I.e.
* The angular velocity is in units of 0.4 radians per second. In order to convert to radians per second, the value needs to be multiplied by a factor of 2.5.
For the default values of VIEWPORT_W, VIEWPORT_H, SCALE, and FPS, the scale factors equal:
'x': 10, 'y': 6.666, 'vx': 5, 'vy': 7.5, 'angle': 1, 'angular velocity': 2.5
After the correction has been made, the units of the state are as follows:
'x': (units), 'y': (units), 'vx': (units/second), 'vy': (units/second), 'angle': (radians), 'angular velocity': (radians/second)
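As a rough illustration of applying these scale factors (a sketch that assumes the standard 8-dimensional observation layout `[x, y, vx, vy, angle, angular velocity, left leg contact, right leg contact]`):
```python
# Hypothetical helper, not part of the environment: rescale the first six observation entries.
SCALE = {"x": 10.0, "y": 6.666, "vx": 5.0, "vy": 7.5, "angle": 1.0, "angular velocity": 2.5}

def to_physical_units(obs):
    corrected = [float(o) * f for o, f in zip(obs[:6], SCALE.values())]
    return corrected + [float(o) for o in obs[6:]]  # the two leg-contact flags pass through unchanged
```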
<!-- ## References -->
35 changes: 18 additions & 17 deletions gymnasium/envs/classic_control/acrobot.py
@@ -96,15 +96,19 @@ class AcrobotEnv(Env):
## Arguments
Acrobot only has `render_mode` as a keyword for `gymnasium.make`.
On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
```python
>>> import gymnasium as gym
>>> env = gym.make('Acrobot-v1', render_mode="rgb_array")
>>> env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<AcrobotEnv<Acrobot-v1>>>>>
>>> env.reset(seed=123, options={"low": -0.2, "high": 0.2}) # default low=-0.1, high=0.1
(array([ 0.997341 , 0.07287608, 0.9841162 , -0.17752565, -0.11185605,
-0.12625128], dtype=float32), {})
```
By default, the dynamics of the acrobot follow those described in Sutton and Barto's book
[Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html).
@@ -118,20 +118,17 @@ class AcrobotEnv(Env):
See the following note for details:
> The dynamics equations were missing some terms in the NIPS paper which are present in the book.
R. Sutton confirmed in personal correspondence that the experimental results shown in the paper and the book were
generated with the equations shown in the book. However, there is the option to run the domain with the paper equations
by setting `book_or_nips = 'nips'`
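One way this could be toggled on a created environment (a sketch; it assumes `book_or_nips` is reachable as an attribute of the unwrapped environment, as the note above suggests):
```python
import gymnasium as gym

env = gym.make("Acrobot-v1")
# Use the NIPS-paper dynamics instead of the book dynamics.
env.unwrapped.book_or_nips = "nips"
obs, info = env.reset(seed=0)
```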
## Version History
- v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of
`theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the
sine and cosine of each angle instead.
- v0: Initial versions release
## References
- Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
@@ -383,8 +384,8 @@ def close(self):


def wrap(x, m, M):
"""Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
"""Wraps `x` so m <= x <= M; but unlike `bound()` which
truncates, `wrap()` wraps x around the coordinate system defined by m,M.\n
For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
Args:
@@ -439,7 +440,7 @@ def rk4(derivs, y0, t):
>>> yout = rk4(derivs, y0, t)
Args:
derivs: the derivative of the system and has the signature `dy = derivs(yi)`
y0: initial state vector
t: sample times
36 changes: 20 additions & 16 deletions gymnasium/envs/classic_control/cartpole.py
@@ -74,29 +74,33 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
## Arguments
Cartpole only has `render_mode` as a keyword for `gymnasium.make`.
On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
```python
>>> import gymnasium as gym
>>> env = gym.make("CartPole-v1", render_mode="rgb_array")
>>> env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
>>> env.reset(seed=123, options={"low": -0.1, "high": 0.1}) # default low=-0.05, high=0.05
(array([ 0.03647037, -0.0892358 , -0.05592803, -0.06312564], dtype=float32), {})
```
## Vectorized environment
To increase the number of steps per second, users can use a custom vector environment or an environment vectorizer.
```python
>>> import gymnasium as gym
>>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="vector_entry_point")
>>> envs
CartPoleVectorEnv(CartPole-v1, num_envs=3)
>>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
>>> envs
SyncVectorEnv(CartPole-v1, num_envs=3)
```
"""

metadata = {
17 changes: 11 additions & 6 deletions gymnasium/envs/classic_control/continuous_mountain_car.py
@@ -91,17 +91,22 @@ class Continuous_MountainCarEnv(gym.Env):
## Arguments
Continuous Mountain Car has two parameters for `gymnasium.make`: `render_mode` and `goal_velocity`.
On reset, the `options` parameter allows the user to change the bounds used to determine the new random state.
```python
>>> import gymnasium as gym
>>> env = gym.make("MountainCarContinuous-v0", render_mode="rgb_array", goal_velocity=0.1) # default goal_velocity=0
>>> env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<Continuous_MountainCarEnv<MountainCarContinuous-v0>>>>>
>>> env.reset(seed=123, options={"low": -0.7, "high": -0.5}) # default low=-0.6, high=-0.4
(array([-0.5635296, 0. ], dtype=float32), {})
```
## Version History
* v0: Initial versions release
"""

metadata = {
