Update doc

DLR-RM · May 8, 2020 · 8a61913 · 8a61913
1 parent 97aea21
commit 8a61913
Show file tree

Hide file tree

Showing 11 changed files with 66 additions and 17 deletions.
diff --git a/docs/common/atari_wrappers.rst b/docs/common/atari_wrappers.rst
@@ -0,0 +1,7 @@
+.. _atari_wrapper:
+
+Atari Wrappers
+==============
+
+.. automodule:: stable_baselines3.common.atari_wrappers
+  :members:
diff --git a/docs/common/cmd_utils.rst b/docs/common/cmd_utils.rst
@@ -0,0 +1,7 @@
+.. _cmd_utils:
+
+Command Utils
+=========================
+
+.. automodule:: stable_baselines3.common.cmd_util
+  :members:
diff --git a/docs/common/logger.rst b/docs/common/logger.rst
@@ -0,0 +1,7 @@
+.. _logger:
+
+Logger
+======
+
+.. automodule:: stable_baselines3.common.logger
+  :members:
diff --git a/docs/common/monitor.rst b/docs/common/monitor.rst
@@ -0,0 +1,7 @@
+.. _monitor:
+
+Monitor Wrapper
+===============
+
+.. automodule:: stable_baselines3.common.monitor
+  :members:
diff --git a/docs/common/noise.rst b/docs/common/noise.rst
@@ -0,0 +1,7 @@
+.. _noise:
+
+Action Noise
+=============
+
+.. automodule:: stable_baselines3.common.noise
+  :members:
diff --git a/docs/common/utils.rst b/docs/common/utils.rst
@@ -0,0 +1,7 @@
+.. _utils:
+
+Utils
+=====
+
+.. automodule:: stable_baselines3.common.utils
+  :members:
diff --git a/docs/guide/rl_tips.rst b/docs/guide/rl_tips.rst
@@ -39,7 +39,7 @@ however, *don't expect the default ones to work* on any environment.
 Therefore, we *highly recommend you* to take a look at the `RL zoo <https://github.com/DLR-RM/rl-baselines3-zoo>`_ (or the original papers) for tuned hyperparameters.
 A best practice when you apply RL to a new problem is to do automatic hyperparameter optimization. Again, this is included in the `RL zoo <https://github.com/DLR-RM/rl-baselines3-zoo>`_.
 
-When applying RL to a custom problem, you should always normalize the input to the agent (e.g. using VecNormalize for PPO2/A2C)
+When applying RL to a custom problem, you should always normalize the input to the agent (e.g. using VecNormalize for PPO/A2C)
 and look at common preprocessing done on other environments (e.g. for `Atari <https://danieltakeshi.github.io/2016/11/25/frame-skipping-and-preprocessing-for-deep-q-networks-on-atari-2600-games/>`_, frame-stack, ...).
 Please refer to *Tips and Tricks when creating a custom environment* paragraph below for more advice related to custom environments.
 
@@ -137,7 +137,7 @@ Please use the hyperparameters in the `RL zoo <https://github.com/DLR-RM/rl-base
 Continuous Actions - Multiprocessed
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Take a look at PPO2, TRPO or A2C. Again, don't forget to take the hyperparameters from the `RL zoo <https://github.com/DLR-RM/rl-baselines3-zoo>`_
+Take a look at PPO, TRPO or A2C. Again, don't forget to take the hyperparameters from the `RL zoo <https://github.com/DLR-RM/rl-baselines3-zoo>`_
 for continuous actions problems (cf *Bullet* envs).
 
 .. note::

diff --git a/docs/index.rst b/docs/index.rst
@@ -3,11 +3,11 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-Welcome to Stable Baselines3 docs!
-==================================
+Welcome to Stable Baselines3 docs! - RL Baselines Made Easy
+===========================================================
 
 `Stable Baselines3 <https://github.com/DLR-RM/stable-baselines3>`_ is a set of improved implementations of reinforcement learning algorithms in PyTorch.
-It is the next major version (PyTorch edition) of `Stable Baselines <https://github.com/hill-a/stable-baselines>`_.
+It is the next major version of `Stable Baselines <https://github.com/hill-a/stable-baselines>`_.
 
 
 Github repository: https://github.com/DLR-RM/stable-baselines3
@@ -61,9 +61,15 @@ Main Features
   :maxdepth: 1
   :caption: Common
 
+  common/atari_wrappers
+  common/cmd_utils
   common/distributions
   common/evaluation
   common/env_checker
+  common/monitor
+  common/logger
+  common/noise
+  common/utils
 
 .. toctree::
   :maxdepth: 1

diff --git a/stable_baselines3/common/atari_wrappers.py b/stable_baselines3/common/atari_wrappers.py
@@ -33,10 +33,11 @@ class AtariWrapper(gym.Wrapper):
     :param screen_size: (int): resize Atari frame
     :param terminal_on_life_loss: (bool): if True, then step() returns done=True whenever a
             life is lost.
-    :param grayscale_obs: (bool): if True, then gray scale observation is returned, otherwise, RGB observation
+    :param grayscale_obs: (bool): if True (default), then gray scale observation is returned, otherwise, RGB observation
             is returned.
     :param scale_obs: (bool): if True, then observation normalized in range [0,1] is returned. It also limits memory
             optimization benefits of FrameStack Wrapper.
+    :param clip_reward: (bool) If True (default), the reward is clipped to {-1, 0, 1} depending on its sign.
     """
     def __init__(self, env: gym.Env,
                  noop_max: int = 30,

diff --git a/stable_baselines3/common/cmd_util.py b/stable_baselines3/common/cmd_util.py
@@ -94,7 +94,7 @@ def make_atari_env(env_id: Union[str, Type[gym.Env]],
     :param monitor_dir: (str) Path to a folder where the monitor files will be saved.
         If None, no file will be written, however, the env will still be wrapped
         in a Monitor wrapper to provide additional information about training.
-    :param wrapper_kwargs: (Dict[str, Any]) Optional keyword argument to pass to the env constructor
+    :param wrapper_kwargs: (Dict[str, Any]) Optional keyword arguments to pass to the ``AtariWrapper``
     :param env_kwargs: (Dict[str, Any]) Optional keyword arguments to pass to the env constructor
     :param vec_env_cls: (Type[VecEnv]) A custom `VecEnv` class constructor. Default: None.
     :param vec_env_kwargs: (Dict[str, Any]) Keyword arguments to pass to the `VecEnv` class constructor.

diff --git a/stable_baselines3/common/monitor.py b/stable_baselines3/common/monitor.py
@@ -13,6 +13,16 @@
 
 
 class Monitor(gym.Wrapper):
+    """
+    A monitor wrapper for Gym environments, used to track the episode reward, length, time and other data.
+
+    :param env: (gym.Env) The environment
+    :param filename: (Optional[str]) the location to save a log file, can be None for no log
+    :param allow_early_resets: (bool) allows the reset of the environment before it is done
+    :param reset_keywords: (Tuple[str, ...]) extra keywords for the reset call,
+        if extra parameters are needed at reset
+    :param info_keywords: (Tuple[str, ...]) extra information to log, from the information return of env.step()
+    """
     EXT = "monitor.csv"
 
     def __init__(self,
@@ -21,16 +31,6 @@ def __init__(self,
                  allow_early_resets: bool = True,
                  reset_keywords: Tuple[str, ...] = (),
                  info_keywords: Tuple[str, ...] = ()):
-        """
-        A monitor wrapper for Gym environments, it is used to know the episode reward, length, time and other data.
-
-        :param env: (gym.Env) The environment
-        :param filename: (Optional[str]) the location to save a log file, can be None for no log
-        :param allow_early_resets: (bool) allows the reset of the environment before it is done
-        :param reset_keywords: (Tuple[str, ...]) extra keywords for the reset call,
-            if extra parameters are needed at reset
-        :param info_keywords: (Tuple[str, ...]) extra information to log, from the information return of env.step()
-        """
         super(Monitor, self).__init__(env=env)
         self.t_start = time.time()
         if filename is None: