Update documentation (#848)
* Update doc

* Fix potential bug in DummyVecEnv

* Install gym on RTD

* Add doc requirements

* Revert "Add doc requirements"

This reverts commit bdd2f914d5f3a356be45eca3f0986271003ae28f.

* Add version.txt

* Address comments

* Add imitation learning baselines

* Update install instructions

* Remove MkDocs config since we do not use it

Co-authored-by: Adam Gleave <adam@gleave.me>
araffin and AdamGleave committed May 12, 2020
1 parent a57c80e commit 6347da3
Showing 17 changed files with 156 additions and 51 deletions.
18 changes: 18 additions & 0 deletions .readthedocs.yml
@@ -0,0 +1,18 @@
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py

# Optionally build your docs in additional formats such as PDF and ePub
formats: all

# Optionally set the version of Python and requirements required to build your docs
python:
version: 3.7
install:
- requirements: docs/requirements.txt
7 changes: 7 additions & 0 deletions docs/common/monitor.rst
@@ -0,0 +1,7 @@
.. _monitor:

Monitor Wrapper
===============

.. automodule:: stable_baselines.bench.monitor
:members:
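
For reference, a minimal usage sketch of the wrapper documented above (the environment id and log path are illustrative, not part of this commit):

.. code-block:: python

    import gym

    from stable_baselines.bench import Monitor

    # Wrap the environment so that episode reward, length and elapsed time
    # are written to a CSV log file (pass filename=None to skip logging)
    env = Monitor(gym.make("CartPole-v1"), filename="/tmp/cartpole", allow_early_resets=True)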
21 changes: 11 additions & 10 deletions docs/conf.py
@@ -36,28 +36,29 @@ def __getattr__(cls, name):
# Mock modules that requires C modules
# Note: because of that we cannot test examples using CI
MOCK_MODULES = ['joblib', 'scipy', 'scipy.signal',
'pandas', 'mpi4py', 'mujoco-py', 'cv2', 'tensorflow',
'mpi4py', 'mujoco-py', 'cv2', 'tensorflow',
'tensorflow.contrib', 'tensorflow.contrib.layers',
'tensorflow.python', 'tensorflow.python.client', 'tensorflow.python.ops',
'tqdm', 'cloudpickle', 'matplotlib', 'matplotlib.pyplot',
'seaborn', 'gym', 'gym.spaces', 'gym.core',
'tensorflow.core', 'tensorflow.core.util', 'tensorflow.python.util',
'gym.wrappers', 'gym.wrappers.monitoring', 'zmq']
'tqdm', 'matplotlib', 'matplotlib.pyplot',
'seaborn', 'tensorflow.core', 'tensorflow.core.util', 'tensorflow.python.util',
'zmq']
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

import stable_baselines

# Read version from file
version_file = os.path.join(os.path.dirname(__file__), '../stable_baselines', 'version.txt')
with open(version_file, 'r') as file_handler:
__version__ = file_handler.read().strip()

# -- Project information -----------------------------------------------------

project = 'Stable Baselines'
copyright = '2018-2019, Stable Baselines'
copyright = '2018-2020, Stable Baselines'
author = 'Stable Baselines Contributors'

# The short X.Y version
version = 'master (' + stable_baselines.__version__ + ' )'
version = 'master (' + __version__ + ' )'
# The full version, including alpha/beta/rc tags
release = stable_baselines.__version__
release = __version__


# -- General configuration ---------------------------------------------------
4 changes: 2 additions & 2 deletions docs/guide/algos.rst
@@ -61,13 +61,13 @@ Reproducibility
Completely reproducible results are not guaranteed across Tensorflow releases or different platforms.
Furthermore, results need not be reproducible between CPU and GPU executions, even when using identical seeds.

In order to make make computations deterministic on CPU, on your specific problem on one specific platform,
In order to make computations deterministic on CPU, on your specific problem on one specific platform,
you need to pass a ``seed`` argument at the creation of a model and set `n_cpu_tf_sess=1` (number of CPUs used by the Tensorflow session).
If you pass an environment to the model using `set_env()`, then you also need to seed the environment first.
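
A minimal sketch of what this looks like in practice (the environment id and timestep budget are illustrative):

.. code-block:: python

    import gym

    from stable_baselines import PPO2

    # Seed the model and force a single-threaded Tensorflow session for CPU determinism
    model = PPO2('MlpPolicy', 'CartPole-v1', seed=0, n_cpu_tf_sess=1)

    # When attaching an environment later via set_env(), seed it first
    env = gym.make("CartPole-v1")
    env.seed(0)
    model.set_env(env)

    model.learn(total_timesteps=10000)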

.. note::

Because of the current limits of Tensorflow 1.x, we cannot ensure reproducible results on the GPU yet. We hope to solve that issue with Tensorflow 2.x support (cf `Issue #366 <https://github.com/hill-a/stable-baselines/issues/366>`_).
Because of the current limits of Tensorflow 1.x, we cannot ensure reproducible results on the GPU yet. This issue is solved in `Stable-Baselines3 "PyTorch edition" <https://github.com/DLR-RM/stable-baselines3>`_.


.. note::
43 changes: 31 additions & 12 deletions docs/guide/examples.rst
@@ -294,39 +294,58 @@ and multiprocessing for you.
env.render()
Mujoco: Normalizing input features
----------------------------------
PyBullet: Normalizing input features
------------------------------------

Normalizing input features may be essential to successful training of an RL agent
(by default, images are scaled but not other types of input),
for instance when training on `Mujoco <http://www.mujoco.org/>`_. For that, a wrapper exists and
for instance when training on `PyBullet <https://github.com/bulletphysics/bullet3/>`_ environments. For that, a wrapper exists and
will compute a running average and standard deviation of input features (it can do the same for rewards).


.. note::
We cannot provide a notebook for this example
because Mujoco is a proprietary engine and requires a license.

You need to install pybullet with ``pip install pybullet``.


.. code-block:: python
import os
import gym
import pybullet_envs
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines import PPO2
env = DummyVecEnv([lambda: gym.make("Reacher-v2")])
# Automatically normalize the input features
env = VecNormalize(env, norm_obs=True, norm_reward=False,
env = DummyVecEnv([lambda: gym.make("HalfCheetahBulletEnv-v0")])
# Automatically normalize the input features and reward
env = VecNormalize(env, norm_obs=True, norm_reward=True,
clip_obs=10.)
model = PPO2(MlpPolicy, env)
model = PPO2('MlpPolicy', env)
model.learn(total_timesteps=2000)
# Don't forget to save the VecNormalize statistics when saving the agent
log_dir = "/tmp/"
model.save(log_dir + "ppo_reacher")
env.save(os.path.join(log_dir, "vec_normalize.pkl"))
model.save(log_dir + "ppo_halfcheetah")
stats_path = os.path.join(log_dir, "vec_normalize.pkl")
env.save(stats_path)
# To demonstrate loading
del model, env
# Load the agent
model = PPO2.load(log_dir + "ppo_halfcheetah")
# Load the saved statistics
env = DummyVecEnv([lambda: gym.make("HalfCheetahBulletEnv-v0")])
env = VecNormalize.load(stats_path, env)
# do not update them at test time
env.training = False
# reward normalization is not needed at test time
env.norm_reward = False
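As a possible follow-up (not part of this commit), the reloaded agent can be checked on the normalized test environment with ``evaluate_policy`` from ``stable_baselines.common.evaluation``:

.. code-block:: python

    from stable_baselines.common.evaluation import evaluate_policy

    # Run a few evaluation episodes with the saved normalization statistics applied
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
    print("Mean reward: {:.2f} +/- {:.2f}".format(mean_reward, std_reward))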
Custom Policy Network
23 changes: 19 additions & 4 deletions docs/guide/install.rst
@@ -12,8 +12,8 @@ installed as follows

.. note::

Stable-Baselines supports Tensorflow versions from 1.8.0 to 1.14.0, and does not work on
Tensorflow versions 2.0.0 and above. Support for Tensorflow 2 API is planned.
Stable-Baselines supports Tensorflow versions from 1.8.0 to 1.15.0, and does not work on
Tensorflow versions 2.0.0 and above. PyTorch support is available in `Stable-Baselines3 <https://github.com/DLR-RM/stable-baselines3>`_.


Ubuntu
@@ -74,15 +74,30 @@ If you have already installed with MPI support, you can disable MPI by uninstall
with ``pip uninstall mpi4py``.


.. note::

Unless you are using the bleeding-edge version, you need to install the correct Tensorflow version manually. See `Issue #849 <https://github.com/hill-a/stable-baselines/issues/849>`_


Bleeding-edge version
---------------------

With support for running tests and building the documentation.
To install the latest master version:

.. code-block:: bash
pip install git+https://github.com/hill-a/stable-baselines
Development version
-------------------

To contribute to Stable-Baselines, install the development version, which comes with support for running tests and building the documentation.

.. code-block:: bash
git clone https://github.com/hill-a/stable-baselines && cd stable-baselines
pip install -e .[docs,tests]
pip install -e .[docs,tests,mpi]
Using Docker Images
5 changes: 5 additions & 0 deletions docs/index.rst
@@ -18,6 +18,10 @@ RL Baselines zoo also offers a simple interface to train, evaluate agents and do
You can read a detailed presentation of Stable Baselines in the
Medium article: `link <https://medium.com/@araffin/stable-baselines-a-fork-of-openai-baselines-reinforcement-learning-made-easy-df87c4b2fc82>`_

.. note::

Stable-Baselines3 (PyTorch edition) beta is now online: https://github.com/DLR-RM/stable-baselines3


.. Example of internal link: :ref:`ppo2`
@@ -84,6 +88,7 @@ This toolset is a fork of OpenAI Baselines, with a major structural refactoring,
common/schedules
common/evaluation
common/env_checker
common/monitor

.. toctree::
:maxdepth: 1
10 changes: 9 additions & 1 deletion docs/misc/changelog.rst
@@ -26,18 +26,27 @@ Bug Fixes:
- Fix consistency in ``setup_model()`` for SAC, ``target_entropy`` now uses ``self.action_space`` instead of ``self.env.action_space`` (@solliet)
- Fix reward threshold in ``test_identity.py``
- Partially fix tensorboard indexing for PPO2 (@enderdead)
- Fixed potential bug in ``DummyVecEnv`` where ``copy()`` was used instead of ``deepcopy()``

Deprecations:
^^^^^^^^^^^^^

Others:
^^^^^^^
- Added ``version.txt`` to manage version number in an easier way
- Added ``.readthedocs.yml`` to install requirements with Read the Docs

Documentation:
^^^^^^^^^^^^^^
- Fix typos (@caburu)
- Fix typos in PPO2 (@kvenkman)
- Removed ``stable_baselines\deepq\experiments\custom_cartpole.py`` (@aakash94)
- Added Google's motion imitation project
- Added documentation page for monitor
- Fixed typos and update ``VecNormalize`` example to show normalization at test-time
- Fixed ``train_mountaincar`` description
- Added imitation baselines project
- Updated install instructions


Release 2.10.0 (2020-03-11)
@@ -695,4 +704,3 @@ Thanks to @bjmuld @iambenzo @iandanforth @r7vme @brendenpetersen @huvar @abhiskk
@MarvineGothic @jdossgollin @SyllogismRXS @rusu24edward @jbulow @Antymon @seheevic @justinkterry @edbeeching
@flodorner @KuKuXia @NeoExtended @solliet @mmcenta @richardwu @tirafesi @caburu @johannes-dornheim @kvenkman @aakash94
@enderdead

28 changes: 23 additions & 5 deletions docs/misc/projects.rst
@@ -108,7 +108,7 @@ WaveRL: Training RL agents to perform active damping
----------------------------------------------------
Reinforcement learning is used to train agents to control pistons attached to a bridge to cancel out vibrations. The bridge is modeled as a one-dimensional oscillating system and dynamics are simulated using a finite difference solver. Agents were trained using Proximal Policy Optimization. See the presentation for environment details.

| Authors: Jack Berkowitz
| Author: Jack Berkowitz
| Email: jackberkowitz88@gmail.com
| GitHub: https://github.com/jaberkow/WaveRL
| Presentation: http://bit.ly/WaveRLslides
@@ -140,7 +140,7 @@ Designing an autonomous robot to perform a task involves interactions between va

Snake Game AI
--------------------------
AI to play the classic snake game.
AI to play the classic snake game.
The game was trained using PPO2 available from stable-baselines and
then exported to tensorflowjs to run directly on the browser

@@ -160,11 +160,10 @@ Pwnagotchi is an A2C-based “AI” powered by bettercap and running on a Raspbe

Quantized Reinforcement Learning (QuaRL)
----------------------------------------

QuaRL is an open-source framework to study the effects of quantization on a broad spectrum of reinforcement learning algorithms. The RL algorithms used in
this study are from stable-baselines.

| Author: Srivatsan Krishnan, Sharad Chitlangia, Maximilian Lam, Zishen Wan, Aleksandra Faust, Vijay Janapa Reddi
| Authors: Srivatsan Krishnan, Sharad Chitlangia, Maximilian Lam, Zishen Wan, Aleksandra Faust, Vijay Janapa Reddi
| Email: srivatsan@seas.harvard.edu
| Github: https://github.com/harvard-edge/quarl
| Paper: https://arxiv.org/pdf/1910.01055.pdf
@@ -175,6 +174,25 @@ PPO_CPP: C++ version of a Deep Reinforcement Learning algorithm PPO
Executes PPO at C++ level yielding notable execution performance speedups.
Uses Stable Baselines to create a computational graph which is then used for training with custom environments by a machine-code-compiled binary.

| Authors: Szymon Brych
| Author: Szymon Brych
| Email: szymon.brych@gmail.com
| GitHub: https://github.com/Antymon/ppo_cpp

Learning Agile Robotic Locomotion Skills by Imitating Animals
-------------------------------------------------------------
Learning locomotion gaits by imitating animals. It uses PPO1 and AWR.

| Authors: Xue Bin Peng, Erwin Coumans, Tingnan Zhang, Tsang-Wei Lee, Jie Tan, Sergey Levine
| Website: https://xbpeng.github.io/projects/Robotic_Imitation/index.html
| Github: https://github.com/google-research/motion_imitation
| Paper: https://arxiv.org/abs/2004.00784

Imitation Learning Baseline Implementations
-------------------------------------------
This project aims to provide clean implementations of imitation learning algorithms.
Currently we have implementations of AIRL and GAIL, and intend to add more in the future.

| Authors: Adam Gleave, Steven Wang, Nevan Wichers, Sam Toyer
| Github: https://github.com/HumanCompatibleAI/imitation
2 changes: 2 additions & 0 deletions docs/requirements.txt
@@ -0,0 +1,2 @@
gym
pandas
2 changes: 1 addition & 1 deletion scripts/build_docker.sh
@@ -4,7 +4,7 @@ CPU_PARENT=ubuntu:16.04
GPU_PARENT=nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04

TAG=stablebaselines/stable-baselines
VERSION=v2.10.0
VERSION=$(cat ./stable_baselines/version.txt)

if [[ ${USE_GPU} == "True" ]]; then
PARENT=${GPU_PARENT}
9 changes: 7 additions & 2 deletions setup.py
@@ -8,6 +8,11 @@
print('This Python is only compatible with Python 3, but you are running '
'Python {}. The installation will likely fail.'.format(sys.version_info.major))

# Read version from file
with open(os.path.join('stable_baselines', 'version.txt'), 'r') as file_handler:
__version__ = file_handler.read().strip()


# Check tensorflow installation to avoid
# breaking pre-installed tf gpu
def find_tf_dependency():
@@ -114,7 +119,7 @@ def find_tf_dependency():
packages=[package for package in find_packages()
if package.startswith('stable_baselines')],
package_data={
'stable_baselines': ['py.typed'],
'stable_baselines': ['py.typed', 'version.txt'],
},
install_requires=[
'gym[atari,classic_control]>=0.11',
@@ -152,7 +157,7 @@ def find_tf_dependency():
license="MIT",
long_description=long_description,
long_description_content_type='text/markdown',
version="2.10.1a0",
version=__version__,
)

# python setup.py sdist
7 changes: 6 additions & 1 deletion stable_baselines/__init__.py
@@ -1,3 +1,5 @@
import os

from stable_baselines.a2c import A2C
from stable_baselines.acer import ACER
from stable_baselines.acktr import ACKTR
@@ -20,4 +22,7 @@
from stable_baselines.trpo_mpi import TRPO
del mpi4py

__version__ = "2.10.1a0"
# Read version from file
version_file = os.path.join(os.path.dirname(__file__), 'version.txt')
with open(version_file, 'r') as file_handler:
__version__ = file_handler.read().strip()
19 changes: 9 additions & 10 deletions stable_baselines/bench/monitor.py
@@ -13,24 +13,23 @@


class Monitor(gym.Wrapper):
"""
A monitor wrapper for Gym environments, it is used to know the episode reward, length, time and other data.
:param env: (gym.Env) The environment
:param filename: (Optional[str]) the location to save a log file, can be None for no log
:param allow_early_resets: (bool) allows the reset of the environment before it is done
:param reset_keywords: (tuple) extra keywords for the reset call, if extra parameters are needed at reset
:param info_keywords: (tuple) extra information to log, from the information return of environment.step
"""
EXT = "monitor.csv"
file_handler = None

def __init__(self,
env: gym.Env,
filename: Optional[str],
allow_early_resets: bool = True,
reset_keywords=(),
info_keywords=()):
"""
A monitor wrapper for Gym environments, it is used to know the episode reward, length, time and other data.
:param env: (gym.Env) The environment
:param filename: (Optional[str]) the location to save a log file, can be None for no log
:param allow_early_resets: (bool) allows the reset of the environment before it is done
:param reset_keywords: (tuple) extra keywords for the reset call, if extra parameters are needed at reset
:param info_keywords: (tuple) extra information to log, from the information return of environment.step
"""
super(Monitor, self).__init__(env=env)
self.t_start = time.time()
if filename is None: