Release v2.9.0 (#629)

* Bump version

* Add a message to PPO2 assert (closes #625)

* Update replay buffer docstring (closes #610)

* Don't specify a version for pytype

* Fix `VecEnv` docstrings (closes #577)

* Typo

* Re-add python version for pytype
araffin committed Dec 19, 2019
1 parent 99dcdba commit 98e9ee9
Showing 7 changed files with 21 additions and 10 deletions.
7 changes: 6 additions & 1 deletion docs/misc/changelog.rst
@@ -6,9 +6,11 @@ Changelog
 For download links, please look at `Github release page <https://github.com/hill-a/stable-baselines/releases>`_.
 
 
-Pre-Release 2.9.0a0 (WIP)
+Release 2.9.0 (2019-12-20)
 --------------------------
 
+*Reproducible results, automatic `VecEnv` wrapping, env checker and more usability improvements*
+
 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
 - The `seed` argument has been moved from `learn()` method to model constructor
@@ -59,6 +61,9 @@ Others:
 - Add pull request template
 - Replaced redundant code in load_results (@jbulow)
 - Minor PEP8 fixes in dqn.py (@justinkterry)
+- Add a message to the assert in `PPO2`
+- Update replay buffer docstring
+- Fix `VecEnv` docstrings
 
 Documentation:
 ^^^^^^^^^^^^^^
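For context on the headline breaking change listed above (the `seed` argument moving from `learn()` to the model constructor), here is a minimal sketch of the 2.9.0 usage; the environment id and timestep count are illustrative:

from stable_baselines import PPO2

# As of 2.9.0 the seed is fixed at construction time rather than in learn(),
# so results are reproducible from the moment the model is created.
model = PPO2('MlpPolicy', 'CartPole-v1', seed=0)
model.learn(total_timesteps=10000)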
2 changes: 1 addition & 1 deletion setup.py
@@ -146,7 +146,7 @@
     license="MIT",
     long_description=long_description,
     long_description_content_type='text/markdown',
-    version="2.9.0a0",
+    version="2.9.0",
 )
 
 # python setup.py sdist
2 changes: 1 addition & 1 deletion stable_baselines/__init__.py
@@ -20,4 +20,4 @@
 from stable_baselines.trpo_mpi import TRPO
 del mpi4py
 
-__version__ = "2.9.0a0"
+__version__ = "2.9.0"
3 changes: 2 additions & 1 deletion stable_baselines/common/vec_env/dummy_vec_env.py
@@ -12,7 +12,8 @@ class DummyVecEnv(VecEnv):
     multiprocess or multithread outweighs the environment computation time. This can also be used for RL methods that
     require a vectorized environment, but that you want a single environments to train with.
 
-    :param env_fns: ([Gym Environment]) the list of environments to vectorize
+    :param env_fns: ([callable]) A list of functions that will create the environments
+        (each callable returns a `Gym.Env` instance when called).
     """
 
     def __init__(self, env_fns):
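To illustrate the corrected docstring above, a minimal usage sketch (the environment id is illustrative): `env_fns` takes callables that build environments, not environment instances.

import gym
from stable_baselines.common.vec_env import DummyVecEnv

# Pass a function that creates the env; DummyVecEnv calls it itself.
env = DummyVecEnv([lambda: gym.make('CartPole-v1')])
obs = env.reset()  # shape (1, obs_dim): a single env, stepped in the current process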
3 changes: 2 additions & 1 deletion stable_baselines/common/vec_env/subproc_vec_env.py
@@ -62,7 +62,8 @@ class SubprocVecEnv(VecEnv):
     ``if __name__ == "__main__":`` block.
     For more information, see the multiprocessing documentation.
 
-    :param env_fns: ([Gym Environment]) Environments to run in subprocesses
+    :param env_fns: ([callable]) A list of functions that will create the environments
+        (each callable returns a `Gym.Env` instance when called).
     :param start_method: (str) method used to start the subprocesses.
            Must be one of the methods returned by multiprocessing.get_all_start_methods().
            Defaults to 'forkserver' on available platforms, and 'spawn' otherwise.
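The same callable convention applies here, plus the `__main__` guard the docstring warns about — a sketch, with an illustrative env id and worker count:

import gym
from stable_baselines.common.vec_env import SubprocVecEnv

def make_env():
    return gym.make('CartPole-v1')

if __name__ == '__main__':
    # The guard matters: 'spawn' and 'forkserver' re-import this module
    # in each worker process.
    env = SubprocVecEnv([make_env for _ in range(4)], start_method='spawn')
    obs = env.reset()  # shape (4, obs_dim): four envs running in subprocesses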
8 changes: 4 additions & 4 deletions stable_baselines/deepq/replay_buffer.py
@@ -22,7 +22,7 @@ def __len__(self):
 
     @property
     def storage(self):
-        """[(np.ndarray, float, float, np.ndarray, bool)]: content of the replay buffer"""
+        """[(Union[np.ndarray, int], Union[np.ndarray, int], float, Union[np.ndarray, int], bool)]: content of the replay buffer"""
         return self._storage
 
     @property
@@ -52,10 +52,10 @@ def add(self, obs_t, action, reward, obs_tp1, done):
         """
         add a new transition to the buffer
 
-        :param obs_t: (Any) the last observation
-        :param action: ([float]) the action
+        :param obs_t: (Union[np.ndarray, int]) the last observation
+        :param action: (Union[np.ndarray, int]) the action
         :param reward: (float) the reward of the transition
-        :param obs_tp1: (Any) the current observation
+        :param obs_tp1: (Union[np.ndarray, int]) the current observation
        :param done: (bool) is the episode done
         """
         data = (obs_t, action, reward, obs_tp1, done)
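A short sketch of why the docstring now says `Union[np.ndarray, int]`: observations and actions may be arrays or plain ints depending on the space (the values below are illustrative):

import numpy as np
from stable_baselines.deepq.replay_buffer import ReplayBuffer

buffer = ReplayBuffer(size=1000)
# Box observations are arrays; a Discrete action is stored as a plain int.
buffer.add(obs_t=np.zeros(4), action=1, reward=0.5, obs_tp1=np.ones(4), done=False)
obses_t, actions, rewards, obses_tp1, dones = buffer.sample(batch_size=1)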
6 changes: 5 additions & 1 deletion stable_baselines/ppo2/ppo2.py
@@ -324,7 +324,11 @@ def learn(self, total_timesteps, callback=None, log_interval=1, tb_log_name="PPO
 
         n_updates = total_timesteps // self.n_batch
         for update in range(1, n_updates + 1):
-            assert self.n_batch % self.nminibatches == 0
+            assert self.n_batch % self.nminibatches == 0, ("The number of minibatches (`nminibatches`) "
+                                                           "is not a factor of the total number of samples "
+                                                           "collected per rollout (`n_batch`), "
+                                                           "some samples won't be used."
+                                                           )
             batch_size = self.n_batch // self.nminibatches
             t_start = time.time()
             frac = 1.0 - (update - 1.0) / n_updates
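To see when the new assert message fires: `n_batch` is `n_envs * n_steps`, and `nminibatches` must divide it evenly. A sketch with illustrative numbers:

import gym
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv

env = DummyVecEnv([lambda: gym.make('CartPole-v1') for _ in range(4)])
# n_batch = 4 envs * 128 steps = 512, and 512 % 4 == 0, so the assert passes.
model = PPO2('MlpPolicy', env, n_steps=128, nminibatches=4)
# nminibatches=5 would give 512 % 5 != 0 and trigger the assert above.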
