Fixed DDPG sampling empty replay buffer when combined with HER (#746)

* fixes #743
* Update changelog
* Bump version [ci skip]
* Bump version

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
Stable-Baselines-Team · Mar 17, 2020 · 3069a0e
1 parent cfcdb2f
commit 3069a0e
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 3 deletions.
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -6,6 +6,29 @@ Changelog
 For download links, please look at `Github release page <https://github.com/hill-a/stable-baselines/releases>`_.
 
 
+Pre-Release 2.10.1a0 (WIP)
+---------------------------
+
+Breaking Changes:
+^^^^^^^^^^^^^^^^^
+
+New Features:
+^^^^^^^^^^^^^
+
+Bug Fixes:
+^^^^^^^^^^
+- Fixed DDPG sampling empty replay buffer when combined with HER (@tirafesi)
+
+Deprecations:
+^^^^^^^^^^^^^
+
+Others:
+^^^^^^^
+
+Documentation:
+^^^^^^^^^^^^^^
+
+
 Release 2.10.0 (2020-03-11)
 ---------------------------
 
@@ -659,4 +682,4 @@ Thanks to @bjmuld @iambenzo @iandanforth @r7vme @brendenpetersen @huvar @abhiskk
 @XMaster96 @kantneel @Pastafarianist @GerardMaggiolino @PatrickWalter214 @yutingsz @sc420 @Aaahh @billtubbs
 @Miffyli @dwiel @miguelrass @qxcv @jaberkow @eavelardev @ruifeng96150 @pedrohbtp @srivatsankrishnan @evilsocket
 @MarvineGothic @jdossgollin @SyllogismRXS @rusu24edward @jbulow @Antymon @seheevic @justinkterry @edbeeching
-@flodorner @KuKuXia @NeoExtended @solliet @mmcenta @richardwu
+@flodorner @KuKuXia @NeoExtended @solliet @mmcenta @richardwu @tirafesi
diff --git a/setup.py b/setup.py
@@ -152,7 +152,7 @@ def find_tf_dependency():
       license="MIT",
       long_description=long_description,
       long_description_content_type='text/markdown',
-      version="2.10.0",
+      version="2.10.1a0",
       )
 
 # python setup.py sdist

diff --git a/stable_baselines/__init__.py b/stable_baselines/__init__.py
@@ -20,4 +20,4 @@
     from stable_baselines.trpo_mpi import TRPO
 del mpi4py
 
-__version__ = "2.10.0"
+__version__ = "2.10.1a0"
diff --git a/stable_baselines/ddpg/ddpg.py b/stable_baselines/ddpg/ddpg.py
@@ -1000,6 +1000,11 @@ def learn(self, total_timesteps, callback=None, log_interval=100, tb_log_name="D
                                     eval_episode_reward = 0.
 
                     mpi_size = MPI.COMM_WORLD.Get_size()
+
+                    # Not enough samples in the replay buffer
+                    if not self.replay_buffer.can_sample(self.batch_size):
+                        continue
+
                     # Log stats.
                     # XXX shouldn't call np.mean on variable length lists
                     duration = time.time() - start_time