Release 2.5.1 (#304)
* Fix typo in kl-div

* Update tb legacy instructions

* Bump version

* Capitalize Leibler

* Typo in GAIL model
araffin committed May 4, 2019
1 parent f238a4c commit bddd1ab
Showing 13 changed files with 26 additions and 16 deletions.
8 changes: 8 additions & 0 deletions docs/guide/tensorboard.rst
@@ -89,6 +89,14 @@ For that, you need to define several environment variables:
export OPENAI_LOG_FORMAT='stdout,log,csv,tensorboard'
export OPENAI_LOGDIR=path/to/tensorboard/data

and to configure the logger using:

.. code-block:: python

    from stable_baselines.logger import configure

    configure()

Then start tensorboard with:

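For completeness, the same legacy setup can be done from inside Python rather than the shell. A minimal sketch, assuming (as in the OpenAI baselines logger this module derives from) that ``configure()`` with no arguments reads ``OPENAI_LOG_FORMAT`` and ``OPENAI_LOGDIR`` from the environment:

    import os

    # Equivalent to the shell exports above; both variables must be set
    # before configure() is called.
    os.environ['OPENAI_LOG_FORMAT'] = 'stdout,log,csv,tensorboard'
    os.environ['OPENAI_LOGDIR'] = 'path/to/tensorboard/data'

    from stable_baselines.logger import configure

    configure()  # picks up the two variables set above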
8 changes: 5 additions & 3 deletions docs/misc/changelog.rst
@@ -5,9 +5,11 @@ Changelog

For download links, please look at `Github release page <https://github.com/hill-a/stable-baselines/releases>`_.

-Pre-Release 2.5.1a0 (WIP)
+Release 2.5.1 (2019-05-04)
--------------------------

**Bug fixes + improvements in the VecEnv**

- doc update (fix example of result plotter + improve doc)
- fixed logger issues when stdout lacks ``read`` function
- fixed a bug in ``common.dataset.Dataset`` where shuffling was not disabled properly (it affects only PPO1 with recurrent policies)
@@ -20,8 +22,8 @@ Pre-Release 2.5.1a0 (WIP)
``set_attr`` now returns ``None`` rather than a list of ``None``. (@kantneel)
- ``GAIL``: ``gail.dataset.ExpertDataset`` supports loading from memory rather than file, and
``gail.dataset.record_expert`` supports returning in-memory rather than saving to file.
- added support in ``VecEnvWrapper`` for accessing attributes of arbitrarily deeply nested
instances of ``VecEnvWrapper`` and ``VecEnv``. This is allowed as long as the attribute belongs
to exactly one of the nested instances, i.e. it must be unambiguous (see the sketch below). (@kantneel)
- fixed bug where result plotter would crash on very short runs (@Pastafarianist)
- added option to not trim output of result plotter by number of timesteps (@Pastafarianist)
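To illustrate the nested-attribute access described in the ``VecEnvWrapper`` entry above, here is a minimal sketch; the wrapper stack is illustrative, and it assumes ``clip_obs`` is defined only on ``VecNormalize``:

    import gym
    from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize, VecFrameStack

    # Two layers of VecEnvWrapper around a DummyVecEnv.
    env = DummyVecEnv([lambda: gym.make('CartPole-v1')])
    env = VecNormalize(env)
    env = VecFrameStack(env, n_stack=4)

    # clip_obs lives on VecNormalize, one wrapper down, yet it resolves
    # from the outermost wrapper because exactly one nested instance
    # defines it (an ambiguous name raises an error instead).
    print(env.clip_obs)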
2 changes: 1 addition & 1 deletion setup.py
@@ -143,7 +143,7 @@
license="MIT",
long_description=long_description,
long_description_content_type='text/markdown',
version="2.5.1a0",
version="2.5.1",
)

# python setup.py sdist
2 changes: 1 addition & 1 deletion stable_baselines/__init__.py
@@ -9,4 +9,4 @@
from stable_baselines.trpo_mpi import TRPO
from stable_baselines.sac import SAC

-__version__ = "2.5.1a0"
+__version__ = "2.5.1"
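A quick way to confirm which release is installed after upgrading:

    import stable_baselines

    # Should print '2.5.1' once this release is installed.
    print(stable_baselines.__version__)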
2 changes: 1 addition & 1 deletion stable_baselines/acktr/acktr_cont.py
@@ -72,7 +72,7 @@ def learn(env, policy, value_fn, gamma, lam, timesteps_per_batch, num_timesteps,
:param num_timesteps: (int) the total number of timesteps to run
:param animate: (bool) if render env
:param callback: (function) called every step, used for logging and saving
-:param desired_kl: (float) the Kullback leibler weight for the loss
+:param desired_kl: (float) the Kullback-Leibler weight for the loss
"""
obfilter = ZFilter(env.observation_space.shape)

2 changes: 1 addition & 1 deletion stable_baselines/acktr/acktr_disc.py
@@ -33,7 +33,7 @@ class ACKTR(ActorCriticRLModel):
:param vf_fisher_coef: (float) The weight for the fisher loss on the value function
:param learning_rate: (float) The initial learning rate for the RMS prop optimizer
:param max_grad_norm: (float) The clipping value for the maximum gradient
-:param kfac_clip: (float) gradient clipping for Kullback leiber
+:param kfac_clip: (float) gradient clipping for Kullback-Leibler
:param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant',
'double_linear_con', 'middle_drop' or 'double_middle_drop')
:param verbose: (int) the verbosity level: 0 none, 1 training information, 2 tensorflow debug
2 changes: 1 addition & 1 deletion stable_baselines/acktr/kfac.py
@@ -22,7 +22,7 @@ def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2
:param learning_rate: (float) The learning rate
:param momentum: (float) The momentum value for the TensorFlow momentum optimizer
-:param clip_kl: (float) gradient clipping for Kullback leiber
+:param clip_kl: (float) gradient clipping for Kullback-Leibler
:param kfac_update: (int) update kfac after kfac_update steps
:param stats_accum_iter: (int) how many steps to accumulate stats
:param full_stats_init: (bool) whether or not to fully initialize stats
4 changes: 2 additions & 2 deletions stable_baselines/acktr/utils.py
@@ -33,12 +33,12 @@ def dense(input_tensor, size, name, weight_init=None, bias_init=0, weight_loss_d

def kl_div(action_dist1, action_dist2, action_size):
"""
-Kullback leiber divergence
+Kullback-Leibler divergence
:param action_dist1: ([TensorFlow Tensor]) action distribution 1
:param action_dist2: ([TensorFlow Tensor]) action distribution 2
:param action_size: (int) the shape of an action
-:return: (float) Kullback leiber divergence
+:return: (float) Kullback-Leibler divergence
"""
mean1, std1 = action_dist1[:, :action_size], action_dist1[:, action_size:]
mean2, std2 = action_dist2[:, :action_size], action_dist2[:, action_size:]
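For context, the helper whose docstring is corrected above computes the closed-form KL divergence between two diagonal Gaussian action distributions, each packed as ``[means, stds]`` along the last axis. A NumPy sketch of that computation (the split mirrors the lines shown above; the rest is the standard diagonal-Gaussian formula, not text taken from this diff):

    import numpy as np

    def kl_div_np(action_dist1, action_dist2, action_size):
        # Split each packed distribution into its mean and std halves.
        mean1, std1 = action_dist1[:, :action_size], action_dist1[:, action_size:]
        mean2, std2 = action_dist2[:, :action_size], action_dist2[:, action_size:]
        # KL(N1 || N2) per dimension, summed over the action dimensions;
        # the epsilon guards against division by zero.
        numerator = np.square(mean1 - mean2) + np.square(std1) - np.square(std2)
        denominator = 2.0 * np.square(std2) + 1e-8
        return np.sum(numerator / denominator + np.log(std2) - np.log(std1), axis=-1)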
2 changes: 1 addition & 1 deletion stable_baselines/common/distributions.py
@@ -39,7 +39,7 @@ def neglogp(self, x):

def kl(self, other):
"""
-Calculates the Kullback-Leiber divergence from the given probabilty distribution
+Calculates the Kullback-Leibler divergence from the given probability distribution
:param other: ([float]) the distribution to compare with
:return: (float) the KL divergence of the two distributions
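As a reminder of the quantity whose spelling this commit standardizes, the Kullback-Leibler divergence of a distribution Q from a distribution P is, in the usual notation:

    D_{\mathrm{KL}}(P \,\|\, Q) = \mathbb{E}_{x \sim P}\bigl[\log P(x) - \log Q(x)\bigr]

It is non-negative and zero exactly when P = Q, which is why trust-region methods such as TRPO use it to bound each policy update.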
2 changes: 1 addition & 1 deletion stable_baselines/gail/model.py
@@ -15,7 +15,7 @@ class GAIL(TRPO):
:param expert_dataset: (ExpertDataset) the dataset manager
:param gamma: (float) the discount value
:param timesteps_per_batch: (int) the number of timesteps to run per batch (horizon)
-:param max_kl: (float) the kullback leiber loss threashold
+:param max_kl: (float) the Kullback-Leibler loss threshold
:param cg_iters: (int) the number of iterations for the conjugate gradient calculation
:param lam: (float) GAE factor
:param entcoeff: (float) the weight for the entropy loss
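A usage sketch tying the corrected ``max_kl`` docstring to the in-memory ``ExpertDataset`` support mentioned in the changelog above; the expert file name is hypothetical and the hyperparameters are illustrative only:

    from stable_baselines import GAIL
    from stable_baselines.gail import ExpertDataset

    # 'expert_pendulum.npz' is a placeholder for a recorded expert
    # trajectory file; per the changelog, the dataset can now also be
    # built from an in-memory structure instead of a file.
    dataset = ExpertDataset(expert_path='expert_pendulum.npz', verbose=1)

    model = GAIL('MlpPolicy', 'Pendulum-v0',
                 expert_dataset=dataset,
                 max_kl=0.01,  # Kullback-Leibler threshold per policy step
                 verbose=1)
    model.learn(total_timesteps=10000)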
2 changes: 1 addition & 1 deletion stable_baselines/ppo1/pposgd_simple.py
@@ -145,7 +145,7 @@ def setup_model(self):
tf.summary.scalar('entropy_loss', pol_entpen)
tf.summary.scalar('policy_gradient_loss', pol_surr)
tf.summary.scalar('value_function_loss', vf_loss)
-tf.summary.scalar('approximate_kullback-leiber', meankl)
+tf.summary.scalar('approximate_kullback-leibler', meankl)
tf.summary.scalar('clip_factor', clip_param)
tf.summary.scalar('loss', total_loss)

2 changes: 1 addition & 1 deletion stable_baselines/ppo2/ppo2.py
@@ -161,7 +161,7 @@ def setup_model(self):
tf.summary.scalar('entropy_loss', self.entropy)
tf.summary.scalar('policy_gradient_loss', self.pg_loss)
tf.summary.scalar('value_function_loss', self.vf_loss)
-tf.summary.scalar('approximate_kullback-leiber', self.approxkl)
+tf.summary.scalar('approximate_kullback-leibler', self.approxkl)
tf.summary.scalar('clip_factor', self.clipfrac)
tf.summary.scalar('loss', loss)

4 changes: 2 additions & 2 deletions stable_baselines/trpo_mpi/trpo_mpi.py
@@ -26,7 +26,7 @@ class TRPO(ActorCriticRLModel):
:param env: (Gym environment or str) The environment to learn from (if registered in Gym, can be str)
:param gamma: (float) the discount value
:param timesteps_per_batch: (int) the number of timesteps to run per batch (horizon)
-:param max_kl: (float) the kullback leiber loss threshold
+:param max_kl: (float) the Kullback-Leibler loss threshold
:param cg_iters: (int) the number of iterations for the conjugate gradient calculation
:param lam: (float) GAE factor
:param entcoeff: (float) the weight for the entropy loss
@@ -183,7 +183,7 @@ def setup_model(self):
tf.summary.scalar('entropy_loss', meanent)
tf.summary.scalar('policy_gradient_loss', optimgain)
tf.summary.scalar('value_function_loss', surrgain)
-tf.summary.scalar('approximate_kullback-leiber', meankl)
+tf.summary.scalar('approximate_kullback-leibler', meankl)
tf.summary.scalar('loss', optimgain + meankl + entbonus + surrgain + meanent)

self.assign_old_eq_new = \
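The same threshold drives plain ``TRPO``, and after the tag fix above a run will log the corrected ``approximate_kullback-leibler`` scalar. A short sketch with illustrative hyperparameters:

    from stable_baselines import TRPO

    # max_kl bounds the Kullback-Leibler divergence between successive
    # policies at each update (0.01 is the library default).
    model = TRPO('MlpPolicy', 'Pendulum-v0', max_kl=0.01,
                 tensorboard_log='./trpo_tensorboard/', verbose=1)
    model.learn(total_timesteps=10000)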
