Skip to content

Commit

Permalink
Update log_interval ppo2 (#73)
Browse files Browse the repository at this point in the history
The previous implementation does not comply with the objective.
Additionally, this makes it more consistent with the other algorithms, although A2C is still a bit different.
  • Loading branch information
huvar authored and araffin committed Nov 4, 2018
1 parent 7c95b74 commit 6776f53
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions stable_baselines/ppo2/ppo2.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def _train_step(self, learning_rate, cliprange, obs, returns, masks, actions, va

return policy_loss, value_loss, policy_entropy, approxkl, clipfrac

def learn(self, total_timesteps, callback=None, seed=None, log_interval=100, tb_log_name="PPO2"):
def learn(self, total_timesteps, callback=None, seed=None, log_interval=1, tb_log_name="PPO2"):
with SetVerbosity(self.verbose), TensorboardWriter(self.graph, self.tensorboard_log, tb_log_name) as writer:
self._setup_learn(seed)

Expand Down Expand Up @@ -319,7 +319,7 @@ def learn(self, total_timesteps, callback=None, seed=None, log_interval=100, tb_
if callback is not None:
callback(locals(), globals())

if self.verbose >= 1 and ((update + 1) % log_interval//100 == 0 or update == 0):
if self.verbose >= 1 and ((update + 1) % log_interval == 0 or update == 0):
explained_var = explained_variance(values, returns)
logger.logkv("serial_timesteps", (update + 1) * self.n_steps)
logger.logkv("nupdates", (update + 1))
Expand Down

0 comments on commit 6776f53

Please sign in to comment.