Add momentum parameter to A2C (#748)
* Add momentum parameter to A2C

* Change default momentum to zero

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
kantneel and araffin committed Apr 9, 2020
1 parent 31d9c83 commit 815628e
Showing 2 changed files with 6 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/misc/changelog.rst
@@ -14,6 +14,7 @@ Breaking Changes:
 
 New Features:
 ^^^^^^^^^^^^^
+- Added momentum parameter to A2C for the embedded RMSPropOptimizer (@kantneel)
 
 Bug Fixes:
 ^^^^^^^^^^
8 changes: 5 additions & 3 deletions stable_baselines/a2c/a2c.py
@@ -44,6 +44,7 @@ class A2C(ActorCriticRLModel):
     :param max_grad_norm: (float) The maximum value for the gradient clipping
     :param learning_rate: (float) The learning rate
     :param alpha: (float) RMSProp decay parameter (default: 0.99)
+    :param momentum: (float) RMSProp momentum parameter (default: 0.0)
     :param epsilon: (float) RMSProp epsilon (stabilizes square root computation in denominator of RMSProp update)
         (default: 1e-5)
     :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant',
@@ -63,8 +64,8 @@ class A2C(ActorCriticRLModel):
     """
 
     def __init__(self, policy, env, gamma=0.99, n_steps=5, vf_coef=0.25, ent_coef=0.01, max_grad_norm=0.5,
-                 learning_rate=7e-4, alpha=0.99, epsilon=1e-5, lr_schedule='constant', verbose=0,
-                 tensorboard_log=None, _init_setup_model=True, policy_kwargs=None,
+                 learning_rate=7e-4, alpha=0.99, momentum=0.0, epsilon=1e-5, lr_schedule='constant',
+                 verbose=0, tensorboard_log=None, _init_setup_model=True, policy_kwargs=None,
                  full_tensorboard_log=False, seed=None, n_cpu_tf_sess=None):
 
         self.n_steps = n_steps
@@ -73,6 +74,7 @@ def __init__(self, policy, env, gamma=0.99, n_steps=5, vf_coef=0.25, ent_coef=0.
         self.ent_coef = ent_coef
         self.max_grad_norm = max_grad_norm
         self.alpha = alpha
+        self.momentum = momentum
         self.epsilon = epsilon
         self.lr_schedule = lr_schedule
         self.learning_rate = learning_rate
@@ -180,7 +182,7 @@ def setup_model(self):
                         tf.summary.histogram('observation', train_model.obs_ph)
 
                 trainer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate_ph, decay=self.alpha,
-                                                    epsilon=self.epsilon)
+                                                    epsilon=self.epsilon, momentum=self.momentum)
                 self.apply_backprop = trainer.apply_gradients(grads)
 
                 self.train_model = train_model
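For context, a minimal usage sketch of the new parameter (not part of the commit): it assumes gym's CartPole-v1 and the built-in MlpPolicy, and the momentum value of 0.9 is purely illustrative; the default of 0.0 keeps the previous RMSProp behaviour.

    import gym

    from stable_baselines import A2C

    # CartPole-v1 and MlpPolicy are assumptions for illustration; any env/policy works.
    env = gym.make('CartPole-v1')

    # momentum is forwarded to the embedded tf.train.RMSPropOptimizer
    # (alongside alpha -> decay and epsilon); momentum=0.0 reproduces the old behaviour.
    model = A2C('MlpPolicy', env, alpha=0.99, momentum=0.9, epsilon=1e-5, verbose=1)
    model.learn(total_timesteps=10000)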
