Skip to content

Commit

Permalink
Merge pull request #41 from hill-a/python37-patch
Browse files Browse the repository at this point in the history
Fixes for python 3.7
  • Loading branch information
hill-a committed Oct 1, 2018
2 parents 048eea4 + ae7d1b0 commit 288f458
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 14 deletions.
1 change: 1 addition & 0 deletions docs/misc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Pre Release 2.0.1.a0 (WIP)
**logging and bug fixes**

- added patch fix for equal function using `gym.spaces.MultiDiscrete` and `gym.spaces.MultiBinary`
- replaced `async` with `async_eigen_decomp` in ACKTR/KFAC for python 3.7 compat


Release 2.0.0 (2018-09-18)
Expand Down
2 changes: 1 addition & 1 deletion stable_baselines/acktr/acktr_cont.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def learn(env, policy, value_fn, gamma, lam, timesteps_per_batch, num_timesteps,
stepsize = tf.Variable(initial_value=np.float32(np.array(0.03)), name='stepsize')
inputs, loss, loss_sampled = policy.update_info
optim = kfac.KfacOptimizer(learning_rate=stepsize, cold_lr=stepsize * (1 - 0.9), momentum=0.9, kfac_update=2,
epsilon=1e-2, stats_decay=0.99, async=1, cold_iter=1,
epsilon=1e-2, stats_decay=0.99, async_eigen_decomp=1, cold_iter=1,
weight_decay_dict=policy.wd_dict, max_grad_norm=None)
pi_var_list = []
for var in tf.trainable_variables():
Expand Down
5 changes: 3 additions & 2 deletions stable_baselines/acktr/acktr_disc.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,9 @@ def setup_model(self):
with tf.variable_scope("kfac", reuse=False, custom_getter=tf_util.outer_scope_getter("kfac")):
with tf.device('/gpu:0'):
self.optim = optim = kfac.KfacOptimizer(learning_rate=pg_lr_ph, clip_kl=self.kfac_clip,
momentum=0.9, kfac_update=1, epsilon=0.01,
stats_decay=0.99, async=1, cold_iter=10,
momentum=0.9, kfac_update=1,
epsilon=0.01, stats_decay=0.99,
async_eigen_decomp=True, cold_iter=10,
max_grad_norm=self.max_grad_norm, verbose=self.verbose)

optim.compute_and_apply_stats(self.joint_fisher, var_list=params)
Expand Down
15 changes: 8 additions & 7 deletions stable_baselines/acktr/kfac.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@

class KfacOptimizer:
def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2, stats_accum_iter=60,
full_stats_init=False, cold_iter=100, cold_lr=None, async=False, async_stats=False, epsilon=1e-2,
stats_decay=0.95, blockdiag_bias=False, channel_fac=False, factored_damping=False, approx_t2=False,
full_stats_init=False, cold_iter=100, cold_lr=None, async_eigen_decomp=False,
async_stats=False, epsilon=1e-2, stats_decay=0.95, blockdiag_bias=False,
channel_fac=False, factored_damping=False, approx_t2=False,
use_float64=False, weight_decay_dict=None, max_grad_norm=0.5, verbose=1):
"""
Kfac Optimizer for ACKTR models
Expand All @@ -27,7 +28,7 @@ def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2
:param full_stats_init: (bool) whether or not to fully initialize stats
:param cold_iter: (int) number of steps for which the cold start learning rate is used
:param cold_lr: (float) Cold start learning rate
:param async: (bool) Use async eigen decomposition
:param async_eigen_decomp: (bool) Use async eigen decomposition
:param async_stats: (bool) Asynchronous stats update
:param epsilon: (float) epsilon value for small numbers
:param stats_decay: (float) the stats decay rate
Expand All @@ -46,7 +47,7 @@ def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2
self._clip_kl = clip_kl
self._channel_fac = channel_fac
self._kfac_update = kfac_update
self._async = async
self._async_eigen_decomp = async_eigen_decomp
self._async_stats = async_stats
self._epsilon = epsilon
self._stats_decay = stats_decay
Expand Down Expand Up @@ -866,7 +867,7 @@ def apply_gradients_kfac(self, grads):

queue_runner = None
# launch eigen-decomp on a queue thread
if self._async:
if self._async_eigen_decomp:
if self.verbose >= 1:
print('Use async eigen decomp')
# get a list of factor loading tensors
Expand Down Expand Up @@ -895,14 +896,14 @@ def dequeue_op():
assert self._update_stats_op is not None
update_ops.append(self._update_stats_op)
dependency_list = []
if not self._async:
if not self._async_eigen_decomp:
dependency_list.append(self._update_stats_op)

with tf.control_dependencies(dependency_list):
def no_op_wrapper():
return tf.group(*[tf.assign_add(self.cold_step, 1)])

if not self._async:
if not self._async_eigen_decomp:
# synchronous eigen-decomp updates
update_factor_ops = tf.cond(tf.logical_and(tf.equal(tf.mod(self.stats_step, self._kfac_update),
tf.convert_to_tensor(0)),
Expand Down
3 changes: 0 additions & 3 deletions stable_baselines/acktr/run_atari.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#!/usr/bin/env python3
from functools import partial

from stable_baselines import logger
from stable_baselines.acktr import ACKTR
from stable_baselines.common.cmd_util import make_atari_env, atari_arg_parser
Expand Down
2 changes: 1 addition & 1 deletion stable_baselines/acktr/value_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self, ob_dim, ac_dim, verbose=1):

optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001 * (1 - 0.9), momentum=0.9,
clip_kl=0.3, epsilon=0.1, stats_decay=0.95,
async=1, kfac_update=2, cold_iter=50,
async_eigen_decomp=True, kfac_update=2, cold_iter=50,
weight_decay_dict=wd_dict, max_grad_norm=None, verbose=verbose)
vf_var_list = []
for var in tf.trainable_variables():
Expand Down

0 comments on commit 288f458

Please sign in to comment.