Commit

appended documentation

AboudyKreidieh committed Sep 28, 2019
1 parent d764588 commit e1fdfcd
Showing 2 changed files with 44 additions and 2 deletions.
6 changes: 6 additions & 0 deletions hbaselines/hiro/algorithm.py
@@ -146,6 +146,11 @@ class TD3(object):
actor_update_freq : int
number of training steps per actor policy update step. The critic
policy is updated every training step.
meta_update_freq : int
number of training steps per meta policy update step. The actor policy
of the meta-policy is further updated at the frequency provided by the
actor_update_freq variable. Note that this value is only relevant when
using the GoalDirectedPolicy policy.
reward_scale : float
the value the reward should be scaled by
render : bool
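
The interaction between `actor_update_freq` and `meta_update_freq` documented in the attributes above can be summarized with a short sketch. This is an illustrative snippet only, not code from this commit; the `should_update` helper and the concrete frequency values are hypothetical.

```python
# Hypothetical helper illustrating the documented update frequencies.
actor_update_freq = 2   # critic updated every step, actor every 2 steps
meta_update_freq = 10   # meta (Manager) policy updated every 10 steps

def should_update(step):
    """Return which components get a gradient step at this training step."""
    return {
        "update_actor": step % actor_update_freq == 0,
        "update_meta": step % meta_update_freq == 0,
        # the meta-policy's actor is further delayed by actor_update_freq
        "update_meta_actor": step % (meta_update_freq * actor_update_freq) == 0,
    }
```

With these example values, `should_update(20)` would trigger the actor, the Manager critic, and the Manager actor in addition to the per-step critic update.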
@@ -917,6 +922,7 @@ def _evaluate(self, total_timesteps):
eval_action, _ = self._policy(
eval_obs,
apply_noise=False,
random_actions=False,
compute_q=False,
context=[getattr(self.eval_env, "current_context", None)],
episode_step=eval_episode_step)
40 changes: 38 additions & 2 deletions hbaselines/hiro/policy.py
@@ -627,7 +627,24 @@ def make_critic(self, obs, action, reuse=False, scope="qf"):
return qvalue_fn

def update(self, update_actor=True, **kwargs):
"""See parent class."""
"""Perform a gradient update step.
**Note**; The target update soft updates occur at the same frequency as
the actor update frequencies.
Parameters
----------
update_actor : bool
specifies whether to update the actor policy. The critic policy is
still updated if this value is set to False.
Returns
-------
float
critic loss
float
actor loss
"""
# Not enough samples in the replay buffer.
if not self.replay_buffer.can_sample(self.batch_size):
return 0, 0
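
For readers unfamiliar with the TD3 update, the control flow that this docstring and the buffer check above describe looks roughly like the following. It is a hedged sketch, not the hbaselines implementation; the `_train_critic`, `_train_actor`, and `_soft_update_targets` helpers are hypothetical placeholders.

```python
# Sketch of the update flow described in the docstring above. The helper
# methods are placeholders for the ops the real class would run.
def update(self, update_actor=True, **kwargs):
    # Not enough samples in the replay buffer: nothing to train on yet.
    if not self.replay_buffer.can_sample(self.batch_size):
        return 0, 0

    batch = self.replay_buffer.sample(self.batch_size)

    # The critic is trained on every call.
    critic_loss = self._train_critic(batch)          # hypothetical helper

    actor_loss = 0
    if update_actor:
        # The actor step and the target soft updates share one frequency.
        actor_loss = self._train_actor(batch)        # hypothetical helper
        self._soft_update_targets()                  # hypothetical helper

    return critic_loss, actor_loss
```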
@@ -1263,7 +1280,9 @@ def initialize(self):
self.meta_reward = 0

def update(self, update_actor=True, **kwargs):
"""See parent class.
"""Perform a gradient update step.
This is done both at the level of the Manager and Worker policies.
The kwargs argument for this method contains two additional terms:
@@ -1272,6 +1291,23 @@ def update(self, update_actor=True, **kwargs):
* update_meta_actor (bool): similar to the `update_policy` term, but
for the meta-policy. Note that, if `update_meta` is set to False,
this term is void.
**Note**: The target soft updates for both the Manager and the Worker
policies occur at the same frequency as their respective actor updates.

Parameters
----------
update_actor : bool
specifies whether to update the actor policy. The critic policy is
still updated if this value is set to False.
Returns
-------
(float, float)
manager critic loss, worker critic loss
(float, float)
manager actor loss, worker actor loss
"""
# Not enough samples in the replay buffer.
if not self.replay_buffer.can_sample(self.batch_size):
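
To make the Manager/Worker split and the `update_meta` / `update_meta_actor` flags above concrete, here is a minimal, self-contained sketch. The `manager_policy` and `worker_policy` arguments are hypothetical stand-ins for the two sub-policies; this is not the GoalDirectedPolicy implementation itself.

```python
def hierarchical_update(manager_policy, worker_policy, update_actor=True,
                        update_meta=True, update_meta_actor=True):
    """Sketch of a two-level update returning the documented loss tuples."""
    # The Worker (low-level) policy is updated on every call.
    worker_critic_loss, worker_actor_loss = worker_policy.update(
        update_actor=update_actor)

    manager_critic_loss, manager_actor_loss = 0, 0
    if update_meta:
        # update_meta_actor is ignored (void) when update_meta is False.
        manager_critic_loss, manager_actor_loss = manager_policy.update(
            update_actor=update_meta_actor)

    return ((manager_critic_loss, worker_critic_loss),
            (manager_actor_loss, worker_actor_loss))
```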
