
Commit

minor fixes (#372)
veds12 committed Oct 5, 2020
1 parent 608fc03 commit 52b0b4c
Showing 8 changed files with 8 additions and 3 deletions.
2 changes: 1 addition & 1 deletion docs/source/usage/tutorials/Deep/DDPG.rst
@@ -65,7 +65,7 @@ DDPG makes use of target networks for the actor(policy) and the critic(value) ne
 .. math::
-    y_t = r(s_t, a_t) + \gamma Q_targ(s_{t+1}, \mu_targ(s_{t+1}) \vert \theta^{Q})
+    y_t = r(s_t, a_t) + \gamma Q_{targ}(s_{t+1}, \mu_{targ}(s_{t+1}) \vert \theta^{Q})
 Building on Deterministic Policy Gradients, the gradient of the policy can be determined using the action-value function as

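For context on the corrected target above, here is a minimal sketch of how the bootstrapped target :math:`y_t` could be computed for a batch of transitions. The tensor names and the ``target_actor``/``target_critic`` callables are illustrative assumptions, not GenRL's actual implementation.

.. code-block:: python

    import torch

    def ddpg_target(rewards, next_states, dones, target_actor, target_critic, gamma=0.99):
        """Sketch of y_t = r_t + gamma * Q_targ(s_{t+1}, mu_targ(s_{t+1})).

        target_actor and target_critic stand in for the slowly updated target
        copies of the policy and critic networks (hypothetical callables).
        """
        with torch.no_grad():
            next_actions = target_actor(next_states)                       # mu_targ(s_{t+1})
            next_q = target_critic(next_states, next_actions).squeeze(-1)  # Q_targ(s_{t+1}, a')
            # Zero out the bootstrap term at terminal transitions
            return rewards + gamma * (1.0 - dones) * next_q
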
2 changes: 1 addition & 1 deletion docs/source/usage/tutorials/Deep/NoisyNet_DQN.rst
@@ -29,7 +29,7 @@ A noisy parameter :math:`\theta` is defined as:

 .. math::
-    \theta \coloneqq \mu + \Sigma \odot \epsilon
+    \theta := \mu + \Sigma \odot \epsilon
 where :math:`\Sigma` and :math:`\mu` are vectors of trainable parameters and :math:`\epsilon` is a vector of zero-mean noise. Hence, the loss function is now defined with respect to :math:`\Sigma` and :math:`\mu`,
 and the optimization now takes place with respect to them. :math:`\epsilon` is sampled from factorised Gaussian noise.
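
To make the corrected definition concrete, below is a minimal sketch of a noisy linear layer that parameterises its weights as :math:`\mu + \Sigma \odot \epsilon` with factorised Gaussian noise. The class and attribute names are illustrative and do not mirror GenRL's internals.

.. code-block:: python

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class NoisyLinear(nn.Module):
        """Linear layer whose weights are mu + sigma * eps (illustrative sketch)."""

        def __init__(self, in_features, out_features, sigma_init=0.5):
            super().__init__()
            self.in_features, self.out_features = in_features, out_features
            # Trainable mean and scale for every weight and bias
            self.weight_mu = nn.Parameter(torch.empty(out_features, in_features).uniform_(-0.1, 0.1))
            self.weight_sigma = nn.Parameter(torch.full((out_features, in_features), sigma_init / in_features ** 0.5))
            self.bias_mu = nn.Parameter(torch.zeros(out_features))
            self.bias_sigma = nn.Parameter(torch.full((out_features,), sigma_init / in_features ** 0.5))

        @staticmethod
        def _scaled_noise(size):
            # f(x) = sign(x) * sqrt(|x|), as used for factorised noise
            eps = torch.randn(size)
            return eps.sign() * eps.abs().sqrt()

        def forward(self, x):
            # Factorised noise: one noise vector per input, one per output
            eps_in = self._scaled_noise(self.in_features)
            eps_out = self._scaled_noise(self.out_features)
            weight = self.weight_mu + self.weight_sigma * torch.outer(eps_out, eps_in)
            bias = self.bias_mu + self.bias_sigma * eps_out
            return F.linear(x, weight, bias)
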
2 changes: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ critic network act on this feature vector to select an action and estimate the v
 action value
 GenRL provides support to incorporate this decoder network in all of the actor critic agents through a ``shared_layers``
-parameter. ``shared_layers`` takes the sizes of the mlp layers o be used, and ``None`` if no decoder network is to be
+parameter. ``shared_layers`` takes the sizes of the mlp layers to be used, and ``None`` if no decoder network is to be
 used

 As an example - in A2C -
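
As a purely illustrative sketch of the ``shared_layers`` parameter described above, a hypothetical A2C invocation could look like the following; the import paths, trainer API, and layer sizes shown here are assumptions, not the tutorial's actual example.

.. code-block:: python

    from genrl.agents import A2C
    from genrl.environments import VectorEnv
    from genrl.trainers import OnPolicyTrainer

    env = VectorEnv("CartPole-v0")
    # shared_layers gives the sizes of the MLP decoder shared by the actor and
    # the critic; pass None to keep the two networks fully separate.
    agent = A2C("mlp", env, shared_layers=(32, 32))
    trainer = OnPolicyTrainer(agent, env, epochs=10)
    trainer.train()
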
1 change: 1 addition & 0 deletions genrl/agents/deep/a2c/a2c.py
@@ -29,6 +29,7 @@ class A2C(OnPolicyAgent):
     gamma (float): The discount factor for rewards
     layers (:obj:`tuple` of :obj:`int`): Layers in the Neural Network
         of the Q-value function
+    shared_layers(:obj:`tuple` of :obj:`int`): Sizes of shared layers in Actor Critic if using
     lr_policy (float): Learning rate for the policy/actor
     lr_value (float): Learning rate for the critic
     rollout_size (int): Capacity of the Rollout Buffer
1 change: 1 addition & 0 deletions genrl/agents/deep/ddpg/ddpg.py
@@ -23,6 +23,7 @@ class DDPG(OffPolicyAgentAC):
     gamma (float): The discount factor for rewards
     layers (:obj:`tuple` of :obj:`int`): Layers in the Neural Network
         of the Q-value function
+    shared_layers(:obj:`tuple` of :obj:`int`): Sizes of shared layers in Actor Critic if using
     lr_policy (float): Learning rate for the policy/actor
     lr_value (float): Learning rate for the critic
     replay_size (int): Capacity of the Replay Buffer
1 change: 1 addition & 0 deletions genrl/agents/deep/ppo1/ppo1.py
@@ -29,6 +29,7 @@ class PPO1(OnPolicyAgent):
     gamma (float): The discount factor for rewards
     layers (:obj:`tuple` of :obj:`int`): Layers in the Neural Network
         of the Q-value function
+    shared_layers(:obj:`tuple` of :obj:`int`): Sizes of shared layers in Actor Critic if using
     lr_policy (float): Learning rate for the policy/actor
     lr_value (float): Learning rate for the Q-value function
     rollout_size (int): Capacity of the Rollout Buffer
1 change: 1 addition & 0 deletions genrl/agents/deep/sac/sac.py
@@ -22,6 +22,7 @@ class SAC(OffPolicyAgentAC):
     gamma (float): The discount factor for rewards
     policy_layers (:obj:`tuple` of :obj:`int`): Neural network layer dimensions for the policy
     value_layers (:obj:`tuple` of :obj:`int`): Neural network layer dimensions for the critics
+    shared_layers(:obj:`tuple` of :obj:`int`): Sizes of shared layers in Actor Critic if using
     lr_policy (float): Learning rate for the policy/actor
     lr_value (float): Learning rate for the critic
     replay_size (int): Capacity of the Replay Buffer
1 change: 1 addition & 0 deletions genrl/agents/deep/td3/td3.py
@@ -22,6 +22,7 @@ class TD3(OffPolicyAgentAC):
     gamma (float): The discount factor for rewards
     policy_layers (:obj:`tuple` of :obj:`int`): Neural network layer dimensions for the policy
     value_layers (:obj:`tuple` of :obj:`int`): Neural network layer dimensions for the critics
+    shared_layers(:obj:`tuple` of :obj:`int`): Sizes of shared layers in Actor Critic if using
     lr_policy (float): Learning rate for the policy/actor
     lr_value (float): Learning rate for the critic
     replay_size (int): Capacity of the Replay Buffer
