From 4ed79a7aa2a2d95cc442d1b900b5efcfa6bb9f0e Mon Sep 17 00:00:00 2001 From: Luffy Date: Wed, 4 Jul 2018 16:32:32 +0900 Subject: [PATCH] Detach z star p. --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index ab4df3d..5c885f3 100644 --- a/train.py +++ b/train.py @@ -94,7 +94,7 @@ def _trust_region_loss(model, distribution, ref_distribution, loss, threshold): z_star = [g_p - trust_factor.expand_as(k_p) * k_p for g_p, k_p in zip(g, k)] trust_loss = 0 for param, z_star_p in zip([model.fc_actor.weight, model.fc_actor.bias], z_star): - trust_loss += (param * z_star_p).sum() + trust_loss += (param * z_star_p.detach()).sum() _isolate_policy_grads(model, False) # Re-enable gradients for other parameters return trust_loss