From 4ed79a7aa2a2d95cc442d1b900b5efcfa6bb9f0e Mon Sep 17 00:00:00 2001
From: Luffy <rajrohan1108@gmail.com>
Date: Wed, 4 Jul 2018 16:32:32 +0900
Subject: [PATCH] Detach z_star_p in _trust_region_loss.

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index ab4df3d..5c885f3 100644
--- a/train.py
+++ b/train.py
@@ -94,7 +94,7 @@ def _trust_region_loss(model, distribution, ref_distribution, loss, threshold):
   z_star = [g_p - trust_factor.expand_as(k_p) * k_p for g_p, k_p in zip(g, k)]
   trust_loss = 0
   for param, z_star_p in zip([model.fc_actor.weight, model.fc_actor.bias], z_star):
-    trust_loss += (param * z_star_p).sum()
+    trust_loss += (param * z_star_p.detach()).sum()
   _isolate_policy_grads(model, False)  # Re-enable gradients for other parameters
   return trust_loss