Sample noise at update rate before training

Kaixhin · May 29, 2018 · commit a8d01b8
1 parent: 952fcb4
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 3 deletions.
diff --git a/agent.py b/agent.py
@@ -49,8 +49,7 @@ def learn(self, mem):
     # Sample transitions
     idxs, states, actions, returns, next_states, nonterminals, weights = mem.sample(self.batch_size)
 
-    # Calculate current state probabilities
-    self.online_net.reset_noise()  # Sample new noise for online network
+    # Calculate current state probabilities (online network noise already sampled)
     ps = self.online_net(states)  # Probabilities p(s_t, ·; θonline)
     ps_a = ps[range(self.batch_size), actions]  # p(s_t, a_t; θonline)
 

diff --git a/main.py b/main.py
@@ -97,7 +97,9 @@ def log(s):
   while T < args.T_max:
     if done:
       state, done = env.reset(), False
-      dqn.reset_noise()  # Draw a new set of noisy weights for each episode (better for before learning starts)
+
+    if T % args.replay_frequency == 0:
+      dqn.reset_noise()  # Draw a new set of noisy weights
 
     action = dqn.act(state)  # Choose an action greedily (with noisy weights)
     next_state, reward, done = env.step(action)  # Step