Skip to content

Commit

Permalink
Sample noise at update rate before training
Browse files Browse the repository at this point in the history
  • Loading branch information
Kaixhin committed May 29, 2018
1 parent 952fcb4 commit a8d01b8
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
3 changes: 1 addition & 2 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ def learn(self, mem):
# Sample transitions
idxs, states, actions, returns, next_states, nonterminals, weights = mem.sample(self.batch_size)

-# Calculate current state probabilities
-self.online_net.reset_noise() # Sample new noise for online network
+# Calculate current state probabilities (online network noise already sampled)
ps = self.online_net(states) # Probabilities p(s_t, ·; θonline)
ps_a = ps[range(self.batch_size), actions] # p(s_t, a_t; θonline)

Expand Down
4 changes: 3 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ def log(s):
while T < args.T_max:
if done:
state, done = env.reset(), False
-dqn.reset_noise() # Draw a new set of noisy weights for each episode (better for before learning starts)
+
+if T % args.replay_frequency == 0:
+dqn.reset_noise() # Draw a new set of noisy weights

action = dqn.act(state) # Choose an action greedily (with noisy weights)
next_state, reward, done = env.step(action) # Step
Expand Down

0 comments on commit a8d01b8

Please sign in to comment.