Skip to content

Commit

Permalink
Fix discount factor and node update on first run (#34)
Browse files Browse the repository at this point in the history
* Fixed the missing discount factor and the skipped node update when a new state node is first visited

* Added a `new_node` flag to avoid repeating the `simulate` line
  • Loading branch information
zsunberg committed Feb 16, 2018
1 parent 464b8c9 commit d77f22e
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions src/dpw.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ function simulate(dpw::DPWPlanner, snode::Int, d::Int)
a = tree.a_labels[sanode]

# state progressive widening
new_node = false
if length(tree.transitions[sanode]) <= sol.k_state*tree.n[sanode]^sol.alpha_state

sp, r = generate_sr(dpw.mdp, s, a, dpw.rng)
Expand All @@ -132,17 +133,19 @@ function simulate(dpw::DPWPlanner, snode::Int, d::Int)

if spnode == 0 # there was not a state node for sp already in the tree
spnode = insert_state_node!(tree, sp, sol.keep_tree || sol.check_repeat_state)
new_node = true
end
push!(tree.transitions[sanode], (spnode, r))

if tree.total_n[spnode] == 0
return r + estimate_value(dpw.solved_estimate, dpw.mdp, sp, d-1)
end
else
(spnode, r) = rand(dpw.rng, tree.transitions[sanode])
spnode, r = rand(dpw.rng, tree.transitions[sanode])
end

q = r + discount(dpw.mdp)*simulate(dpw, spnode, d-1)
if new_node
q = r + discount(dpw.mdp)*estimate_value(dpw.solved_estimate, dpw.mdp, sp, d-1)
else
q = r + discount(dpw.mdp)*simulate(dpw, spnode, d-1)
end

tree.n[sanode] += 1
tree.total_n[snode] += 1
Expand Down

0 comments on commit d77f22e

Please sign in to comment.