In [1]:
# Start 6 additional worker processes so that the later `@everywhere` definitions and the
# `@parallel` loop in est_reward have workers to run on.
addprocs(6);

In [2]:
@everywhere begin
import POMDPModels
using POMCP
import POMDPs
import POMDPToolbox.PreviousObservation

import POMCP.belief_from_node
import POMCP.init_V
import POMDPs.action
end

In [3]:
# Number of simulations averaged by each est_reward call in this notebook.
N = 2000;
# Read by est_reward and forwarded to POMDPs.simulate as its `eps` keyword.
# NOTE(review): presumably a termination tolerance — confirm against POMDPs.simulate.
# NOTE(review): this global has the same name as Base.eps; it is only assigned on the
# process that runs this cell.
eps = 0.01;
# BabyPOMDP instance that every experiment below is run on.
# NOTE(review): the meaning of the (-5, -10) constructor arguments is not visible here;
# presumably reward values — confirm against POMDPModels.BabyPOMDP.
problem = POMDPModels.BabyPOMDP(-5, -10);

In [4]:
# Policy that chooses between the two BabyAction values at random.
# `rng` is the random number generator each choice is drawn from.
@everywhere type RandomBabyPolicy <: POMDPs.Policy
    rng::AbstractRNG
end
# Return BabyAction(true) when a fresh draw from the policy's rng exceeds 0.5 and
# BabyAction(false) otherwise; the belief `b` is not consulted.
@everywhere function action(p::RandomBabyPolicy, b::POMDPs.Belief)
    draw = rand(p.rng)
    return POMDPModels.BabyAction(draw > 0.5)
end

In [10]:
# Extends POMCP.belief_from_node for BabyPOMDP: the belief associated with an observation
# node is a PreviousObservation wrapping that node's label. `problem` is used only for
# dispatch.
@everywhere belief_from_node(problem::POMDPModels.BabyPOMDP, node::POMCP.ObsNode) =
    PreviousObservation(node.label)

In [5]:
# Estimate the expected reward of `policy` on `problem` by averaging N simulations.
#
# Arguments:
#   problem - the problem passed to POMDPs.simulate
#   policy  - the policy being evaluated
#   belief  - the initial belief handed to every simulation
#   N       - number of simulations; simulation i uses MersenneTwister(i) as its rng, so
#             the set of runs is the same on every call
#
# Returns the sum of the N simulation results divided by N.
#
# Every simulation starts from POMDPModels.BabyState(false). The simulations are
# distributed over the available processes with `@parallel` and reduced with `+`.
function est_reward(problem, policy, belief, N)
    # Snapshot the notebook-level `eps` into a local on the calling process. The loop
    # body is shipped to the workers as a closure that carries its captured locals, so
    # every worker uses this value instead of whatever the global name `eps` happens
    # to resolve to on that worker (the global is only assigned on the master).
    sim_eps = eps
    # `total` rather than `sum`, to avoid shadowing Base.sum inside this function.
    total = @parallel (+) for i in 1:N
        sim_rng = MersenneTwister(i)
        POMDPs.simulate(problem, policy, belief,
                        rng=sim_rng,
                        eps=sim_eps,
                        initial_state=POMDPModels.BabyState(false))
    end
    return total/N
end

est_reward (generic function with 1 method)

In [6]:
# Baseline: the FeedWhenCrying policy, evaluated from a PreviousObservation belief built
# from BabyObservation(false).
# The expected reward for this nearly optimal policy is -17.14.
# A test from earlier this week with 5000 experiments gave -16.72.
est_reward(problem, POMDPModels.FeedWhenCrying(), PreviousObservation(POMDPModels.BabyObservation(false)), N)

-16.600315445486913

In [7]:
# Baseline: RandomBabyPolicy, which picks its action at random using its own seeded rng,
# evaluated from the same initial belief as the FeedWhenCrying baseline.
pol_rng = MersenneTwister(7)
est_reward(problem, RandomBabyPolicy(pol_rng), PreviousObservation(POMDPModels.BabyObservation(false)), N)

-32.34750118876957

In [11]:
# POMCP with FeedWhenCrying (FWC) as the rollout policy.
# A test from earlier this week with 5000 experiments gave -16.77.
# NOTE(review): the recorded output of this cell is an error raised on the workers
# ("FeedWhenCrying does not implement action"), so this run produced no estimate.
rng = MersenneTwister(2)

# NOTE(review): POMCPSolver is constructed positionally; only the rollout policy (first
# argument) and `rng` are identifiable from this cell — confirm the meaning of 0.01, 10,
# 500 and `false` against the POMCPSolver definition.
solver = POMCPSolver(POMDPModels.FeedWhenCrying(),
                     0.01,
                     10,
                     500, 
                     rng,
                     false)

policy = solve(solver, problem)

# Evaluate the resulting policy from a POMCPBeliefWrapper around
# BabyStateDistribution(0.0), timing the whole estimate.
@time est_reward(problem, policy, POMCPBeliefWrapper(POMDPModels.BabyStateDistribution(0.0)), N)

exception on 2: ERROR: FeedWhenCrying does not implement action
 in error at error.jl:21
 in action at /home/zach/.julia/POMDPs/src/policy.jl:4
 in simulate at /home/zach/.julia/POMDPs/src/simulate.jl:33
 in simulate at /home/zach/.julia/POMCP/src/solver.jl:70
 in search at /home/zach/.julia/POMCP/src/solver.jl:39
 in action at /home/zach/.julia/POMCP/src/solver.jl:13
 in simulate at /home/zach/.julia/POMDPs/src/simulate.jl:33
 in anonymous at no file:4
 in anonymous at multi.jl:1279
 in anonymous at multi.jl:848
 in run_work_thunk at multi.jl:621
 in run_work_thunk at multi.jl:630
 in anonymous at task.jl:6
exception on 3: ERROR: FeedWhenCrying does not implement action
 in error at error.jl:21
 in action at /home/zach/.julia/POMDPs/src/policy.jl:4
 in simulate at /home/zach/.julia/POMDPs/src/simulate.jl:33
 in simulate at /home/zach/.julia/POMCP/src/solver.jl:70
 in search at /home/zach/.julia/POMCP/src/solver.jl:39
 in action at /home/zach/.julia/POMCP/src/solver.jl:13
 in simulate 

LoadError: `+` has no method matching +(::ErrorException, ::ErrorException)
while loading In[11], in expression starting on line 56

exception on 6: exception on 7: ERROR: FeedWhenCrying does not implement action
 in error at error.jl:21
 in action at /home/zach/.julia/POMDPs/src/policy.jl:4
 in simulate at /home/zach/.julia/POMDPs/src/simulate.jl:33
 in simulate at /home/zach/.julia/POMCP/src/solver.jl:70
 in search at /home/zach/.julia/POMCP/src/solver.jl:39
 in action at /home/zach/.julia/POMCP/src/solver.jl:13
 in simulate at /home/zach/.julia/POMDPs/src/simulate.jl:33
 in anonymous at no file:4
 in anonymous at multi.jl:1279
 in anonymous at multi.jl:848
 in run_work_thunk at multi.jl:621
 in run_work_thunk at multi.jl:630
 in anonymous at task.jl:6
ERROR: FeedWhenCrying does not implement action
 in error at error.jl:21
 in action at /home/zach/.julia/POMDPs/src/policy.jl:4
 in simulate at /home/zach/.julia/POMDPs/src/simulate.jl:33
 in simulate at /home/zach/.julia/POMCP/src/solver.jl:70
 in search at /home/zach/.julia/POMCP/src/solver.jl:39
 in action at /home/zach/.julia/POMCP/src/solver.jl:13
 in simulate 

In [9]:
# POMCP with RandomBabyPolicy as the rollout policy.
# NOTE(review): `rng` and `rollout_pol_rng` are both seeded with 2, so the two generators
# start from identical states — confirm that this is intended.
rng = MersenneTwister(2)
rollout_pol_rng = MersenneTwister(2)

# NOTE(review): POMCPSolver is constructed positionally; only the rollout policy (first
# argument) and `rng` are identifiable from this cell — confirm the meaning of 0.01, 10,
# 300 and `false` against the POMCPSolver definition.
solver = POMCPSolver(RandomBabyPolicy(rollout_pol_rng),
                     0.01,
                     10,
                     300, 
                     rng,
                     false)

policy = solve(solver, problem)

# Evaluate the resulting policy from a POMCPBeliefWrapper around
# BabyStateDistribution(0.0), timing the whole estimate.
@time est_reward(problem, policy, POMCPBeliefWrapper(POMDPModels.BabyStateDistribution(0.0)), N)

elapsed time: 348.985811789 seconds (332384 bytes allocated)


-17.106689861988823