In [9]:
using POMDPs
using QuickPOMDPs
using POMDPModelTools: Uniform, Deterministic
using Distributions: Normal
using Random
using POMDPSimulators: HistoryRecorder
using POMDPPolicies: RandomPolicy

In [6]:
# One-dimensional localisation problem on the integer grid -r:r.
# `r` bounds the reachable positions; `light_loc` is the position at which the
# observation noise is smallest (see `observation` below).
r = 60
light_loc = 10

m = QuickPOMDP(
    states = -r:r+1,                  # r+1 is a terminal state
    actions = [-10, -1, 0, 1, 10],
    discount = 0.95,
    isterminal = s -> !(s in -r:r),   # anything outside the grid (i.e. r+1) ends the episode
    obstype = Float64,

    # Action 0 ends the episode by jumping to the terminal state r+1; every
    # other action shifts the position by `a`, clamped to stay on the grid.
    transition = function (s, a)
        if a == 0
            return Deterministic(r+1)
        else
            return Deterministic(clamp(s+a, -r, r))
        end
    end,

    # Noisy position reading: the standard deviation grows with the distance
    # from `light_loc`; the 0.0001 offset keeps it strictly positive at the light.
    observation = (s, a, sp) -> Normal(sp, abs(sp - light_loc) + 0.0001),

    # Stopping (a == 0) pays +100 at the origin and -100 anywhere else; each
    # move costs 1. Every branch returns a Float64 so the reward's type does
    # not depend on which action was taken.
    reward = function (s, a)
        if a == 0
            return s == 0 ? 100.0 : -100.0
        else
            return -1.0
        end
    end,

    # Start uniformly at random in the middle half of the grid.
    initialstate = Uniform(div(-r,2):div(r,2))
);

In [12]:
# Roll out a single episode of `m` under a uniformly random policy and keep the
# full step-by-step history.
trajectory = let recorder = HistoryRecorder(), policy = RandomPolicy(m)
    simulate(recorder, m, policy)
end

7-element POMDPSimulators.SimHistory{NamedTuple{(:s, :a, :sp, :o, :r, :info, :t, :action_info, :b, :bp, :update_info), Tuple{Int64, Int64, Int64, Float64, Float64, Nothing, Int64, Nothing, Nothing, Nothing, Nothing}}, Float64}:
 (s = -28, a = -10, sp = -38, o = -32.607772834636705, r = -1.0, info = nothing, t = 1, action_info = nothing, b = nothing, bp = nothing, update_info = nothing)
 (s = -38, a = -1, sp = -39, o = -54.75286154041623, r = -1.0, info = nothing, t = 2, action_info = nothing, b = nothing, bp = nothing, update_info = nothing)
 (s = -39, a = -1, sp = -40, o = -67.47664792779011, r = -1.0, info = nothing, t = 3, action_info = nothing, b = nothing, bp = nothing, update_info = nothing)
 (s = -40, a = -10, sp = -50, o = -59.56307574595351, r = -1.0, info = nothing, t = 4, action_info = nothing, b = nothing, bp = nothing, update_info = nothing)
 (s = -50, a = 1, sp = -49, o = -153.67193204899058, r = -1.0, info = nothing, t = 5, action_info = nothing, b = nothing, bp = nothin

In [11]:
"""
    rejection_particle_update(m, b, a, o)

Return an updated particle belief after taking action `a` in model `m` and
receiving observation `o`, using rejection sampling.

A state is repeatedly drawn from `b` and pushed through the generative model
to obtain a next state and a simulated observation. The next state is kept
only when the simulated observation equals `o`. Sampling continues until the
new belief holds as many particles as `b`.

# Arguments
- `m`: the problem model; must support `statetype(m)` and `@gen(:sp, :o)`.
- `b`: the current belief, a collection supporting `rand(b)` and `length(b)`.
- `a`: the action that was taken.
- `o`: the observation that was received.

# Returns
A `Vector{statetype(m)}` with `length(b)` accepted next-state particles.

!!! note
    Acceptance requires an exact match `o_gen == o`. If no sampled transition
    can reproduce `o` exactly (for example with continuous-valued
    observations), the loop never terminates.
"""
function rejection_particle_update(m, b, a, o)
    n = length(b)                 # keep the particle count constant
    bp = statetype(m)[]
    sizehint!(bp, n)
    while length(bp) < n
        s = rand(b)
        sp, o_gen = @gen(:sp, :o)(m, s, a)
        if o_gen == o
            # Keep the propagated state, not a fresh draw from the prior.
            push!(bp, sp)
        end
    end
    return bp
end

rejection_particle_update (generic function with 1 method)