# Example of a generative POMDP

Source: https://github.com/JuliaPOMDP/POMDPExamples.jl/blob/master/notebooks/Defining-a-POMDP-with-the-Generative-Interface.ipynb

In [2]:
using POMDPs
using Random
using POMDPModelTools
using POMDPSimulators
using POMDPPolicies

In [3]:
struct BabyPOMDP <: POMDP{Bool, Bool, Bool}
    r_feed::Float64
    r_hungry::Float64
    p_become_hungry::Float64
    p_cry_when_hungry::Float64
    p_cry_when_not_hungry::Float64
    discount::Float64   
end

In [4]:
function POMDPs.gen(m::BabyPOMDP, s, a, rng)
    # transition model
    if a # feed
        sp = false
    elseif s # hungry
        sp = true
    else # not hungry
        sp = rand(rng) < m.p_become_hungry
    end
    
    # observation model
    if sp # hungry
        o = rand(rng) < m.p_cry_when_hungry
    else # not hungry
        o = rand(rng) < m.p_cry_when_not_hungry
    end
    
    # reward model
    r = s*m.r_hungry + a*m.r_feed
    
    # create and return a NamedTuple
    return (sp=sp, o=o, r=r)
end

In [5]:
BabyPOMDP() = BabyPOMDP(-5., -10., 0.1, 0.8, 0.1, 0.9);
POMDPs.initialstate_distribution(m::BabyPOMDP) = Deterministic(false)

m = BabyPOMDP()

# policy that maps every input to a feed (true) action
policy = FunctionPolicy(o->true)

for (s, a, r) in stepthrough(m, policy, "s,a,r", max_steps=10)
    @show s
    @show a
    @show r
    println()
end

s = false
a = true
r = -5.0

s = false
a = true
r = -5.0

s = false
a = true
r = -5.0

s = false
a = true
r = -5.0

s = false
a = true
r = -5.0

s = false
a = true
r = -5.0

s = false
a = true
r = -5.0

s = false
a = true
r = -5.0

s = false
a = true
r = -5.0

s = false
a = true
r = -5.0

