JuliaPOMDP · MaximeBouton · Oct 11, 2019 · Sep 26, 2019 · Sep 26, 2019 · Sep 27, 2019
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "POMDPModels"
 uuid = "355abbd5-f08e-5560-ac9e-8b5f2592a0ca"
 repo = "https://github.com/JuliaPOMDP/POMDPModels.jl"
-version = "0.3.5"
+version = "0.4.0"
 
 [deps]
 BeliefUpdaters = "8bb6e9a1-7d73-552c-a44a-e5dc5634aac4"
@@ -18,8 +18,9 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
-POMDPSimulators = "< 0.3.0"
-julia = "1"
+POMDPSimulators = "0.3.0"
+POMDPs = "0.7.3, 0.8"
+julia = "1.0"
 
 [extras]
 NBInclude = "0db19996-df87-5ea3-a455-e3a50d440464"

diff --git a/src/CryingBabies.jl b/src/CryingBabies.jl
@@ -18,13 +18,11 @@ updater(problem::BabyPOMDP) = DiscreteUpdater(problem)
 
 actions(::BabyPOMDP) = (true, false)
 actionindex(::BabyPOMDP, a::Bool) = a + 1
-n_actions(::BabyPOMDP) = 2
 states(::BabyPOMDP) = (true, false)
 stateindex(::BabyPOMDP, s::Bool) = s + 1
-n_states(::BabyPOMDP) = 2
 observations(::BabyPOMDP) = (true, false)
 obsindex(::BabyPOMDP, o::Bool) = o + 1
-n_observations(::BabyPOMDP) = 2
+
 
 # start knowing baby is not not hungry
 initialstate_distribution(::BabyPOMDP) = BoolDistribution(0.0)
@@ -61,7 +59,7 @@ end
 
 discount(p::BabyPOMDP) = p.discount
 
-function generate_o(p::BabyPOMDP, s::Bool, rng::AbstractRNG)
+function gen(::DDNOut{:o}, p::BabyPOMDP, s::Bool, rng::AbstractRNG)
     d = observation(p, true, s) # obs distrubtion not action dependant
     return rand(rng, d)
 end

diff --git a/src/InvertedPendulum.jl b/src/InvertedPendulum.jl
@@ -12,7 +12,6 @@
 end
 
 actions(ip::InvertedPendulum) = [-50., 0., 50.]
-n_actions(ip::InvertedPendulum) = 3
 
 function initialstate(ip::InvertedPendulum, rng::AbstractRNG)
   sp = ((rand(rng)-0.5)*0.1, (rand(rng)-0.5)*0.1, )
@@ -53,10 +52,11 @@ function euler(m::InvertedPendulum,s::Tuple{Float64,Float64},a::Float64)
     return (th_,w_)
 end
 
-function generate_s(ip::InvertedPendulum,
-                    s::Tuple{Float64,Float64},
-                    a::Float64,
-                    rng::AbstractRNG)
+function gen(::DDNNode{:sp},
+             ip::InvertedPendulum,
+             s::Tuple{Float64,Float64},
+             a::Float64,
+             rng::AbstractRNG)
   a_offset = 20*(rand(rng)-0.5)
   a_ = a + a_offset
 

diff --git a/src/LightDark.jl b/src/LightDark.jl
@@ -53,7 +53,6 @@ isterminal(::LightDark1D, s::LightDark1DState) = s.status < 0
 
 
 actions(::LightDark1D) = -1:1
-n_actions(p::LightDark1D) = length(actions(p))
 
 
 struct LDNormalStateDist

diff --git a/src/MountainCar.jl b/src/MountainCar.jl
@@ -8,7 +8,6 @@
 end
 
 actions(::MountainCar) = [-1., 0., 1.]
-n_actions(mc::MountainCar) = 3
 
 reward(mc::MountainCar,
        s::Tuple{Float64,Float64},
@@ -20,10 +19,11 @@ initialstate(mc::MountainCar, ::AbstractRNG) = (-0.5,0.,)
 isterminal(::MountainCar,s::Tuple{Float64,Float64}) = s[1] >= 0.5
 discount(mc::MountainCar) = mc.discount
 
-function generate_s( mc::MountainCar,
-                     s::Tuple{Float64,Float64},
-                     a::Float64,
-                     ::AbstractRNG)
+function gen(::DDNNode{:sp},
+             mc::MountainCar,
+             s::Tuple{Float64,Float64},
+             a::Float64,
+             ::AbstractRNG)
   x,v = s
   v_ = v + a*0.001+cos(3*x)*-0.0025
   v_ = max(min(0.07,v_),-0.07)

diff --git a/src/POMDPModels.jl b/src/POMDPModels.jl
@@ -28,10 +28,10 @@ import Base: ==, hash
 import Random: rand, rand!
 import Distributions: pdf
 
-import POMDPs: initialstate, generate_s, generate_o, generate_sor, support, discount, isterminal
-import POMDPs: actions, n_actions, actionindex, action, dimensions
-import POMDPs: states, n_states, stateindex, transition
-import POMDPs: observations, observation, n_observations, obsindex
+import POMDPs: gen, support, discount, isterminal
+import POMDPs: actions, actionindex, action, dimensions
+import POMDPs: states, stateindex, transition
+import POMDPs: observations, observation, obsindex
 import POMDPs: initialstate, initialstate_distribution
 import POMDPs: updater, update
 import POMDPs: reward

diff --git a/src/TMazes.jl b/src/TMazes.jl
@@ -18,9 +18,6 @@ end
     discount::Float64 = 0.99 # discount factor
 end
 
-n_states(m::TMaze) = 2 * (m.n + 1) + 1 # 2*(corr length + 1 (junction)) + 1 (term)
-n_actions(::TMaze) = 4
-n_observations(::TMaze) = 5
 
 # state space is length of corr + 3 cells at the end
 #                   |G|
@@ -80,7 +77,7 @@ end
 support(d::TMazeInit) = zip(d.states, d.probs)
 function initialstate_distribution(maze::TMaze)
     s = states(maze)
-    ns = n_states(maze)
+    ns = length(s)
     p = zeros(ns) .+ 1.0 / (ns-1)
     p[end] = 0.0
     #s1 = TMazeState(1, :north, false)
@@ -219,7 +216,7 @@ function stateindex(maze::TMaze, s::TMazeState)
     end
 end
 
-function generate_o(maze::TMaze, s::TMazeState, rng::AbstractRNG)
+function gen(::DDNOut{:o}, maze::TMaze, s::TMazeState, rng::AbstractRNG)
     s.term ? (return 5) : (nothing)
     x = s.x; g = s.g
     #if x == 1

diff --git a/src/Tabular.jl b/src/Tabular.jl
@@ -43,12 +43,8 @@ pdf(d::DiscreteDistribution, sp::Int64) = d.p[sp] # T(s', a, s)
 rand(rng::AbstractRNG, d::DiscreteDistribution) = sample(rng, Weights(d.p))
 
 # MDP and POMDP common methods
-
-n_states(prob::TabularProblem) = size(prob.T, 1)
-n_actions(prob::TabularProblem) = size(prob.T, 2)
-
-states(p::TabularProblem) = 1:n_states(p)
-actions(p::TabularProblem) = 1:n_actions(p)
+states(p::TabularProblem) = 1:size(p.T, 1)
+actions(p::TabularProblem) = 1:size(p.T, 2)
 
 stateindex(::TabularProblem, s::Int64) = s
 actionindex(::TabularProblem, a::Int64) = a
@@ -59,7 +55,7 @@ transition(p::TabularProblem, s::Int64, a::Int64) = DiscreteDistribution(view(p.
 
 reward(prob::TabularProblem, s::Int64, a::Int64) = prob.R[s, a]
 
-initialstate_distribution(p::TabularProblem) = DiscreteDistribution(ones(n_states(p))./n_states(p))
+initialstate_distribution(p::TabularProblem) = DiscreteDistribution(ones(length(states(p)))./length(states(p)))
 
 # POMDP only methods
 n_observations(p::TabularProblem) = size(p.O, 1)

diff --git a/src/TigerPOMDPs.jl b/src/TigerPOMDPs.jl
@@ -24,10 +24,6 @@ const TIGER_LEFT = true
 const TIGER_RIGHT = false
 
 
-n_states(::TigerPOMDP) = 2
-n_actions(::TigerPOMDP) = 3
-n_observations(::TigerPOMDP) = 2
-
 # Resets the problem after opening door; does nothing after listening
 function transition(pomdp::TigerPOMDP, s::Bool, a::Int64)
     p = 1.0
@@ -81,7 +77,7 @@ end
 
 discount(pomdp::TigerPOMDP) = pomdp.discount_factor
 
-function generate_o(p::TigerPOMDP, s::Bool, rng::AbstractRNG)
-    d = observation(p, 0, s) # obs distrubtion not action dependant
+function gen(::DDNOut{:o}, p::TigerPOMDP, s::Bool, rng::AbstractRNG)
+    d = observation(p, 0, s) # obs distribution not action dependent
     return rand(rng, d)
 end
diff --git a/src/gridworld.jl b/src/gridworld.jl
@@ -31,12 +31,12 @@ function POMDPs.states(mdp::SimpleGridWorld)
     push!(ss, GWPos(-1,-1))
     return ss
 end
-POMDPs.n_states(mdp::SimpleGridWorld) = prod(mdp.size) + 1
+
 function POMDPs.stateindex(mdp::SimpleGridWorld, s::AbstractVector{Int})
     if all(s.>0)
         return LinearIndices(mdp.size)[s...]
     else
-        return n_states(mdp)
+        return prod(mdp.size) + 1 # one past the grid's linear indices: index for any position that is not all-positive; TODO: avoid hard-coding
     end
 end
 
@@ -59,7 +59,7 @@ POMDPs.initialstate_distribution(mdp::SimpleGridWorld) = GWUniform(mdp.size)
 
 POMDPs.actions(mdp::SimpleGridWorld) = (:up, :down, :left, :right)
 Base.rand(rng::AbstractRNG, t::NTuple{L,Symbol}) where L = t[rand(rng, 1:length(t))] # don't know why this doesn't work out of the box
-POMDPs.n_actions(mdp::SimpleGridWorld) = 4
+
 
 const dir = Dict(:up=>GWPos(0,1), :down=>GWPos(0,-1), :left=>GWPos(-1,0), :right=>GWPos(1,0))
 const aind = Dict(:up=>1, :down=>2, :left=>3, :right=>4)
@@ -76,16 +76,15 @@ function POMDPs.transition(mdp::SimpleGridWorld, s::AbstractVector{Int}, a::Symb
         return Deterministic(GWPos(-1,-1))
     end
 
-    destinations = MVector{n_actions(mdp)+1, GWPos}(undef)
+    destinations = MVector{length(actions(mdp))+1, GWPos}(undef)
     destinations[1] = s
 
-    # probs = MVector{n_actions(mdp)+1, Float64}()
-    probs = @MVector(zeros(n_actions(mdp)+1))
+    probs = @MVector(zeros(length(actions(mdp))+1))
     for (i, act) in enumerate(actions(mdp))
         if act == a
             prob = mdp.tprob # probability of transitioning to the desired cell
         else
-            prob = (1.0 - mdp.tprob)/(n_actions(mdp) - 1) # probability of transitioning to another cell
+            prob = (1.0 - mdp.tprob)/(length(actions(mdp)) - 1) # probability of transitioning to another cell
         end
 
         dest = s + dir[act]

diff --git a/src/legacy/GridWorlds.jl b/src/legacy/GridWorlds.jl
@@ -88,8 +88,6 @@ end
 
 actions(mdp::LegacyGridWorld) = [:up, :down, :left, :right]
 
-n_states(mdp::LegacyGridWorld) = mdp.size_x*mdp.size_y+1
-n_actions(mdp::LegacyGridWorld) = 4
 
 function reward(mdp::LegacyGridWorld, state::GridWorldState, action::Symbol)
     if state.done

diff --git a/test/crying.jl b/test/crying.jl
@@ -19,8 +19,8 @@ let
     r = simulate(sim, problem, policy, updater(policy), ib, true)
     @test r ≈ -100.0 atol=0.01
 
-    # test generate_o
-    o = generate_o(problem, true, MersenneTwister(1))
+    # test gen(::DDNOut{:o}, ...)
+    o = gen(DDNOut(:o), problem, true, MersenneTwister(1))
     @test o == 1
     # test vec
     ov = convert_s(Array{Float64}, true, problem)

diff --git a/test/lightdark.jl b/test/lightdark.jl
@@ -6,14 +6,14 @@ let
     p = LightDark1D()
     @test discount(p) == 0.9
     s0 = LightDark1DState(0,0)
-    s0, _, r = generate_sor(p, s0, +1, rng)
+    s0, _, r = gen(DDNOut(:sp, :o, :r), p, s0, +1, rng)
     @test s0.y == 1.0
     @test r == 0
-    s1, _, r = generate_sor(p, s0, 0, rng)
+    s1, _, r = gen(DDNOut(:sp, :o, :r), p, s0, 0, rng)
     @test s1.status != 0
     @test r == -10.0
     s2 = LightDark1DState(0, 5)
-    obs = generate_o(p, nothing, nothing, s2, rng)
+    obs = gen(DDNNode(:o), p, nothing, nothing, s2, rng)
     @test abs(obs-6.0) <= 1.1
 
 

diff --git a/test/tiger.jl b/test/tiger.jl
@@ -22,8 +22,8 @@ let
 
     simulate(sim, pomdp1, policy, updater(policy), initialstate_distribution(pomdp1))
 
-    # test generate_o
-    o = generate_o(pomdp1, true, MersenneTwister(1))
+    # test gen(::DDNOut{:o}, ...)
+    o = gen(DDNOut(:o), pomdp1, true, MersenneTwister(1))
     @test o == 1
     # test vec
     ov = convert_o(Array{Float64}, true, pomdp1)

diff --git a/test/tmaze.jl b/test/tmaze.jl
@@ -13,7 +13,7 @@ simulate(sim, problem, policy, updater(policy), initialstate_distribution(proble
 POMDPTesting.probability_check(problem)
 
 function test_obs(s::TMazeState, o::Int64)
-    ot = generate_o(problem, s, MersenneTwister(1))
+    ot = gen(DDNOut(:o), problem, s, MersenneTwister(1))
     @test ot == o
 end
 
@@ -27,5 +27,3 @@ ov = convert_o(Array{Float64}, 1, problem)
 @test ov == [1.]
 o = convert_o(Int64, ov, problem)
 @test o == 1
-
-