In [25]:
using POMDPs
using POMDPModelTools
using Distributions
using POMDPPolicies  # For random policy, you might need different packages for other policies like POMCP
using POMDPSimulators
using POMDPModels
using BeliefUpdaters
using Printf
using FIB

In [26]:
struct MyState
    s::Tuple{Int, Int}
    z::Int
    phi::Int
end

In [27]:
struct MyAction
    a::String
end


In [28]:
struct MyObservation
    s::Tuple{Int, Int}
    a::String
end

In [29]:
function human_policy(s, z)
    if s != (0, 0) # maybe the not equal operation is not working here
        return 0.5
    elseif z == 0
        return 0.9
    else
        return 0.1
    end
end


human_policy (generic function with 1 method)

In [30]:

function human_dynamics(s, z, phi)
    if phi == 0 || (s[1] == 1 && s[2] == 1) # Check for or operation
        return 0
    else
        return z
    end
end

human_dynamics (generic function with 1 method)

In [31]:
struct DrivePOMDP <: POMDP{MyState, MyAction, MyObservation} # POMDP{State, Action, Observation}
end

In [32]:
function POMDPs.states(pomdp::DrivePOMDP)
    s = MyState[] # initialize an array of GridWorldStates
    # loop over all our states, remeber there are two binary variables:
    # done (d)
    for x = 0:1, y = 0:1, z = 0:1, phi = 0:1
        push!(s, MyState((x,y),z, phi)) # check if statespace is being fully formed
    end
    return s
end

In [33]:
function POMDPs.stateindex(pomdp::DrivePOMDP, state::MyState)
    
    idx = 1 + state.s[1]*(2^0) + state.s[2]*(2^1) + state.z*(2^2)+ state.phi*(2^3) # check if idx is being correctly returned
    return idx 
end

In [34]:
function POMDPs.actions(pomdp::DrivePOMDP)
    s = MyAction[]
    for x = 0:1
        if (x == 0)
            push!(s, MyAction("stop"))
        else
            push!(s, MyAction("go"))
        end
    end
end

In [35]:
function POMDPs.actionindex(pomdp::DrivePOMDP, act::MyAction)
    
    if act.a =="stop"
        return 1
    else
        return 2
    end
end

In [36]:
function POMDPs.observations(pomdp::DrivePOMDP)
    s = MyObservation[] 
    for x = 0:1, y = 0:1, a = 0:1
        if (a == 0)
            push!(s,MyObservation((x,y),"stop"))
        else
            push!(s,MyObservation((x,y),"go"))
        end
    end
    return s
end

In [37]:
function POMDPs.obsindex(pomdp::DrivePOMDP, o::MyObservation)
    if o.a == "stop"
        idx = 1 + o.s[1]*(2^0) + o.s[2]*(2^1) + 0*(2^2)
    else 
        idx = 1 + o.s[1]*(2^0) + o.s[2]*(2^1) + 1*(2^2)
    end
    return idx 
end

In [38]:
function POMDPs.reward(pomdp::DrivePOMDP, state::MyState, action::MyAction)
    s = state.s
    if s == (0, 0)
        return 0
    elseif s == (1, 0)
        return -3
    elseif s == (1, 1)
        return -10
    elseif s == (0, 1)
        return +5
    else
        return 0
    end
end

In [39]:
function POMDPs.observation(pomdp::DrivePOMDP, state::MyState, action::MyAction)
    s, z = state.s, state.z
    a = action
    p_stop = human_policy(s, z)

    if rand() < p_stop
        return MyObservation(s, "stop")
    else
        return MyObservation(s, "go")
    end
end

In [40]:
function POMDPs.transition(pomdp::DrivePOMDP, state::MyState, action::MyAction)
    s, z, phi = state.s, state.z, state.phi
    a = action.a
    p_stop = human_policy(s, z)

    # reset the game
    if s[1] > 0 || s[2] > 0
        return MyState((0, 0), human_dynamics(s, z, phi), phi)
    end

    # move robot car
    s1 = [s[1], s[2]]
    if a == "go"
        s1[2] += 1
    end

    # move human car
    if rand() < p_stop
        return MyState((s1[1], s1[2]), human_dynamics(s, z, phi), phi)
    else
        s1[1] += 1
        return MyState((s1[1], s1[2]), human_dynamics(s, z, phi), phi)
    end
end

In [41]:
POMDPs.discount(pomdp::DrivePOMDP) = 0.95

In [42]:
POMDPs.initialstate(pomdp::DrivePOMDP) = MyState((0, 0), 1, 1)

In [43]:
pomdp = DrivePOMDP()

DrivePOMDP()

In [44]:
using Random
policy = RandomPolicy(pomdp)


RandomPolicy{TaskLocalRNG, DrivePOMDP, NothingUpdater}(TaskLocalRNG(), DrivePOMDP(), NothingUpdater())

In [45]:
for (s, a, r) in stepthrough(pomdp, policy, "s,a,r", max_steps=10)
    @show s
    @show a
    @show r
    println()
end

MethodError: MethodError: no method matching Random.Sampler(::Type{TaskLocalRNG}, ::Random.SamplerTrivial{MyState, Any}, ::Val{1})

Closest candidates are:
  Random.Sampler(::Type{<:AbstractRNG}, ::Random.Sampler, ::Union{Val{1}, Val{Inf}})
   @ Random ~/.julia/juliaup/julia-1.9.4+0.x64.linux.gnu/share/julia/stdlib/v1.9/Random/src/Random.jl:148
  Random.Sampler(::Type{<:AbstractRNG}, ::Any, ::Union{Val{1}, Val{Inf}})
   @ Random ~/.julia/juliaup/julia-1.9.4+0.x64.linux.gnu/share/julia/stdlib/v1.9/Random/src/Random.jl:184
  Random.Sampler(!Matched::AbstractRNG, ::Any, ::Union{Val{1}, Val{Inf}})
   @ Random ~/.julia/juliaup/julia-1.9.4+0.x64.linux.gnu/share/julia/stdlib/v1.9/Random/src/Random.jl:140
  ...


In [46]:
solver = FIBSolver()

FIBSolver(100, 0.001, false)

In [47]:
# Solve the problem offline and obtain the FIB policy which is an AlphaVectorPolicy
fib_policy = solve(solver, pomdp)

MethodError: MethodError: no method matching length(::Nothing)

Closest candidates are:
  length(!Matched::Union{Base.KeySet, Base.ValueIterator})
   @ Base abstractdict.jl:58
  length(!Matched::Union{SparseArrays.FixedSparseVector{Tv, Ti}, SparseArrays.SparseVector{Tv, Ti}} where {Tv, Ti})
   @ SparseArrays ~/.julia/juliaup/julia-1.9.4+0.x64.linux.gnu/share/julia/stdlib/v1.9/SparseArrays/src/sparsevector.jl:95
  length(!Matched::Union{DataStructures.OrderedRobinDict, DataStructures.RobinDict})
   @ DataStructures ~/.julia/packages/DataStructures/MKv4P/src/ordered_robin_dict.jl:86
  ...


In [48]:

# Create a TigerPOMDP policy that chooses actions at random
rand_policy = RandomPolicy(pomdp);

In [49]:
fib_reward = simulate(rollout_sim, pomdp, fib_policy);
rand_reward = simulate(rollout_sim, pomdp, rand_policy);

UndefVarError: UndefVarError: `rollout_sim` not defined

In [50]:
@show fib_reward;
@show rand_reward;

UndefVarError: UndefVarError: `fib_reward` not defined