In [1]:
using POMDPs
using POMDPModelTools
using Distributions
using POMDPPolicies  # For random policy, you might need different packages for other policies like POMCP
using POMDPSimulators
using POMDPModels
using BeliefUpdaters
using Printf
using FIB

In [2]:
struct driveState
    h::Int
    r::Int
    z::Int
    phi::Int
end

In [3]:
struct driveObservation
    h::Int
    r::Int
    a::Int
end

In [4]:
# return probability of action = "stop"
function human_policy(h, r,  z)
    if h != 0 || r !=0 # maybe the not equal operation is not working here
        return 0.5
    elseif z == 0
        return 0.9
    else
        return 0.1
    end
end


human_policy (generic function with 1 method)

In [5]:
m = human_policy(0,0,0)
println(m)

0.9


In [6]:
# return updated z
function human_dynamics(h, r, z, phi)
    if phi == 0
        return z
    else
        if h == 0 || r == 0
            return 0
        else
            return z
        end
    end
end

human_dynamics (generic function with 1 method)

In [7]:
h_test = 0
r_test = 0
z_test = 1
phi_test = 0
z_up_test = human_dynamics(h_test, r_test, z_test, phi_test)
print("z_updated ")
println(z_up_test)
hp_test = human_policy(h_test, r_test, z_up_test)
print("p_stop ")
println(hp_test) 

z_updated 1


p_stop 0.1


In [8]:
struct drivePOMDP <: POMDP{driveState, Int, driveObservation} # POMDP{State, Action, Observation}
    discount_factor::Float64 # discount 
end

In [9]:
drivePOMDP() = drivePOMDP(0.95)

drivePOMDP

In [10]:
s = driveState[] # initialize an array of GridWorldStates
    

driveState[]

In [11]:
for phi = 0:1, z = 0:1, y = 0:1, x = 0:1
    push!(s, driveState(x,y,z,phi))
end

In [12]:
println(s[14])

driveState(1, 0, 1, 1)


In [13]:
POMDPs.states(pomdp::drivePOMDP) = s

In [14]:
mdp_2 = drivePOMDP() 

drivePOMDP(0.95)

In [15]:
s_space = states(mdp_2)

16-element Vector{driveState}:
 driveState(0, 0, 0, 0)
 driveState(1, 0, 0, 0)
 driveState(0, 1, 0, 0)
 driveState(1, 1, 0, 0)
 driveState(0, 0, 1, 0)
 driveState(1, 0, 1, 0)
 driveState(0, 1, 1, 0)
 driveState(1, 1, 1, 0)
 driveState(0, 0, 0, 1)
 driveState(1, 0, 0, 1)
 driveState(0, 1, 0, 1)
 driveState(1, 1, 0, 1)
 driveState(0, 0, 1, 1)
 driveState(1, 0, 1, 1)
 driveState(0, 1, 1, 1)
 driveState(1, 1, 1, 1)

In [16]:
function POMDPs.stateindex(pomdp::drivePOMDP, state::driveState)  
    idx = 1 + state.h*(2^0) + state.r*(2^1) + state.z*(2^2)+ state.phi*(2^3) # check if idx is being correctly returned
    return idx 
end

In [17]:
num = stateindex(mdp_2,  driveState(0, 0, 1, 0))

5

In [18]:
POMDPs.actions(pomdp::drivePOMDP) = [0, 1]

In [19]:
j_1 = actions(mdp_2)

2-element Vector{Int64}:
 0
 1

In [20]:
print(j_1)

[0, 1]

In [21]:
function POMDPs.actionindex(pomdp::drivePOMDP, act::Int64)
    if act==0
        return 1
    elseif act==1
        return 2
    end
end

In [22]:
m_1 = actionindex(mdp_2, 0)

1

In [23]:
function POMDPs.observations(pomdp::drivePOMDP)
    s = driveObservation[] 
    for h = 0:1, r = 0:1, a = 0:1
        push!(s,driveObservation(h,r,a))
    end
    return s
end

In [24]:
ob_space = observations(mdp_2)

8-element Vector{driveObservation}:
 driveObservation(0, 0, 0)
 driveObservation(0, 0, 1)
 driveObservation(0, 1, 0)
 driveObservation(0, 1, 1)
 driveObservation(1, 0, 0)
 driveObservation(1, 0, 1)
 driveObservation(1, 1, 0)
 driveObservation(1, 1, 1)

In [25]:
function POMDPs.obsindex(pomdp::drivePOMDP, o::driveObservation)
    idx = 1 + o.h*(2^0) + o.r*(2^1) + o.a*(2^2)
    return idx 
end

In [26]:
obs_idx = obsindex(mdp_2, driveObservation(1,1,1))

8

In [27]:
function POMDPs.reward(pomdp::drivePOMDP, state::driveState, act::Int64)
    h = state.h
    r = state.r
    if h == 0 && r == 0
        rwd = 0
    elseif h == 1 && r == 0
        rwd = -3
    elseif h == 1 && r == 1
        rwd = -10
    elseif h == 0 && r == 1
        rwd = +5
    else
        rwd = 0
    end
    return rwd
end

In [28]:
rwd = reward(mdp_2, driveState(1,1,0,1), 0)

-10

In [29]:
function POMDPs.observation(pomdp::drivePOMDP, act::Int64, state::driveState)
    h, r, z, phi = state.h, state.r, state.z, state.phi
    a = act
    p_stop = human_policy(h, r, z)

    # If the action to stop is taken by the robot, the human stops at a probability of p_stop this distribution needs to be returned for both stop action and go action.
    if a == 0
        if r == 0 && h == 0
            return SparseCat([driveObservation(0,0,a), driveObservation(1,0,a)], [p_stop, 1-p_stop])
        else
            return Deterministic(1.0e-9)
        end
    elseif a == 1
        if r  == 0 && h == 0
            return SparseCat([driveObservation(0,1,a), driveObservation(1,1,a)], [p_stop, 1-p_stop])
        else
            return Deterministic(1.0e-9)
        end
    else
        return Deterministic(1.0e-9)
    end
end

In [30]:
obsdist = observation(mdp_2,1, driveState(0,0,1,1))

p_stop0.1


                                       [97;1mSparseCat distribution[0m           
                             [38;5;8m┌                                        ┐[0m 
   driveObservation(0, 1, 1) [38;5;8m┤[0m[38;5;2m■■■■[0m 0.1                                [38;5;8m [0m [38;5;8m[0m
   driveObservation(1, 1, 1) [38;5;8m┤[0m[38;5;2m■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■[0m 0.9 [38;5;8m [0m [38;5;8m[0m
                             [38;5;8m└                                        ┘[0m 

In [31]:
function POMDPs.transition(pomdp::drivePOMDP, state::driveState, act::Int64)
    h, r, z, phi = state.h, state.r, state.z, state.phi
    a = act
    p_stop = human_policy(h, r, z)

    # reset the game
    if h > 0 || r > 0
        return Deterministic(driveState(0,0,z,phi))
    elseif h == 0 && r == 0
        if a == 0
            return SparseCat([driveState(0,0,z,phi), driveState(0,1,z,phi)], [p_stop, 1-p_stop])
        elseif a == 1
            return SparseCat([driveState(1,0,z,phi), driveState(1,1,z,phi)], [p_stop, 1-p_stop])
        else
            return Deterministic(1.0e-9)
        end
    else
        return Deterministic(1.0e-9)
    end
end

In [32]:
trans_test = transition(mdp_2, driveState(0,0,1,1),0)

                                    [97;1mSparseCat distribution[0m           
                          [38;5;8m┌                                        ┐[0m 
   driveState(0, 0, 1, 1) [38;5;8m┤[0m[38;5;2m■■■■[0m 0.1                                [38;5;8m [0m [38;5;8m[0m
   driveState(0, 1, 1, 1) [38;5;8m┤[0m[38;5;2m■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■[0m 0.9 [38;5;8m [0m [38;5;8m[0m
                          [38;5;8m└                                        ┘[0m 

In [33]:
POMDPs.initialstate(pomdp::drivePOMDP) = SparseCat([(driveState(0, 0, 1, 1)), driveState(0,0,1,0)], [0.5, 0.5])

In [34]:
POMDPs.discount(pomdp::drivePOMDP) = pomdp.discount_factor

In [35]:
mdp_3 = drivePOMDP()

drivePOMDP(0.95)

In [36]:
using POMDPSimulators
using POMDPPolicies
policy = RandomPolicy(mdp_3)

RandomPolicy{Random.TaskLocalRNG, drivePOMDP, NothingUpdater}(Random.TaskLocalRNG(), drivePOMDP(0.95), NothingUpdater())

In [37]:
println(policy)

RandomPolicy{Random.TaskLocalRNG, drivePOMDP, NothingUpdater}(Random

.TaskLocalRNG(), drivePOMDP(0.95), NothingUpdater())


In [38]:
for (s, a, r) in stepthrough(mdp_3, policy, "s, a, r", max_steps=10)
    @show s
    @show a
    @show r
    println()
end

s = driveState(0, 0, 1, 1)
a = 1
r = 0

s = driveState(1, 1, 1, 1)
a = 0
r = -10

s = driveState(0, 0, 1, 1)
a = 1
r = 0

p_stop0.1
s = driveState(1, 1, 1, 1)
a = 1
r = -10

s = driveState(0, 0, 1, 1)
a = 0
r = 0

p_stop0.1
s = driveState(0, 1, 1, 1)
a = 1
r = 5

s = driveState(0, 0, 1, 1)
a = 1
r = 0

s = driveState(1, 1, 1, 1)
a = 0
r = -10

s = driveState(0, 0, 1, 1)
a = 1
r = 0

p_stop0.1
s = driveState(1, 0, 1, 1)
a = 1
r = -3



In [39]:
solver = FIBSolver()

FIBSolver(100, 0.001, false)

In [40]:
# Solve the problem offline and obtain the FIB policy which is an AlphaVectorPolicy
fib_policy = solve(solver, mdp_3)

p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.9
p_stop0.9
p_stop0.9
p_stop0.9


AlphaVectorPolicy{drivePOMDP, Int64}(drivePOMDP(0.95), 16, [[0.0, -3.0, 5.0, -10.0, 0.0, -3.0, 5.0, -10.0, 0.0, -3.0, 5.0, -10.0, 0.0, -3.0, 5.0, -10.0], [0.0, -3.0, 5.0, -10.0, 0.0, -3.0, 5.0, -10.0, 0.0, -3.0, 5.0, -10.0, 0.0, -3.0, 5.0, -10.0]], [0, 1])

In [41]:

# Create a TigerPOMDP policy that chooses actions at random
rand_policy = RandomPolicy(mdp_3);

In [42]:
# Create and run the rollout simulator
rollout_sim = RolloutSimulator(max_steps=10);
fib_reward = simulate(rollout_sim, mdp_3, fib_policy);
rand_reward = simulate(rollout_sim, mdp_3, rand_policy);

p_stop0.1
p_stop0.1


In [43]:
@show fib_reward;
@show rand_reward;

fib_reward = 19.54871321659179
rand_reward = 9.073653775185544


In [44]:
using POMDPs, POMDPModels, POMDPSimulators, BasicPOMCP

solver = POMCPSolver()
planner = solve(solver, mdp_3)

POMCPPlanner{drivePOMDP, BasicPOMCP.SolvedPORollout{RandomPolicy{Random._GLOBAL_RNG, drivePOMDP, NothingUpdater}, NothingUpdater, Random._GLOBAL_RNG}, Random._GLOBAL_RNG}(POMCPSolver
  max_depth: Int64 20
  c: Float64 1.0
  tree_queries: Int64 1000
  max_time: Float64 Inf
  tree_in_info: Bool false
  default_action: ExceptionRethrow ExceptionRethrow()
  rng: Random._GLOBAL_RNG Random._GLOBAL_RNG()
  estimate_value: RolloutEstimator
, drivePOMDP(0.95), BasicPOMCP.SolvedPORollout{RandomPolicy{Random._GLOBAL_RNG, drivePOMDP, NothingUpdater}, NothingUpdater, Random._GLOBAL_RNG}(RandomPolicy{Random._GLOBAL_RNG, drivePOMDP, NothingUpdater}(Random._GLOBAL_RNG(), drivePOMDP(0.95), NothingUpdater()), NothingUpdater(), Random._GLOBAL_RNG()), Random._GLOBAL_RNG(), Int64[], nothing)

In [45]:
b = initialstate(mdp_3)


                                    [97;1mSparseCat distribution[0m           
                          [38;5;8m┌                                        ┐[0m 
   driveState(0, 0, 1, 1) [38;5;8m┤[0m[38;5;2m■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■[0m 0.5 [38;5;8m [0m [38;5;8m[0m
   driveState(0, 0, 1, 0) [38;5;8m┤[0m[38;5;2m■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■[0m 0.5 [38;5;8m [0m [38;5;8m[0m
                          [38;5;8m└                                        ┘[0m 

In [47]:
b = initialstate(mdp_3)
a = action(planner, b)
println("""
    POMCP Recommends action 
b.

    (this may be a bad choice because the POMCP Parameters are set to their defaults.)
""")

MethodError: MethodError: Cannot `convert` an object of type Float64 to an object of type driveObservation

Closest candidates are:
  convert(::Type{T}, !Matched::T) where T
   @ Base Base.jl:84
  driveObservation(::Any, !Matched::Any, !Matched::Any)
   @ Main ~/Fall23/aizen/pomdp_test/drive_test_jl.ipynb:2


In [48]:

for (s, a, o) in stepthrough(mdp_3, planner, "s,a,o", max_steps=10)
    println("State was $s,")
    println("action $a was taken,")
end

p_stop0.1
p_stop0.1
p_stop0.1
p_stop0.1


MethodError: MethodError: Cannot `convert` an object of type Float64 to an object of type driveObservation

Closest candidates are:
  convert(::Type{T}, !Matched::T) where T
   @ Base Base.jl:84
  driveObservation(::Any, !Matched::Any, !Matched::Any)
   @ Main ~/Fall23/aizen/pomdp_test/drive_test_jl.ipynb:2


In [49]:


for (s, b, a, r, sp, o) in hist
    @show s, a, r, sp
end

rhist = simulate(hr, mdp_3, RandomPolicy(mdp_3))
println("""
    Cumulative Discounted Reward (for 1 simulation)
        Random: $(discounted_reward(rhist))
        POMCPOW: $(discounted_reward(hist))
    """)

UndefVarError: UndefVarError: `hist` not defined