Discrete MDP implemented using `github.com/JuliaPOMDP/QuickPOMDPs.jl` and tutorial [here](https://htmlview.glitch.me/?https://github.com/JuliaAcademy/Decision-Making-Under-Uncertainty/blob/master/html/1-MDPs.jl.html).

In [1]:
# throws warnings sometimes
using POMDPs, QuickPOMDPs, POMDPModelTools, POMDPPolicies, Parameters, Random, Plots, LinearAlgebra

In [2]:
# check Julia exists
print("Hello World")

Hello World

In [3]:
# TODO: figure out what a struct is
@with_kw struct MyParameters
    N::Int = 4   # size of item set
    K::Int = 3   # size of arm set
    M::Int = 2   # size of beta set
    y::Real = 1.0 # discount factor
    umax::Real = 10  # max utility
end

params = MyParameters()

MyParameters
  N: Int64 4
  K: Int64 3
  M: Int64 2
  y: Float64 1.0
  umax: Int64 10


In [4]:
struct State
    u::Array{Int}          # list of N utility values for N items
    d::Array{Array{Real}}  # list of K arm distributions, each assigning probabilities to N items
end

In [5]:
# space of possible utility functions
umax = 10
U = [[params.umax, 0, 0, 0],
    [0, params.umax, 0, 0],
    [0, 0, params.umax, 0],
    [0, 0, 0, params.umax]]

# space of possible arm distributions
D = [[[1/params.N, 1/params.N, 1/params.N, 1/params.N], [1, 0, 0, 0], [0.5, 0.5, 0, 0]],
    [[0, 0, 0, 1], [1/params.N, 1/params.N, 1/params.N, 1/params.N], [0.5, 0.5, 0, 0]]    ]

# State space
S = [State(u,d) for u in U, d in D]

# state comparator
Base.:(==)(s1::State, s2::State) = (s1.u == s2.u) && (s1.d == s2.d)

In [6]:
# Action space - actions are arm choices (K) or beta selections (M)
@enum Action C1 C2 C3 B1 B2
A = [C1, C2, C3, B1, B2]


5-element Vector{Action}:
 C1::Action = 0
 C2::Action = 1
 C3::Action = 2
 B1::Action = 3
 B2::Action = 4

In [7]:
# Transition function
function T(s::State, a::Action)
    return (s, 1.0)    # categorical distribution
end

T (generic function with 1 method)

In [12]:
# Reward function
function R(s::State, a::Action)
    # if arm pulled, return that arm's avg utility
    if a == C1
        utilities = s.u
        arm_dist = s.d[1]
        return dot(utilities, arm_dist)
    elseif a == C2
        utilities = s.u
        arm_dist = s.d[2]
        return dot(utilities, arm_dist)
    elseif a == C3
        utilities = s.u
        arm_dist = s.d[3]
        return dot(utilities, arm_dist)
    # if beta selected, return 0
    else
        return 0
    end
end

R (generic function with 1 method)

In [13]:
# define MDP...?
abstract type MyMDP <: MDP{State, Action} end
mdp = QuickMDP(MyMDP,
    states       = S,
    actions      = A,
    transition   = T,
    reward       = R,
    discount     = params.y,
    initialstate = S);

In [14]:
using DiscreteValueIteration

In [15]:
solver = ValueIterationSolver(max_iterations=20)
policy = solve(solver, mdp)
policy

INFO: POMDPLinter requirements for [34msolve(::ValueIterationSolver, ::Union{MDP,POMDP})[39m and dependencies. ([✔] = implemented correctly; [X] = not implemented; [?] = could not determine)

For [34msolve(::ValueIterationSolver, ::Union{MDP,POMDP})[39m:
[32m  [✔] discount(::QuickMDP{MyMDPStateActionNamedTuple{(:stateindex, :isterminal, :actionindex, :transition, :reward, :states, :actions, :discount, :initialstate), Tuple{Dict{State, Int64}, Bool, Dict{Action, Int64}, typeof(T), typeof(R), Matrix{State}, Vector{Action}, Float64, Matrix{State}}}})[39m
[32m  [✔] transition(::QuickMDP{MyMDPStateActionNamedTuple{(:stateindex, :isterminal, :actionindex, :transition, :reward, :states, :actions, :discount, :initialstate), Tuple{Dict{State, Int64}, Bool, Dict{Action, Int64}, typeof(T), typeof(R), Matrix{State}, Vector{Action}, Float64, Matrix{State}}}}, ::State, ::Action)[39m
[32m  [✔] reward(::QuickMDP{MyMDPStateActionNamedTuple{(:stateindex, :isterminal, :actionindex, :transition, 

LoadError: MethodError: no method matching pdf(::Tuple{State, Float64}, ::State)
[0mClosest candidates are:
[0m  pdf([91m::POMDPTools.BeliefUpdaters.DiscreteBelief[39m, ::Any) at ~/.julia/packages/POMDPTools/TTBlb/src/BeliefUpdaters/discrete.jl:59
[0m  pdf([91m::Uniform[39m, ::Any) at ~/.julia/packages/POMDPTools/TTBlb/src/POMDPDistributions/uniform.jl:29
[0m  pdf([91m::UnsafeUniform[39m, ::Any) at ~/.julia/packages/POMDPTools/TTBlb/src/POMDPDistributions/uniform.jl:50
[0m  ...