In [1]:
using Revise

In [2]:
using PricingMDP

using StaticArrays
using POMDPs
using MCTS, DiscreteValueIteration
using StatsBase
using Plots
using POMDPSimulators
using D3Trees
using POMDPPolicies
using Random
using POMDPModelTools

# Makesim investigations

In [3]:
rng = MersenneTwister(123)
mdp_params = Dict(:demand => Float64[4,4], :selling_horizon_end => [25,30])
mcts_params = Dict(:n_iterations=>5000, 
    :depth=>30, 
    :exploration_constant=>30.0, 
    :enable_tree_vis=>true, 
    :estimate_value=>RolloutEstimator(RandomSolver(rng)))
params = Dict(:mdp=>mdp_params, :mcts=>mcts_params)

# r, h, mmc, mvi, policy, planner = makesim(params; n_runs = 3)

Dict{Symbol,Dict{Symbol,V} where V} with 2 entries:
  :mcts => Dict{Symbol,Any}(:n_iterations=>5000,:estimate_value=>RolloutEstimat…
  :mdp  => Dict{Symbol,Array{T,1} where T}(:selling_horizon_end=>[25, 30],:dema…

In [4]:
mdp_vi = PricingMDP.create_PMDP(PMDPe; params[:mdp]...)
mdp_mc = PricingMDP.create_PMDP(PMDPg; params[:mdp]...) 

policy = PricingMDP.get_VI_policy(mdp_vi)
planner = PricingMDP.get_MCTS_planner(mdp_mc, params[:mcts]);

┌ Info: residual: 17.999999999999996, time: 0.2090015, total time: 0.2090015 
│   i = 1
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\ZF0ln\src\sparse.jl:75
┌ Info: residual: 4.1579999999999995, time: 0.000192599, total time: 0.209194099 
│   i = 2
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\ZF0ln\src\sparse.jl:75
┌ Info: residual: 4.11642, time: 0.0001662, total time: 0.209360299 
│   i = 3
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\ZF0ln\src\sparse.jl:75
┌ Info: residual: 3.9661227783936006, time: 0.0001606, total time: 0.209520899 
│   i = 4
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\ZF0ln\src\sparse.jl:75
┌ Info: residual: 3.747031929488518, time: 0.000146, total time: 0.20966689900000002 
│   i = 5
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\ZF0ln\src\sparse.jl:75
┌ Info: residual: 3.488294253647559

In [6]:
s = State([2,1], 2,[0,1])

a_vi = action(policy, s) 
a, info = action_info(planner, s);

display("vi: $a_vi, mc: $a, VI Q-values:")
display(hcat(mdp_vi.actions, policy.qmat[stateindex(mdp_vi, s), :]))

D3Tree(info[:tree], init_expand=1)

"vi: 30.0, mc: 45.0, VI Q-values:"

5×2 Array{Float64,2}:
    0.0  13.5515
   15.0  32.9245
   30.0  39.484
   45.0  39.484
 1000.0  39.484

# Investigating details in planner

In [12]:
actions = Action[0,5,10,15,20,25,30,35,40,45,50,55,60,1000]
mdp_vi = PricingMDP.create_PMDP(PMDPe; actions=actions)
mdp_mc = PricingMDP.create_PMDP(PMDPg; actions=actions) 

policy = PricingMDP.get_VI_policy(mdp_vi)
# planner = PricingMDP.get_MCTS_planner(mdp_mc)

┌ Info: residual: 18.0, time: 0.000460899, total time: 0.000460899 
│   i = 1
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\xZnqm\src\sparse.jl:75
┌ Info: residual: 5.346, time: 0.0003481, total time: 0.000808999 
│   i = 2
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\xZnqm\src\sparse.jl:75
┌ Info: residual: 5.292539999999999, time: 0.000326899, total time: 0.001135898 
│   i = 3
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\xZnqm\src\sparse.jl:75
┌ Info: residual: 5.007789586800001, time: 0.000229, total time: 0.001364898 
│   i = 4
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\xZnqm\src\sparse.jl:75
┌ Info: residual: 4.635683207320177, time: 0.0004158, total time: 0.001780698 
│   i = 5
└ @ DiscreteValueIteration C:\Users\Honza\.julia\packages\DiscreteValueIteration\xZnqm\src\sparse.jl:75
┌ Info: residual: 4.227631138843005, time: 0.0003652, tot

ValueIterationPolicy:
 c:[0, 0]_t:0_p:Bool[0, 0] -> 0.0
 c:[1, 0]_t:0_p:Bool[0, 0] -> 0.0
 c:[2, 0]_t:0_p:Bool[0, 0] -> 0.0
 c:[0, 1]_t:0_p:Bool[0, 0] -> 0.0
 c:[1, 1]_t:0_p:Bool[0, 0] -> 0.0
 c:[2, 1]_t:0_p:Bool[0, 0] -> 0.0
 c:[0, 2]_t:0_p:Bool[0, 0] -> 0.0
 c:[1, 2]_t:0_p:Bool[0, 0] -> 0.0
 c:[2, 2]_t:0_p:Bool[0, 0] -> 0.0
 c:[0, 0]_t:1_p:Bool[0, 0] -> 0.0
 c:[1, 0]_t:1_p:Bool[0, 0] -> 0.0
 c:[2, 0]_t:1_p:Bool[0, 0] -> 0.0
 c:[0, 1]_t:1_p:Bool[0, 0] -> 0.0
 c:[1, 1]_t:1_p:Bool[0, 0] -> 0.0
 c:[2, 1]_t:1_p:Bool[0, 0] -> 0.0
 c:[0, 2]_t:1_p:Bool[0, 0] -> 0.0
 c:[1, 2]_t:1_p:Bool[0, 0] -> 0.0
 c:[2, 2]_t:1_p:Bool[0, 0] -> 0.0
 c:[0, 0]_t:2_p:Bool[0, 0] -> 0.0
 c:[1, 0]_t:2_p:Bool[0, 0] -> 0.0
 c:[2, 0]_t:2_p:Bool[0, 0] -> 0.0
 c:[0, 1]_t:2_p:Bool[0, 0] -> 0.0
 c:[1, 1]_t:2_p:Bool[0, 0] -> 0.0
 c:[2, 1]_t:2_p:Bool[0, 0] -> 0.0
 c:[0, 2]_t:2_p:Bool[0, 0] -> 0.0
 …

In [124]:
for i in 1:10
    s_i = rand(rng, 1:length(mdp_vi.states))
    s = mdp_vi.states[s_i]
    a_vi = action(policy, s)
    a_mc = action(planner, s)
    println("$s: a_vi=$a_vi, a_mc=$a_mc")
end

c:[1, 1]_t:5_p:Bool[1, 1]: a_vi=45.0, a_mc=30.0
c:[0, 1]_t:9_p:Bool[0, 0]: a_vi=1000.0, a_mc=1000.0
c:[0, 0]_t:9_p:Bool[0, 1]: a_vi=0.0, a_mc=1000.0
c:[0, 1]_t:3_p:Bool[0, 1]: a_vi=15.0, a_mc=15.0
c:[2, 2]_t:0_p:Bool[1, 0]: a_vi=15.0, a_mc=15.0
c:[1, 1]_t:6_p:Bool[1, 0]: a_vi=15.0, a_mc=15.0
c:[1, 2]_t:0_p:Bool[0, 1]: a_vi=15.0, a_mc=15.0
c:[1, 0]_t:6_p:Bool[1, 1]: a_vi=1000.0, a_mc=1000.0
c:[0, 0]_t:6_p:Bool[1, 0]: a_vi=0.0, a_mc=1000.0
c:[1, 0]_t:7_p:Bool[1, 0]: a_vi=15.0, a_mc=15.0


In [16]:
solver = MCTSSolver(n_iterations=5000,
                    depth=1, 
                    exploration_constant=50.0, 
                    enable_tree_vis=true)
planner = solve(solver, mdp_mc);

In [17]:
PricingMDP.compare_actions(mdp_vi, policy, planner; rng_seed=123, verbose=true)

								 Actions: [0.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0, 1000.0]
c:[1, 0]_t:0_p:Bool[1, 0]: a_vi=20.0, a_mc=15.0, q_Δ=0.22641273018370178 Q:[0.0, 5.0, 9.226412730183702, 11.452825460367405, 11.679238190551107, 9.90565092073481, 6.132063650918513, 6.132063650918513, 6.132063650918513, 6.132063650918513, 6.132063650918513, 6.132063650918513, 6.132063650918513, 6.132063650918513]
c:[1, 1]_t:0_p:Bool[1, 0]: a_vi=25.0, a_mc=15.0, q_Δ=3.446561960810307 Q:[6.132063650918514, 11.132063650918514, 17.855344631323668, 22.57862561172882, 25.301906592133975, 26.025187572539128, 24.748468552944278, 24.748468552944278, 24.748468552944278, 24.748468552944278, 24.748468552944278, 24.748468552944278, 24.748468552944278, 24.748468552944278]
c:[1, 2]_t:0_p:Bool[1, 0]: a_vi=25.0, a_mc=15.0, q_Δ=4.680789464500059 Q:[7.494638807997981, 12.49463880799798, 19.835033540248006, 25.175428272498035, 28.51582300474806, 29.856217736998094, 29.196612469248116, 29.196612469248116,

(75, 1.6930255067200173, 7.466666666666667)

In [14]:
# function my_N(mdp, s, a)
#     s.p == mdp.empty_product ? 1000 : 0
# end

# state = initialstate(mdp, Random.MersenneTwister(4))

s = State([1,1], 5,[1,1])
# s = State([1,1,1], 69,[1,1,1])
# s = mean(POMDPs.initialstate(mdp_mc))

a_vi = action(policy, s) 
a, info = action_info(planner, s);

display("vi: $a_vi, mc: $a, Q-values:")
display( hcat(mdp_vi.actions, policy.qmat[stateindex(mdp_vi, s), :]))

D3Tree(info[:tree], init_expand=1)

"vi: 40.0, mc: 35.0, Q-values:"

14×2 Array{Float64,2}:
    0.0   0.0
    5.0   5.0
   10.0  10.0
   15.0  15.0704
   20.0  19.1408
   25.0  22.2112
   30.0  24.2816
   35.0  25.352
   40.0  25.4223
   45.0  24.4927
   50.0  22.5631
   55.0  19.6335
   60.0  15.7039
 1000.0  15.7039

In [25]:
solver = DPWSolver(n_iterations=100, 
                    depth=11, 
                    exploration_constant=1.0)
planner = solve(solver, mdp_mc);
# state = initialstate(mdp, Random.MersenneTwister(4))

In [26]:
s = mean(POMDPs.initialstate(mdp_mc))

a, info = action_info(planner, s);
D3Tree(info[:tree], init_expand=1)

KeyError: KeyError: key :tree not found