From 9e595740a7e5c1f14a29775ae1b97f3af8d022e6 Mon Sep 17 00:00:00 2001 From: Spencer Lyon Date: Mon, 14 Mar 2016 13:13:47 -0400 Subject: [PATCH] ddp-sa test pass! --- src/markov/ddp.jl | 16 ++--- test/test_ddp.jl | 166 +++++++++++++++++++++++++++++++--------------- 2 files changed, 116 insertions(+), 66 deletions(-) diff --git a/src/markov/ddp.jl b/src/markov/ddp.jl index 8214f7f6..f5d1e911 100644 --- a/src/markov/ddp.jl +++ b/src/markov/ddp.jl @@ -66,7 +66,7 @@ type DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind} msg = "R must be 2-dimensional without s-a formulation" throw(ArgumentError(msg)) end - beta < 0 || beta >= 1 && throw(ArgumentError("beta must be [0, 1)")) + (beta < 0 || beta >= 1) && throw(ArgumentError("beta must be [0, 1)")) # verify input integrity 2 num_states, num_actions = size(R) @@ -102,7 +102,7 @@ type DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind} if NR != 1 throw(ArgumentError("R must be 1-dimensional with s-a formulation")) end - beta < 0 || beta >= 1 && throw(ArgumentError("beta must be [0, 1)")) + (beta < 0 || beta >= 1) && throw(ArgumentError("beta must be [0, 1)")) # verify input integrity (same length) num_sa_pairs, num_states = size(Q) @@ -128,7 +128,7 @@ type DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind} n = maximum(s_indices) msg = "Duplicate s-a pair found" as_ptr = sparse(a_indices, s_indices, 1:num_sa_pairs, m, n, - (x,y)->error(msg)) + (x,y)->throw(ArgumentError(msg))) a_indices = as_ptr.rowval a_indptr = as_ptr.colptr @@ -541,8 +541,8 @@ s_wise_max!(vals::AbstractMatrix, out::Vector) = (println("calling this one! "); Populate `out` with `max_a vals(s, a)`, where `vals` is represented as a `AbstractMatrix` of size `(num_states, num_actions)`. -Also fills `out_argmax` with the linear index associated with the indmax in each -row +Also fills `out_argmax` with the column number associated with the indmax in +each row """ function s_wise_max!(vals::AbstractMatrix, out::Vector, out_argmax::Vector) # naive implementation where I just iterate over the rows @@ -562,11 +562,6 @@ function s_wise_max!(vals::AbstractMatrix, out::Vector, out_argmax::Vector) end end - # HACK: convert to linear index for intermediate testing - # sv = size(vals) - # for (i, c) in enumerate(out_argmax) - # out_argmax[i] = sub2ind(sv, i, c) - # end out, out_argmax end @@ -616,7 +611,6 @@ function s_wise_max!(a_indices::Vector, a_indptr::Vector, vals::Vector, if a_indptr[i] != a_indptr[i+1] m = a_indptr[i] for j in a_indptr[i]+1:(a_indptr[i+1]-1) - @show i, j, m, vals[j], vals[m] if vals[j] > vals[m] m = j end diff --git a/test/test_ddp.jl b/test/test_ddp.jl index 73abae53..f5521513 100644 --- a/test/test_ddp.jl +++ b/test/test_ddp.jl @@ -37,7 +37,7 @@ Tests for markov/ddp.jl ddp0_sa = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices) # List of ddp formulations - ddp0_collection = (ddp0,) + ddp0_collection = (ddp0, ddp0_sa) # Maximum Iteration and Epsilon for Tests max_iter = 200 @@ -47,15 +47,16 @@ Tests for markov/ddp.jl v_star = [(5-5.5*beta)/((1-0.5*beta)*(1-beta)), -1/(1-beta)] sigma_star = [1, 1] - @testset "test bellman_operator methods" begin + @testset "bellman_operator methods" begin # Check both Dense and State-Action Pair Formulation - for ddp_item in ddp0_collection - @test isapprox(bellman_operator(ddp_item, v_star), v_star) + for ddp in ddp0_collection + @test isapprox(bellman_operator(ddp, v_star), v_star) end end - @testset "test RQ_sigma" begin + @testset "RQ_sigma" begin nr, nc = size(R) + # test for DDP sigmas = ([1, 1], [1, 2], [2, 1], [2, 2]) for sig in sigmas r, q = RQ_sigma(ddp0, sig) @@ -67,63 +68,67 @@ Tests for markov/ddp.jl end end end + + # TODO: add test for DDPsa end - @testset "test compute_greedy methods" begin + @testset "compute_greedy methods" begin # Check both Dense and State-Action Pair Formulation - for ddp_item in ddp0_collection - @test compute_greedy(ddp_item, v_star) == sigma_star + for ddp in ddp0_collection + @test compute_greedy(ddp, v_star) == sigma_star end end - @testset "test evaluate_policy methods" begin + @testset "evaluate_policy methods" begin # Check both Dense and State-Action Pair Formulation - for ddp_item in ddp0_collection - @test isapprox(evaluate_policy(ddp_item, sigma_star), v_star) + for ddp in ddp0_collection + @test isapprox(evaluate_policy(ddp, sigma_star), v_star) end end - @testset "test methods for subtypes != (Float64, Int)" begin + @testset "methods for subtypes != (Float64, Int)" begin float_types = [Float16, Float32, Float64, BigFloat] int_types = [Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128] - for f in (bellman_operator, compute_greedy) - for T in float_types - f_f64 = f(ddp0, [1.0, 1.0]) - f_T = f(ddp0, ones(T, 2)) - @test isapprox(f_f64, convert(Vector{eltype(f_f64)}, f_T)) - end + for ddp in ddp0_collection + for f in (bellman_operator, compute_greedy) + for T in float_types + f_f64 = f(ddp, [1.0, 1.0]) + f_T = f(ddp, ones(T, 2)) + @test isapprox(f_f64, convert(Vector{eltype(f_f64)}, f_T)) + end - # only Integer subtypes can be Rational type params - # NOTE: Only the integer types below don't overflow for this example - for T in [Int64, Int128] - @test f(ddp0, [1//1, 1//1]) == f(ddp0, ones(Rational{T}, 2)) + # only Integer subtypes can be Rational type params + # NOTE: Only the integer types below don't overflow for this example + for T in [Int64, Int128] + @test f(ddp, [1//1, 1//1]) == f(ddp, ones(Rational{T}, 2)) + end end - end - for T in float_types, S in int_types - v = ones(T, 2) - s = ones(S, 2) - # just test that we can call the method and the result is - # deterministic - @test bellman_operator!(ddp0, v, s) == bellman_operator!(ddp0, v, s) - end + for T in float_types, S in int_types + v = ones(T, 2) + s = ones(S, 2) + # just test that we can call the method and the result is + # deterministic + @test bellman_operator!(ddp, v, s) == bellman_operator!(ddp, v, s) + end - for T in int_types - s = T[1, 1] - @test isapprox(evaluate_policy(ddp0, s), v_star) + for T in int_types + s = T[1, 1] + @test isapprox(evaluate_policy(ddp, s), v_star) + end end end - @testset "test compute_greedy! changes ddpr.v" begin + @testset "compute_greedy! changes ddpr.v" begin res = solve(ddp0, VFI) res.Tv[:] = 500.0 compute_greedy!(ddp0, res) @test maxabs(res.Tv - 500.0) > 0 end - @testset "test value_iteration" begin + @testset "value_iteration" begin # Check both Dense and State-Action Pair Formulation for ddp_item in ddp0_collection # Compute Result @@ -142,7 +147,7 @@ Tests for markov/ddp.jl end end - @testset "test policy_iteration" begin + @testset "policy_iteration" begin # Check both Dense and State-Action Pair Formulation for ddp_item in ddp0_collection res = solve(ddp_item, PFI) @@ -159,7 +164,7 @@ Tests for markov/ddp.jl end end - @testset "test DiscreteDP{Rational,_,_,Rational} maintains Rational" begin + @testset "DiscreteDP{Rational,_,_,Rational} maintains Rational" begin ddp_rational = DiscreteDP(map(Rational{BigInt}, R), map(Rational{BigInt}, Q), map(Rational{BigInt}, beta)) @@ -170,7 +175,7 @@ Tests for markov/ddp.jl @test eltype(solve(ddp_rational, vi, MPFI; max_iter=1, k=1, epsilon=Inf).v) == Rational{BigInt} end - @testset "test DiscreteDP{Rational{BigInt},_,_,Rational{BigInt}} works" begin + @testset "DiscreteDP{Rational{BigInt},_,_,Rational{BigInt}} works" begin ddp_rational = DiscreteDP(map(Rational{BigInt}, R), map(Rational{BigInt}, Q), map(Rational{BigInt}, beta)) @@ -185,7 +190,7 @@ Tests for markov/ddp.jl @test r1.mc.p == r3.mc.p end - @testset "test modified_policy_iteration" begin + @testset "modified_policy_iteration" begin for ddp_item in ddp0_collection res = solve(ddp_item, MPFI) v_init = [0.0, 1.0] @@ -211,27 +216,78 @@ Tests for markov/ddp.jl end end - @testset "test ddp_no_feasible_action_error" begin - #Dense Matrix - n, m = 2, 2 - R = [-Inf -Inf; 1.0 2.0] + @testset "DDPsa constructor" begin + @testset "feasbile action pair" begin + _R = [1.0, 0.0, 0.0, 1.0] + _Q = fill(1/3, 4, 3) + _s_ind = [1, 1, 3, 3] + _a_ind = [1, 2, 1, 2] + @test_throws ArgumentError DiscreteDP(_R, _Q, beta, _s_ind, _a_ind) + end - Q = Array(Float64, n, m, n) - Q[:, :, 1] = [0.5 0.0; 0.0 0.0] - Q[:, :, 2] = [0.5 1.0; 1.0 1.0] - beta = 0.95 + _R, _Q = R_sa, Q_sa + _s_ind = [1, 1, 2] + _a_ind = [1, 2, 1] - @test_throws ArgumentError DiscreteDP(R, Q, beta) + @testset "beta in [0, 1)" begin + @test_throws ArgumentError DiscreteDP(_R, _Q, -eps(), _s_ind, _a_ind) + @test_throws ArgumentError DiscreteDP(_R, _Q, 1.0, _s_ind, _a_ind) + @test_throws ArgumentError DiscreteDP(_R, _Q, 1+eps(), _s_ind, _a_ind) + end - # # State-Action Pair Formulation - # s_indices = [1, 1, 3, 3] - # a_indices = [1, 2, 1, 2] - # #TODO: @sglyon We need to construct R_sa, Q_sa right? - # - # @test_throws ArgumentError DiscreteDP(R, Q, beta, s_indices, a_indices) + @testset "argument sizes" begin + # NQ != 2 + @test_throws ArgumentError DiscreteDP(_R, rand(4, 3, 1), beta, _s_ind, _a_ind) + + # NR != 1 + @test_throws ArgumentError DiscreteDP(rand(4, 1), _Q, beta, _s_ind, _a_ind) + + # incorrect lengths + @test_throws ArgumentError DiscreteDP(rand(2), _Q, beta, _s_ind, _a_ind) + @test_throws ArgumentError DiscreteDP(_R, rand(5, 2), beta, _s_ind, _a_ind) + @test_throws ArgumentError DiscreteDP(_R, _Q, beta, rand(1:3, 2), _a_ind) + @test_throws ArgumentError DiscreteDP(_R, _Q, beta, _s_ind, rand(1:3, 2)) + end + + @testset "duplicate sa pair" begin + @test_throws ArgumentError DiscreteDP(_R, _Q, beta, _s_ind, [1, 1, 2]) + end + end + + @testset "DDP constructor" begin + @testset "beta in [0, 1)" begin + @test_throws ArgumentError DiscreteDP(R, Q, -eps()) + @test_throws ArgumentError DiscreteDP(R, Q, 1.0) + @test_throws ArgumentError DiscreteDP(R, Q, 1+eps()) + end + + @testset "feasbile action pair" begin + #Dense Matrix + n, m = 2, 2 + _R = [-Inf -Inf; 1.0 2.0] + + _Q = Array(Float64, n, m, n) + _Q[:, :, 1] = [0.5 0.0; 0.0 0.0] + _Q[:, :, 2] = [0.5 1.0; 1.0 1.0] + _beta = 0.95 + + @test_throws ArgumentError DiscreteDP(_R, _Q, _beta) + end + + @testset "R, Q sizes" begin + # NQ != 3 + @test_throws ArgumentError DiscreteDP(R, zeros(2, 2), beta) + + # NR != 2 + @test_throws ArgumentError DiscreteDP(zeros(1), Q, beta) + + # incompatible dimensions + @test_throws ArgumentError DiscreteDP(zeros(2, 3), Q, beta) + @test_throws ArgumentError DiscreteDP(R, zeros(2, 3, 2), beta) + end end - @testset "test ddp_negative_inf_error()" begin + @testset "ddp_negative_inf_error()" begin # Dense Matrix n, m = 3, 2 R = [0 1;