From 0c1ef5b027c38dcc70f056e1925ab8e73c21d6ab Mon Sep 17 00:00:00 2001
From: Spencer Lyon
Date: Mon, 25 Jan 2016 09:41:55 -0500
Subject: [PATCH] refactor ddp types

---
 src/markov/ddp.jl | 326 ++++++++++++++++++++++++++--------------------
 1 file changed, 188 insertions(+), 138 deletions(-)

diff --git a/src/markov/ddp.jl b/src/markov/ddp.jl
index 5731582c..81d6648f 100644
--- a/src/markov/ddp.jl
+++ b/src/markov/ddp.jl
@@ -31,28 +31,43 @@ DiscreteDP type for specifying paramters for discrete dynamic programming model
 - `R::Array{T,NR}` : Reward Array
 - `Q::Array{T,NQ}` : Transition Probability Array
 - `beta::Float64` : Discount Factor
+- `s_indices::Nullable{Vector{Tind}}`: State Indices. Null unless using
+  SA formulation
+- `a_indices::Nullable{Vector{Tind}}`: Action Indices. Null unless using
+  SA formulation
+- `a_indptr::Nullable{Vector{Tind}}`: Action Index Pointers. Null unless using
+  SA formulation

 ##### Returns

 - `ddp::DiscreteDP` : DiscreteDP object

 """
-
-abstract AbstractDiscreteDP{T}
-
-type DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real} <: AbstractDiscreteDP{T}
-    R::Array{T,NR} #-Reward Array-#
-    Q::Array{T,NQ} #-Transition Probability Array-#
-    beta::Tbeta #-Discount Factor-#
+type DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real,Tind}
+    R::Array{T,NR}                     # Reward Array
+    Q::Array{T,NQ}                     # Transition Probability Array
+    beta::Tbeta                        # Discount Factor
+    s_indices::Nullable{Vector{Tind}}  # State Indices
+    a_indices::Nullable{Vector{Tind}}  # Action Indices
+    a_indptr::Nullable{Vector{Tind}}   # Action Index Pointers

     function DiscreteDP(R::Array, Q::Array, beta::Real)
         # verify input integrity 1
-        NQ != 3 && throw(ArgumentError("Q must be 3-dimensional without s-a formulation"))
-        NR != 2 && throw(ArgumentError("R must be 2-dimensional without s-a formulation"))
+        if NQ != 3
+            msg = "Q must be 3-dimensional without s-a formulation"
+            throw(ArgumentError(msg))
+        end
+        if NR != 2
+            msg = "R must be 2-dimensional without s-a formulation"
+            throw(ArgumentError(msg))
+        end
         beta < 0 || beta >= 1 && throw(ArgumentError("beta must be [0, 1)"))
+
         # verify input integrity 2
         num_states, num_actions = size(R)
-        (size(Q) != (num_states,num_actions,num_states)) && throw(ArgumentError("shapes of R and Q must be (n,m) and (n,m,n)"))
+        if size(Q) != (num_states, num_actions, num_states)
+            throw(ArgumentError("shapes of R and Q must be (n,m) and (n,m,n)"))
+        end

         # check feasibility
         R_max = _s_wise_max(R)
@@ -63,70 +78,96 @@ type DiscreteDP{T<:Real,NQ,NR,Tbeta<:Real} <: AbstractDiscreteDP{T}
             some action: violated for state $s"))
         end

-        new(R, Q, beta)
-    end
-end
+        # here the indices and indptr are null.
+        s_indices = Nullable{Vector{Int}}()
+        a_indices = Nullable{Vector{Int}}()
+        a_indptr = Nullable{Vector{Int}}()
-type DiscreteDP_sa{T<:Real,NQ,NR,Tbeta<:Real,Tind<:Integer} <: AbstractDiscreteDP{T}
-    R::Array{T,NR} #-Reward Array-#
-    Q::Array{T,NQ} #-Transition Probability Array-#
-    beta::Tbeta #-Discount Factor-#
-    s_indices::Vector{Tind} #-State Indices-#
-    a_indices::Vector{Tind} #-Action Indices-#
-    a_indptr::Vector{Tind} #-Action Index Pointers-#
+
+        new(R, Q, beta, s_indices, a_indices, a_indptr)
+    end

-    function DiscreteDP_sa(R::Array, Q::Array, beta::Real, s_indices::Vector, a_indices::Vector)
+    function DiscreteDP(R::Array, Q::Array, beta::Real, s_indices::Vector,
+                        a_indices::Vector)
         # verify input integrity 1
-        NQ != 2 && throw(ArgumentError("Q must be 2-dimensional with s-a formulation"))
-        NR != 1 && throw(ArgumentError("R must be 1-dimensional with s-a formulation"))
+        if NQ != 2
+            throw(ArgumentError("Q must be 2-dimensional with s-a formulation"))
+        end
+        if NR != 1
+            throw(ArgumentError("R must be 1-dimensional with s-a formulation"))
+        end
         beta < 0 || beta >= 1 && throw(ArgumentError("beta must be [0, 1)"))
+
         # verify input integrity 2
         num_sa_pairs, num_states = size(Q)
-        length(R) != num_sa_pairs && throw(ArgumentError("shapes of R and Q must be (L,) and (L,n)"))
-        ([length(s_indices); length(a_indices)] != fill(num_sa_pairs,2)) && throw(ArgumentError("length of s_indices and a_indices must be equal to the number of s-a pairs"))
+        if length(R) != num_sa_pairs
+            throw(ArgumentError("shapes of R and Q must be (L,) and (L,n)"))
+        end
+        if length(s_indices) != num_sa_pairs
+            msg = "length of s_indices must be equal to the number of s-a pairs"
+            throw(ArgumentError(msg))
+        end
+        if length(a_indices) != num_sa_pairs
+            msg = "length of a_indices must be equal to the number of s-a pairs"
+            throw(ArgumentError(msg))
+        end

-        if _has_sorted_sa_indices(s_indices,a_indices)
-            a_indptr = Array(Int64, num_states+1)
-            _generate_a_indptr!(num_states, s_indices, a_indptr)
+
+        if _has_sorted_sa_indices(s_indices, a_indices)
+            a_indptr = Array(Int64, num_states+1)
+            _generate_a_indptr!(num_states, s_indices, a_indptr)
         else
-            # transpose matrix to use Julia's CSC; now rows are actions and columns are states (this is why it's called as_ptr instead of sa_ptr)
-            as_ptr = sparse(a_indices, s_indices, collect(1:num_sa_pairs))
-            a_indices = as_ptr.rowval
-            a_indptr = as_ptr.colptr
-
-            R = R[as_ptr.nzval]
-            Q = Q[as_ptr.nzval,:]
-
-            _s_indices = Array(eltype(a_indices),num_sa_pairs)
-            for i in 1:num_states, j in a_indptr[i]:(a_indptr[i+1]-1)
-                _s_indices[j] = i
-            end
-            copy!(s_indices,_s_indices)
+            # transpose matrix to use Julia's CSC; now rows are actions and
+            # columns are states (this is why it's called as_ptr not sa_ptr)
+            as_ptr = sparse(a_indices, s_indices, collect(1:num_sa_pairs))
+            a_indices = as_ptr.rowval
+            a_indptr = as_ptr.colptr
+
+            R = R[as_ptr.nzval]
+            Q = Q[as_ptr.nzval, :]
+
+            _s_indices = Array(eltype(a_indices), num_sa_pairs)
+            for i in 1:num_states, j in a_indptr[i]:(a_indptr[i+1]-1)
+                _s_indices[j] = i
+            end
+            copy!(s_indices, _s_indices)
         end

         # check feasibility
-        diff = a_indptr[2:end]-a_indptr[1:end-1]
-        if any(diff .== 0.0)
+        aptr_diff = diff(a_indptr)
+        if any(aptr_diff .== 0.0)
             # First state index such that no action is available
-            s = find(diff .== 0.0) #-Only Gives True
+            s = find(aptr_diff .== 0.0) # Only Gives True
             throw(ArgumentError("for every state at least one action must be available: violated for state $s"))
         end

+        # package into nullables before constructing type
+        s_indices = Nullable{Vector{Tind}}(s_indices)
+        a_indices = Nullable{Vector{Tind}}(a_indices)
+        a_indptr = Nullable{Vector{Tind}}(a_indptr)
+
         new(R, Q, beta, s_indices, a_indices, a_indptr)
     end
 end

-# necessary for dispatch to fill in type parameters {T,NQ,NR,Tbeta}
+# necessary for dispatch to fill in type parameters {T,NQ,NR,Tbeta}. Notice
+# that we set `Tind=Int` in this case, which is ok because the indices are null
 DiscreteDP{T,NQ,NR,Tbeta}(R::Array{T,NR}, Q::Array{T,NQ}, beta::Tbeta) =
-    DiscreteDP{T,NQ,NR,Tbeta}(R, Q, beta)
+    DiscreteDP{T,NQ,NR,Tbeta,Int}(R, Q, beta)
+
+function DiscreteDP{T,NQ,NR,Tbeta,Tind}(R::Array{T,NR}, Q::Array{T,NQ},
+                                        beta::Tbeta, s_indices::Vector{Tind},
+                                        a_indices::Vector{Tind})
+    DiscreteDP{T,NQ,NR,Tbeta,Tind}(R, Q, beta, s_indices, a_indices)
+end

-DiscreteDP{T,NQ,NR,Tbeta,Tind}(R::Array{T,NR}, Q::Array{T,NQ}, beta::Tbeta, s_indices::Vector{Tind}, a_indices::Vector{Tind}) =
-    DiscreteDP_sa{T,NQ,NR,Tbeta,Tind}(R, Q, beta, s_indices, a_indices)
+# create an alias for each of the common formulations
+typealias DDP{T,Tbeta,Tind} DiscreteDP{T,3,2,Tbeta,Tind}
+typealias DDPsa{T,Tbeta,Tind} DiscreteDP{T,2,1,Tbeta,Tind}

 #~Property Functions~#
-num_states(ddp::DiscreteDP) = size(ddp.R, 1)
-num_states(ddp::DiscreteDP_sa) = size(ddp.Q, 2)
+num_states(ddp::DDP) = size(ddp.R, 1)
+num_states(ddp::DDPsa) = size(ddp.Q, 2)
 num_actions(ddp::DiscreteDP) = size(ddp.R, 2)

 abstract DDPAlgorithm
@@ -140,7 +181,7 @@ solving the model

 ##### Parameters

-- `ddp::AbstractDiscreteDP` : DiscreteDP object
+- `ddp::DiscreteDP` : DiscreteDP object

 ##### Returns

@@ -154,8 +195,8 @@ type DPSolveResult{Algo<:DDPAlgorithm,Tval<:Real}
     sigma::Array{Int,1}
     mc::MarkovChain

-    function DPSolveResult(ddp::AbstractDiscreteDP)
-        v = s_wise_max(ddp, ddp.R) #Initialise v with v_init
+    function DPSolveResult(ddp::DiscreteDP)
+        v = s_wise_max(ddp, ddp.R) # Initialise v with v_init
         ddpr = new(v, similar(v), 0, similar(v, Int))

         # fill in sigma with proper values
@@ -164,7 +205,7 @@ type DPSolveResult{Algo<:DDPAlgorithm,Tval<:Real}
     end

     # method to pass initial value function (skip the s-wise max)
-    function DPSolveResult(ddp::AbstractDiscreteDP, v::Vector)
+    function DPSolveResult(ddp::DiscreteDP, v::Vector)
         ddpr = new(v, similar(v), 0, similar(v, Int))

         # fill in sigma with proper values
@@ -183,7 +224,7 @@ for a value function v.

 ##### Parameters

-- `ddp::AbstractDiscreteDP` : Object that contains the model parameters
+- `ddp::DiscreteDP` : Object that contains the model parameters
 - `v::Vector{T<:AbstractFloat}`: The current guess of the value function
 - `Tv::Vector{T<:AbstractFloat}`: A buffer array to hold the updated value
   function. Initial value not used and will be overwritten
@@ -195,7 +236,7 @@ for a value function v.
 - `Tv::Vector` : Updated value function vector
 - `sigma::Vector` : Updated policiy function vector
 """
-function bellman_operator!(ddp::AbstractDiscreteDP, v::Vector, Tv::Vector, sigma::Vector)
+function bellman_operator!(ddp::DiscreteDP, v::Vector, Tv::Vector, sigma::Vector)
     vals = ddp.R + ddp.beta * ddp.Q * v
     s_wise_max!(ddp, vals, Tv, sigma)
     Tv, sigma
@@ -209,7 +250,7 @@ Apply the Bellman operator using `v=ddpr.v`, `Tv=ddpr.Tv`, and `sigma=ddpr.sigma

 Updates `ddpr.Tv` and `ddpr.sigma` inplace
 """
-bellman_operator!(ddp::AbstractDiscreteDP, ddpr::DPSolveResult) =
+bellman_operator!(ddp::DiscreteDP, ddpr::DPSolveResult) =
     bellman_operator!(ddp, ddpr.v, ddpr.Tv, ddpr.sigma)

 """
@@ -221,7 +262,7 @@ corresponding policy rule

 ##### Parameters

-- `ddp::AbstractDiscreteDP`: The ddp model
+- `ddp::DiscreteDP`: The ddp model
 - `v::Vector{T<:AbstractFloat}`: The current guess of the value function.
   This array will be overwritten
 - `sigma::Vector`: A buffer array to hold the policy function. Initial
@@ -232,11 +273,10 @@ corresponding policy rule
 - `Tv::Vector`: Updated value function vector
 - `sigma::Vector{T<:Integer}`: Policy rule
 """
-bellman_operator!{T<:AbstractFloat}(ddp::AbstractDiscreteDP, v::Vector{T}, sigma::Vector) =
+bellman_operator!{T<:AbstractFloat}(ddp::DiscreteDP, v::Vector{T}, sigma::Vector) =
     bellman_operator!(ddp, v, v, sigma)

 # method to allow dispatch on rationals
-# TODO: add a test for this
 # TODO from albep: not sure how to update this to the state-action pair formulation
 function bellman_operator!{T1<:Rational,T2<:Rational,NR,NQ,T3<:Rational}(ddp::DiscreteDP{T1,NR,NQ,T2},
                                                                          v::Vector{T3},
@@ -250,14 +290,14 @@ for a given value function v.

 ##### Parameters

-- `ddp::AbstractDiscreteDP`: The ddp model
+- `ddp::DiscreteDP`: The ddp model
 - `v::Vector`: The current guess of the value function

 ##### Returns

 - `Tv::Vector` : Updated value function vector
 """
-bellman_operator(ddp::AbstractDiscreteDP, v::Vector) =
+bellman_operator(ddp::DiscreteDP, v::Vector) =
     s_wise_max(ddp, ddp.R + ddp.beta * ddp.Q * v)

 # ---------------------- #
@@ -269,7 +309,7 @@ Compute the v-greedy policy

 ##### Parameters

-- `ddp::AbstractDiscreteDP` : Object that contains the model parameters
+- `ddp::DiscreteDP` : Object that contains the model parameters
 - `ddpr::DPSolveResult` : Object that contains result variables

 ##### Returns

@@ -281,10 +321,10 @@ Compute the v-greedy policy

 modifies ddpr.sigma and ddpr.Tv in place
 """
-compute_greedy!(ddp::AbstractDiscreteDP, ddpr::DPSolveResult) =
+compute_greedy!(ddp::DiscreteDP, ddpr::DPSolveResult) =
     (bellman_operator!(ddp, ddpr); ddpr.sigma)

-function compute_greedy{TV<:Real}(ddp::AbstractDiscreteDP, v::Vector{TV})
+function compute_greedy{TV<:Real}(ddp::DiscreteDP, v::Vector{TV})
     Tv = similar(v)
     sigma = ones(Int, length(v))
     bellman_operator!(ddp, v, Tv, sigma)
@@ -300,7 +340,7 @@ Method of `evaluate_policy` that extracts sigma from a `DPSolveResult`

 See other docstring for details
 """
-evaluate_policy(ddp::AbstractDiscreteDP, ddpr::DPSolveResult) =
+evaluate_policy(ddp::DiscreteDP, ddpr::DPSolveResult) =
     evaluate_policy(ddp, ddpr.sigma)

 """
@@ -308,7 +348,7 @@ Compute the value of a policy.

 ##### Parameters

-- `ddp::AbstractDiscreteDP` : Object that contains the model parameters
+- `ddp::DiscreteDP` : Object that contains the model parameters
 - `sigma::Vector{T<:Integer}` : Policy rule vector

 ##### Returns

 - `v_sigma::Array{Float64}` : Value vector of `sigma`, of length n.
""" -function evaluate_policy{T<:Integer}(ddp::AbstractDiscreteDP, sigma::Vector{T}) +function evaluate_policy{T<:Integer}(ddp::DiscreteDP, sigma::Vector{T}) R_sigma, Q_sigma = RQ_sigma(ddp, sigma) b = R_sigma A = I - ddp.beta * Q_sigma @@ -332,7 +372,7 @@ Solve the dynamic programming problem. ##### Parameters -- `ddp::AbstractDiscreteDP` : Object that contains the Model Parameters +- `ddp::DiscreteDP` : Object that contains the Model Parameters - `method::Type{Tind2sub(size(ddp.R), x), sigma) @@ -418,9 +458,9 @@ function RQ_sigma{T<:Integer}(ddp::DiscreteDP, sigma::Array{T}) end # TODO: express it in a similar way as above to exploit Julia's column major order -function RQ_sigma{T<:Integer}(ddp::DiscreteDP_sa, sigma::Array{T}) +function RQ_sigma{T<:Integer}(ddp::DDPsa, sigma::Array{T}) sigma_indices = Array(T, num_states(ddp)) - _find_indices!(ddp.a_indices, ddp.a_indptr, sigma, sigma_indices) + _find_indices!(get(ddp.a_indices), get(ddp.a_indptr), sigma, sigma_indices) R_sigma = ddp.R[sigma_indices] Q_sigma = ddp.Q[sigma_indices, :] return R_sigma, Q_sigma @@ -430,10 +470,15 @@ end # Internal methods # # ---------------- # s_wise_max(ddp::DiscreteDP, vals::Matrix) = _s_wise_max(vals) -s_wise_max(ddp::DiscreteDP_sa, vals::Vector) = _s_wise_max!(ddp.a_indices, ddp.a_indptr, vals, Array(Float64, num_states(ddp))) +function s_wise_max(ddp::DDPsa, vals::Vector) + _s_wise_max!(get(ddp.a_indices), get(ddp.a_indptr), vals, + Array(Float64, num_states(ddp))) +end -s_wise_max!(ddp::DiscreteDP, vals::Matrix, out::Vector, out_argmax::Vector) = _s_wise_max!(vals, out, out_argmax) -s_wise_max!(ddp::DiscreteDP_sa, vals::Vector, out::Vector, out_argmax::Vector) = _s_wise_max!(ddp.a_indices, ddp.a_indptr, vals, out, out_argmax) +s_wise_max!(ddp::DiscreteDP, vals::Matrix, out::Vector, out_argmax::Vector) = + _s_wise_max!(vals, out, out_argmax) +s_wise_max!(ddp::DDPsa, vals::Vector, out::Vector, out_argmax::Vector) = + _s_wise_max!(get(ddp.a_indices), get(ddp.a_indptr), vals, out, out_argmax) """ Return the `Vector` `max_a vals(s, a)`, where `vals` is represented as a @@ -461,19 +506,20 @@ _s_wise_max!(vals::Matrix, out::Vector, out_argmax::Vector) = Populate `out` with `max_a vals(s, a)`, where `vals` is represented as a `Vector` of size `(num_sa_pairs,)`. 
""" -function _s_wise_max!(a_indices::Vector, a_indptr::Vector, vals::Vector, out::Vector) - n = length(out) - for i in 1:n - if a_indptr[i] != a_indptr[i+1] - m = a_indptr[i] - for j in a_indptr[i]+1:(a_indptr[i+1]-1) - if vals[j] > vals[m] - m = j +function _s_wise_max!(a_indices::Vector, a_indptr::Vector, vals::Vector, + out::Vector) + n = length(out) + for i in 1:n + if a_indptr[i] != a_indptr[i+1] + m = a_indptr[i] + for j in a_indptr[i]+1:(a_indptr[i+1]-1) + if vals[j] > vals[m] + m = j + end + end + out[i] = vals[m] end - end - out[i] = vals[m] end - end end """ @@ -483,20 +529,21 @@ Populate `out` with `max_a vals(s, a)`, where `vals` is represented as a Also fills `out_argmax` with the linear index associated with the indmax in each row """ -function _s_wise_max!(a_indices::Vector, a_indptr::Vector, vals::Vector, out::Vector, out_argmax::Vector) - n = length(out) - for i in 1:n - if a_indptr[i] != a_indptr[i+1] - m = a_indptr[i] - for j in a_indptr[i]+1:(a_indptr[i+1]-1) - if vals[j] > vals[m] - m = j +function _s_wise_max!(a_indices::Vector, a_indptr::Vector, vals::Vector, + out::Vector, out_argmax::Vector) + n = length(out) + for i in 1:n + if a_indptr[i] != a_indptr[i+1] + m = a_indptr[i] + for j in a_indptr[i]+1:(a_indptr[i+1]-1) + if vals[j] > vals[m] + m = j + end + end + out[i] = vals[m] + out_argmax[i] = a_indices[m] end - end - out[i] = vals[m] - out_argmax[i] = a_indices[m] end - end end """ @@ -511,21 +558,22 @@ Returns bool: Whether `s_indices` and `a_indices` are sorted. """ function _has_sorted_sa_indices(s_indices::Vector, a_indices::Vector) - L = length(s_indices) - ev = NaN - for i in 1:L-1 - ((s_indices[i] == s_indices[i+1]) && (a_indices[i] == a_indices[i+1])) && throw(ArgumentError("duplicate state-action pair")) - if s_indices[i] > s_indices[i+1] - ev = false - break - end - if (s_indices[i] == s_indices[i+1]) && (a_indices[i] > a_indices[i+1]) - ev = false - break + L = length(s_indices) + ev = true + for i in 1:L-1 + if s_indices[i] != s_indices[i+1] || a_indices[i] != a_indices[i+1] + throw(ArgumentError("duplicate state-action pair")) + end + if s_indices[i] > s_indices[i+1] + ev = false + break + end + if (s_indices[i] == s_indices[i+1]) && (a_indices[i] > a_indices[i+1]) + ev = false + break + end end - ev = true - end - ev + ev end """ @@ -541,25 +589,28 @@ s_indices : Vector{Int} out : Vector{Int} with length = num_states+1 """ function _generate_a_indptr!(num_states::Int, s_indices::Vector, out::Vector) - idx = 1 - out[1] = 1 - for s in 1:num_states-1 - while(s_indices[idx] == s) - idx += 1 + idx = 1 + out[1] = 1 + for s in 1:num_states-1 + while(s_indices[idx] == s) + idx += 1 + end + out[s+1] = idx end - out[s+1] = idx - end - out[num_states+1] = length(s_indices)+1 # need this +1 to be consistent with Julia's sparse pointers: colptr[i]:(colptr[i+1]-1) - out + # need this +1 to be consistent with Julia's sparse pointers: + # colptr[i]:(colptr[i+1]-1) + out[num_states+1] = length(s_indices)+1 + out end -function _find_indices!(a_indices::Vector, a_indptr::Vector, sigma::Vector, out::Vector) - n = length(sigma) - for i in 1:n, j in a_indptr[i]:(a_indptr[i+1]-1) - if sigma[i] == a_indices[j] - out[i] = j +function _find_indices!(a_indices::Vector, a_indptr::Vector, sigma::Vector, + out::Vector) + n = length(sigma) + for i in 1:n, j in a_indptr[i]:(a_indptr[i+1]-1) + if sigma[i] == a_indices[j] + out[i] = j + end end - end end #= @@ -578,7 +629,6 @@ function *{T}(A::Array{T,3}, v::Vector) return reshape(out, shape[1:end-1]) end - 
 Base.ind2sub(ddp::DiscreteDP, x::Vector) =
     map(_ -> ind2sub(size(ddp.R), _)[2], x)

@@ -586,7 +636,7 @@ Impliments Value Iteration
 NOTE: See `solve` for further details
 """
-function _solve!(ddp::AbstractDiscreteDP, ddpr::DPSolveResult{VFI}, max_iter::Integer,
+function _solve!(ddp::DiscreteDP, ddpr::DPSolveResult{VFI}, max_iter::Integer,
                  epsilon::Real, k::Integer)
     if ddp.beta == 0.0
         tol = Inf
     else
@@ -615,10 +665,10 @@ end
 Policy Function Iteration

 NOTE: The epsilon is ignored in this method. It is only here so dispatch can
-  go from `solve(::AbstractDiscreteDP, ::Type{Algo})` to any of the algorithms.
+  go from `solve(::DiscreteDP, ::Type{Algo})` to any of the algorithms.
 See `solve` for further details
 """
-function _solve!(ddp::AbstractDiscreteDP, ddpr::DPSolveResult{PFI}, max_iter::Integer,
+function _solve!(ddp::DiscreteDP, ddpr::DPSolveResult{PFI}, max_iter::Integer,
                  epsilon::Real, k::Integer)

     old_sigma = copy(ddpr.sigma)
@@ -643,7 +693,7 @@ midrange(x::Vector) = mean(extrema(x))
 """
 Modified Policy Function Iteration
 """
-function _solve!(ddp::AbstractDiscreteDP, ddpr::DPSolveResult{MPFI}, max_iter::Integer,
+function _solve!(ddp::DiscreteDP, ddpr::DPSolveResult{MPFI}, max_iter::Integer,
                  epsilon::Real, k::Integer)
     beta = ddp.beta
     fill!(ddpr.v, minimum(ddp.R[ddp.R .> -Inf]) / (1.0 - beta))
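
Usage sketch (not part of the patch): the snippet below shows the two constructor forms that this refactor folds into the single `DiscreteDP` type -- the dense (n, m)/(n, m, n) formulation and the state-action-pair formulation. It assumes the `solve` entry point and the `PFI` algorithm type defined elsewhere in this file keep their current interface; the rewards, transition probabilities, and index vectors are made-up illustration values for a 2-state, 2-action model with one infeasible pair.

    using QuantEcon

    beta = 0.95

    # dense formulation: R is (n, m), Q is (n, m, n); the index fields stay null
    R = [5.0 10.0; -1.0 -Inf]        # R[s, a]; -Inf marks the infeasible pair
    Q = Array(Float64, 2, 2, 2)      # Q[s, a, s']
    Q[1, 1, :] = [0.5, 0.5]
    Q[1, 2, :] = [0.0, 1.0]
    Q[2, 1, :] = [0.0, 1.0]
    Q[2, 2, :] = [0.5, 0.5]          # arbitrary row for the infeasible pair
    ddp = DiscreteDP(R, Q, beta)

    # state-action-pair formulation: same model with the infeasible pair dropped
    s_indices = [1, 1, 2]            # state of each s-a pair
    a_indices = [1, 2, 1]            # action of each s-a pair
    R_sa = [5.0, 10.0, -1.0]         # length L = 3
    Q_sa = [0.5 0.5; 0.0 1.0; 0.0 1.0]   # (L, n)
    ddp_sa = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)

    res = solve(ddp, PFI)            # DPSolveResult: res.v, res.sigma, res.mc
    res_sa = solve(ddp_sa, PFI)      # same call works for the s-a formulation

Both constructors now return the same parametric type, so downstream methods dispatch on the `DDP`/`DDPsa` aliases rather than on two unrelated types.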