Skip to content

Commit

Permalink
ENH: added routines to convert ddp between full and SA formulations
Browse files Browse the repository at this point in the history
  • Loading branch information
sglyon committed Jul 31, 2017
1 parent c60251d commit e3f713e
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 14 deletions.
76 changes: 76 additions & 0 deletions quantecon/markov/ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,73 @@ def _check_action_feasibility(self):
'violated for state {s}'.format(s=s)
)

def to_sa_formulation(self, sparse=True):
"""
Convert this instance of `DiscreteDP` to SA-pair form
Parameters
----------
sparse : bool, optional(default=True)
Should the `Q` matrix be stored as a sparse matrix.
If true the CSR format is used
Returns
-------
ddp_sa : DiscreteDP
The correspnoding DiscreteDP instance in SA form
Notes
-----
If this instance is already in SA form then it is returned
un-modified
"""

if self._sa_pair:
return self
else:
s_ind, a_ind = np.where(np.isfinite(self.R))
RL = self.R[s_ind, a_ind]
if sparse:
QL = sp.csr_matrix(self.Q[s_ind, a_ind])
else:
QL = self.Q[s_ind, a_ind]
return DiscreteDP(RL, QL, self.beta, s_ind, a_ind)

def to_full_formulation(self):
"""
Convert this instance of `DiscreteDP` to the "full" form.
The full form uses the version of the init method taking
`R`, `Q` and `beta`.
Parameters
----------
Returns
-------
ddp_sa : DiscreteDP
The correspnoding DiscreteDP instance in full form
Notes
-----
If this instance is already in full form then it is returned
un-modified
"""
if self._sa_pair:
ns = self.num_states
na = self.a_indices.max() + 1
R = np.zeros((ns, na))
R[self.s_indices, self.a_indices] = self.R
Q = np.zeros((ns, na, ns))
if self._sparse:
_fill_dense_Q(self.s_indices, self.a_indices, self.Q.toarray(), Q)
else:
_fill_dense_Q(self.s_indices, self.a_indices, self.Q, Q)
return DiscreteDP(R, Q, self.beta)
else:
return self

def RQ_sigma(self, sigma):
"""
Given a policy `sigma`, return the reward vector `R_sigma` and
Expand Down Expand Up @@ -963,6 +1030,15 @@ def backward_induction(ddp, T, v_term=None):
return vs, sigmas


@jit(nopython=True)
def _fill_dense_Q(s_indices, a_indices, Q_in, Q_out):
L = Q_in.shape[0]
for i in range(L):
Q_out[s_indices[i], a_indices[i], :] = Q_in[i, :]

return Q_out


@jit(nopython=True)
def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax):
n = len(out_max)
Expand Down
57 changes: 43 additions & 14 deletions quantecon/markov/tests/test_ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import scipy.sparse as sparse
from numpy.testing import assert_array_equal, assert_allclose, assert_raises
from nose.tools import eq_, ok_
from nose.tools import ok_

from quantecon.markov import DiscreteDP, backward_induction

Expand Down Expand Up @@ -126,10 +126,7 @@ def test_ddp_beta_0():
v_init = [0, 0, 0]

ddp0 = DiscreteDP(R, Q, beta)
s_indices, a_indices = np.where(R > -np.inf)
R_sa = R[s_indices, a_indices]
Q_sa = Q[s_indices, a_indices]
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
ddp1 = ddp0.to_sa_formulation()
methods = ['vi', 'pi', 'mpi']

for ddp in [ddp0, ddp1]:
Expand All @@ -140,7 +137,6 @@ def test_ddp_beta_0():


def test_ddp_sorting():
n, m = 2, 2
beta = 0.95

# Sorted
Expand Down Expand Up @@ -238,8 +234,6 @@ def test_ddp_negative_inf_error():


def test_ddp_no_feasibile_action_error():
n, m = 3, 2

# No action is feasible at state 1
s_indices = [0, 0, 2, 2]
a_indices = [0, 1, 0, 1]
Expand All @@ -258,12 +252,8 @@ def test_ddp_beta_1_not_implemented_error():
beta = 1

ddp0 = DiscreteDP(R, Q, beta)
s_indices, a_indices = np.where(R > -np.inf)
R_sa = R[s_indices, a_indices]
Q_sa = Q[s_indices, a_indices]
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
Q_sa_sp = sparse.csr_matrix(Q_sa)
ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices)
ddp1 = ddp0.to_sa_formulation()
ddp2 = ddp0.to_sa_formulation(sparse=False)

solution_methods = \
['value_iteration', 'policy_iteration', 'modified_policy_iteration']
Expand All @@ -274,6 +264,45 @@ def test_ddp_beta_1_not_implemented_error():
assert_raises(NotImplementedError, getattr(ddp, method))


def test_ddp_to_sa_and_to_full():
n, m = 3, 2
R = np.array([[0, 1], [1, 0], [0, 1]])
Q = np.empty((n, m, n))
Q[:] = 1/n
beta = 0.95

sparse_R = np.array([0, 1, 1, 0, 0, 1])
sparse_Q = sparse.coo_matrix(np.full((6, 3), 1/3))

ddp = DiscreteDP(R, Q, beta)
ddp_sa = ddp.to_sa_formulation()
ddp_sa2 = ddp_sa.to_sa_formulation()
ddp_sa3 = ddp.to_sa_formulation(sparse=False)
ddp2 = ddp_sa.to_full_formulation()
ddp3 = ddp_sa2.to_full_formulation()
ddp4 = ddp.to_full_formulation()

# make sure conversion worked
for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
assert_allclose(ddp_s.R, sparse_R)
# allcose doesn't work on sparse
np.max(np.abs((sparse_Q - ddp_sa.Q))) < 1e-15
assert_allclose(ddp_s.beta, beta)

for ddp_f in [ddp2, ddp3, ddp4]:
assert_allclose(ddp_f.R, ddp.R)
assert_allclose(ddp_f.Q, ddp.Q)
assert_allclose(ddp_f.beta, ddp.beta)

for method in ["pi", "vi", "mpi"]:
sol1 = ddp.solve(method=method)
for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
sol2 = ddp_other.solve(method=method)

for k in ["v", "sigma", "num_iter"]:
assert_allclose(sol1[k], sol2[k])


if __name__ == '__main__':
import sys
import nose
Expand Down

0 comments on commit e3f713e

Please sign in to comment.