Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: added routines to convert ddp between full and SA formulations #318

Merged
merged 3 commits into from
Aug 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions quantecon/markov/ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,73 @@ def _check_action_feasibility(self):
'violated for state {s}'.format(s=s)
)

def to_sa_pair_form(self, sparse=True):
"""
Convert this instance of `DiscreteDP` to SA-pair form

Parameters
----------
sparse : bool, optional(default=True)
Should the `Q` matrix be stored as a sparse matrix?
If true the CSR format is used

Returns
-------
ddp_sa : DiscreteDP
The correspnoding DiscreteDP instance in SA-pair form

Notes
-----
If this instance is already in SA-pair form then it is returned
un-modified
"""

if self._sa_pair:
return self
else:
s_ind, a_ind = np.where(self.R > - np.inf)
RL = self.R[s_ind, a_ind]
if sparse:
QL = sp.csr_matrix(self.Q[s_ind, a_ind])
else:
QL = self.Q[s_ind, a_ind]
return DiscreteDP(RL, QL, self.beta, s_ind, a_ind)

def to_product_form(self):
"""
Convert this instance of `DiscreteDP` to the "product" form.

The product form uses the version of the init method taking
`R`, `Q` and `beta`.

Parameters
----------

Returns
-------
ddp_sa : DiscreteDP
The correspnoding DiscreteDP instance in product form

Notes
-----
If this instance is already in product form then it is returned
un-modified

"""
if self._sa_pair:
ns = self.num_states
na = self.a_indices.max() + 1
R = np.full((ns, na), -np.inf)
R[self.s_indices, self.a_indices] = self.R
Q = np.zeros((ns, na, ns))
if self._sparse:
_fill_dense_Q(self.s_indices, self.a_indices, self.Q.toarray(), Q)
else:
_fill_dense_Q(self.s_indices, self.a_indices, self.Q, Q)
return DiscreteDP(R, Q, self.beta)
else:
return self

def RQ_sigma(self, sigma):
"""
Given a policy `sigma`, return the reward vector `R_sigma` and
Expand Down Expand Up @@ -963,6 +1030,15 @@ def backward_induction(ddp, T, v_term=None):
return vs, sigmas


@jit(nopython=True)
def _fill_dense_Q(s_indices, a_indices, Q_in, Q_out):
L = Q_in.shape[0]
for i in range(L):
Q_out[s_indices[i], a_indices[i], :] = Q_in[i, :]

return Q_out


@jit(nopython=True)
def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax):
n = len(out_max)
Expand Down
74 changes: 60 additions & 14 deletions quantecon/markov/tests/test_ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import scipy.sparse as sparse
from numpy.testing import assert_array_equal, assert_allclose, assert_raises
from nose.tools import eq_, ok_
from nose.tools import ok_

from quantecon.markov import DiscreteDP, backward_induction

Expand Down Expand Up @@ -126,10 +126,7 @@ def test_ddp_beta_0():
v_init = [0, 0, 0]

ddp0 = DiscreteDP(R, Q, beta)
s_indices, a_indices = np.where(R > -np.inf)
R_sa = R[s_indices, a_indices]
Q_sa = Q[s_indices, a_indices]
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
ddp1 = ddp0.to_sa_pair_form()
methods = ['vi', 'pi', 'mpi']

for ddp in [ddp0, ddp1]:
Expand All @@ -140,7 +137,6 @@ def test_ddp_beta_0():


def test_ddp_sorting():
n, m = 2, 2
beta = 0.95

# Sorted
Expand Down Expand Up @@ -238,8 +234,6 @@ def test_ddp_negative_inf_error():


def test_ddp_no_feasibile_action_error():
n, m = 3, 2

# No action is feasible at state 1
s_indices = [0, 0, 2, 2]
a_indices = [0, 1, 0, 1]
Expand All @@ -258,12 +252,8 @@ def test_ddp_beta_1_not_implemented_error():
beta = 1

ddp0 = DiscreteDP(R, Q, beta)
s_indices, a_indices = np.where(R > -np.inf)
R_sa = R[s_indices, a_indices]
Q_sa = Q[s_indices, a_indices]
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
Q_sa_sp = sparse.csr_matrix(Q_sa)
ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices)
ddp1 = ddp0.to_sa_pair_form()
ddp2 = ddp0.to_sa_pair_form(sparse=False)

solution_methods = \
['value_iteration', 'policy_iteration', 'modified_policy_iteration']
Expand All @@ -274,6 +264,62 @@ def test_ddp_beta_1_not_implemented_error():
assert_raises(NotImplementedError, getattr(ddp, method))


def test_ddp_to_sa_and_to_product():
n, m = 3, 2
R = np.array([[0, 1], [1, 0], [-np.inf, 1]])
Q = np.empty((n, m, n))
Q[:] = 1/n
Q[0, 0, 0] = 0
Q[0, 0, 1] = 2/n
beta = 0.95

sparse_R = np.array([0, 1, 1, 0, 1])
_Q = np.full((5, 3), 1/3)
_Q[0, 0] = 0
_Q[0, 1] = 2/n
sparse_Q = sparse.coo_matrix(_Q)

ddp = DiscreteDP(R, Q, beta)
ddp_sa = ddp.to_sa_pair_form()
ddp_sa2 = ddp_sa.to_sa_pair_form()
ddp_sa3 = ddp.to_sa_pair_form(sparse=False)
ddp2 = ddp_sa.to_product_form()
ddp3 = ddp_sa2.to_product_form()
ddp4 = ddp.to_product_form()

# make sure conversion worked
for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
assert_allclose(ddp_s.R, sparse_R)
# allcose doesn't work on sparse
np.max(np.abs((sparse_Q - ddp_s.Q))) < 1e-15
assert_allclose(ddp_s.beta, beta)

# these two will have probability 0 in state 2, action 0 b/c
# of the infeasiability in R
funky_Q = np.empty((n, m, n))
funky_Q[:] = 1/n
funky_Q[0, 0, 0] = 0
funky_Q[0, 0, 1] = 2/n
funky_Q[2, 0, :] = 0
for ddp_f in [ddp2, ddp3]:
assert_allclose(ddp_f.R, ddp.R)
assert_allclose(ddp_f.Q, funky_Q)
assert_allclose(ddp_f.beta, ddp.beta)

# this one is just the original one.
assert_allclose(ddp4.R, ddp.R)
assert_allclose(ddp4.Q, ddp.Q)
assert_allclose(ddp4.beta, ddp.beta)

for method in ["pi", "vi", "mpi"]:
sol1 = ddp.solve(method=method)
for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
sol2 = ddp_other.solve(method=method)

for k in ["v", "sigma", "num_iter"]:
assert_allclose(sol1[k], sol2[k])


if __name__ == '__main__':
import sys
import nose
Expand Down