Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: added routines to convert ddp between full and SA formulations #318

Merged
merged 3 commits into from
Aug 4, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions quantecon/markov/ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,73 @@ def _check_action_feasibility(self):
'violated for state {s}'.format(s=s)
)

def to_sa_formulation(self, sparse=True):
"""
Convert this instance of `DiscreteDP` to SA-pair form

Parameters
----------
sparse : bool, optional(default=True)
Should the `Q` matrix be stored as a sparse matrix.
If true the CSR format is used

Returns
-------
ddp_sa : DiscreteDP
The correspnoding DiscreteDP instance in SA form

Notes
-----
If this instance is already in SA form then it is returned
un-modified
"""

if self._sa_pair:
return self
else:
s_ind, a_ind = np.where(np.isfinite(self.R))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to write self.R > -np.inf instead of np.isfinite(self.R)?
(self.R may contain np.inf.)

RL = self.R[s_ind, a_ind]
if sparse:
QL = sp.csr_matrix(self.Q[s_ind, a_ind])
else:
QL = self.Q[s_ind, a_ind]
return DiscreteDP(RL, QL, self.beta, s_ind, a_ind)

def to_full_formulation(self):
"""
Convert this instance of `DiscreteDP` to the "full" form.

The full form uses the version of the init method taking
`R`, `Q` and `beta`.

Parameters
----------

Returns
-------
ddp_sa : DiscreteDP
The correspnoding DiscreteDP instance in full form

Notes
-----
If this instance is already in full form then it is returned
un-modified

"""
if self._sa_pair:
ns = self.num_states
na = self.a_indices.max() + 1
R = np.zeros((ns, na))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this should be np.full((ns, na), -np.inf)?

R[self.s_indices, self.a_indices] = self.R
Q = np.zeros((ns, na, ns))
if self._sparse:
_fill_dense_Q(self.s_indices, self.a_indices, self.Q.toarray(), Q)
else:
_fill_dense_Q(self.s_indices, self.a_indices, self.Q, Q)
return DiscreteDP(R, Q, self.beta)
else:
return self

def RQ_sigma(self, sigma):
"""
Given a policy `sigma`, return the reward vector `R_sigma` and
Expand Down Expand Up @@ -963,6 +1030,15 @@ def backward_induction(ddp, T, v_term=None):
return vs, sigmas


@jit(nopython=True)
def _fill_dense_Q(s_indices, a_indices, Q_in, Q_out):
L = Q_in.shape[0]
for i in range(L):
Q_out[s_indices[i], a_indices[i], :] = Q_in[i, :]

return Q_out


@jit(nopython=True)
def _s_wise_max_argmax(a_indices, a_indptr, vals, out_max, out_argmax):
n = len(out_max)
Expand Down
57 changes: 43 additions & 14 deletions quantecon/markov/tests/test_ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import scipy.sparse as sparse
from numpy.testing import assert_array_equal, assert_allclose, assert_raises
from nose.tools import eq_, ok_
from nose.tools import ok_

from quantecon.markov import DiscreteDP, backward_induction

Expand Down Expand Up @@ -126,10 +126,7 @@ def test_ddp_beta_0():
v_init = [0, 0, 0]

ddp0 = DiscreteDP(R, Q, beta)
s_indices, a_indices = np.where(R > -np.inf)
R_sa = R[s_indices, a_indices]
Q_sa = Q[s_indices, a_indices]
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
ddp1 = ddp0.to_sa_formulation()
methods = ['vi', 'pi', 'mpi']

for ddp in [ddp0, ddp1]:
Expand All @@ -140,7 +137,6 @@ def test_ddp_beta_0():


def test_ddp_sorting():
n, m = 2, 2
beta = 0.95

# Sorted
Expand Down Expand Up @@ -238,8 +234,6 @@ def test_ddp_negative_inf_error():


def test_ddp_no_feasibile_action_error():
n, m = 3, 2

# No action is feasible at state 1
s_indices = [0, 0, 2, 2]
a_indices = [0, 1, 0, 1]
Expand All @@ -258,12 +252,8 @@ def test_ddp_beta_1_not_implemented_error():
beta = 1

ddp0 = DiscreteDP(R, Q, beta)
s_indices, a_indices = np.where(R > -np.inf)
R_sa = R[s_indices, a_indices]
Q_sa = Q[s_indices, a_indices]
ddp1 = DiscreteDP(R_sa, Q_sa, beta, s_indices, a_indices)
Q_sa_sp = sparse.csr_matrix(Q_sa)
ddp2 = DiscreteDP(R_sa, Q_sa_sp, beta, s_indices, a_indices)
ddp1 = ddp0.to_sa_formulation()
ddp2 = ddp0.to_sa_formulation(sparse=False)

solution_methods = \
['value_iteration', 'policy_iteration', 'modified_policy_iteration']
Expand All @@ -274,6 +264,45 @@ def test_ddp_beta_1_not_implemented_error():
assert_raises(NotImplementedError, getattr(ddp, method))


def test_ddp_to_sa_and_to_full():
n, m = 3, 2
R = np.array([[0, 1], [1, 0], [0, 1]])
Q = np.empty((n, m, n))
Q[:] = 1/n
beta = 0.95

sparse_R = np.array([0, 1, 1, 0, 0, 1])
sparse_Q = sparse.coo_matrix(np.full((6, 3), 1/3))

ddp = DiscreteDP(R, Q, beta)
ddp_sa = ddp.to_sa_formulation()
ddp_sa2 = ddp_sa.to_sa_formulation()
ddp_sa3 = ddp.to_sa_formulation(sparse=False)
ddp2 = ddp_sa.to_full_formulation()
ddp3 = ddp_sa2.to_full_formulation()
ddp4 = ddp.to_full_formulation()

# make sure conversion worked
for ddp_s in [ddp_sa, ddp_sa2, ddp_sa3]:
assert_allclose(ddp_s.R, sparse_R)
# allcose doesn't work on sparse
np.max(np.abs((sparse_Q - ddp_sa.Q))) < 1e-15
assert_allclose(ddp_s.beta, beta)

for ddp_f in [ddp2, ddp3, ddp4]:
assert_allclose(ddp_f.R, ddp.R)
assert_allclose(ddp_f.Q, ddp.Q)
assert_allclose(ddp_f.beta, ddp.beta)

for method in ["pi", "vi", "mpi"]:
sol1 = ddp.solve(method=method)
for ddp_other in [ddp_sa, ddp_sa2, ddp_sa3, ddp2, ddp3, ddp4]:
sol2 = ddp_other.solve(method=method)

for k in ["v", "sigma", "num_iter"]:
assert_allclose(sol1[k], sol2[k])


if __name__ == '__main__':
import sys
import nose
Expand Down