In [2]:
import numpy as np
from scipy import linalg

In [3]:
# Transition probability matrix of the Markov chain.
# P[i, j] is the probability of moving from state i to state j, so every
# row must sum to 1.
# NOTE: the `np.float` alias was deprecated in NumPy 1.20 and removed in
# NumPy 1.24; the builtin `float` selects the same float64 dtype.
n_states = 3
P = np.zeros((n_states, n_states), dtype=float)
P[0, 1] = 0.7
P[0, 2] = 0.3
P[1, 0] = 0.5
P[1, 2] = 0.5
P[2, 1] = 0.1
P[2, 2] = 0.9
P

array([[0. , 0.7, 0.3],
       [0.5, 0. , 0.5],
       [0. , 0.1, 0.9]])

In [4]:
# Sanity check: P is a probability matrix, so each row (summed across its
# columns) must add up to 1.
# Compare with a tolerance instead of exact `== 1`: floating-point sums of
# valid probabilities are not guaranteed to equal 1.0 bit-for-bit.
assert np.allclose(P.sum(axis=1), 1.0), "each row of P must sum to 1"

In [5]:
# Reward matrix: R[i, j] is the reward attached to the transition from
# state i to state j. Entries left at 0 are never weighted by a non-zero
# probability in P, except R[1, 0], which is an explicit zero reward.
# NOTE: the `np.float` alias was deprecated in NumPy 1.20 and removed in
# NumPy 1.24; the builtin `float` selects the same float64 dtype.
R = np.zeros((n_states, n_states), dtype=float)
R[0, 1] = 1
R[0, 2] = 10
R[1, 0] = 0
R[1, 2] = 1
R[2, 1] = -1
R[2, 2] = 10

In [6]:
# Expected immediate reward of each state: weight every transition reward
# R[i, j] by its probability P[i, j] and sum over the destination states j.
# keepdims=True keeps the result as an (n_states, 1) column vector.
R_expected = (P * R).sum(axis=1, keepdims=True)

In [7]:
# Display the expected immediate reward of each state as an
# (n_states, 1) column vector, one row per state.
R_expected

array([[3.7],
       [0.5],
       [8.9]])

In [10]:
# Discount factor of the Bellman equation: it scales the transition
# matrix P when the linear system for the state values is built.
gamma = 0.9

In [11]:
# Bellman equation in matrix form: V = R_expected + gamma * P @ V.
# Collecting the V terms gives (I - gamma * P) @ V = R_expected;
# A is the coefficient matrix of that linear system.
A = np.identity(n_states) - gamma * P

In [12]:
# Right-hand side of the linear system A @ V = B: the expected immediate
# reward of each state. B is an alias of R_expected, not a copy.
B = R_expected

In [15]:
# Solve the linear system A @ V = B with SciPy's dense solver.
# V is a column vector holding the value of each state.
V = linalg.solve(a=A, b=B)
V

array([[65.540732  ],
       [64.90791027],
       [77.5879575 ]])