# Student Example
## Student MRP
![student_mrp](../images/lecture_1/student_mrp.png)

In [1]:
import numpy as np
"""
states:
class_1 class_2 class_3 facebook pub pass sleep
"""
p_mrp = np.array([[0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0],
              [0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.2],
              [0.0, 0.0, 0.0, 0.0, 0.4, 0.6, 0.0],
              [0.1, 0.0, 0.0, 0.9, 0.0, 0.0, 0.0],
              [0.2, 0.4, 0.4, 0.0, 0.0, 0.0, 0.0],
              [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
              [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]])
r_mrp = np.array([-2, -2, -2, -1, +1, +10, 0])


## State Value
![gamma_0](../images/lecture_1/student_mrp_sv_gamma_0.png)
![gamma_0.9](../images/lecture_1/student_mrp_sv_gamma_09.png)
![gamma_1](../images/lecture_1/student_mrp_sv_gamma_1.png)
### Analytic Solution

In [2]:
def analytic_mrp(p_transition,reward,gamma,num_state):
    return np.linalg.pinv(np.eye(num_state)-gamma*p_transition).dot(reward)

In [3]:
analytic_mrp(p_mrp,r_mrp,0,7)

array([-2., -2., -2., -1.,  1., 10.,  0.])

In [4]:
analytic_mrp(p_mrp,r_mrp,0.9,7)

array([-5.01272891e+00,  9.42655298e-01,  4.08702125e+00, -7.63760843e+00,
        1.90839235e+00,  1.00000000e+01, -1.66533454e-16])

In [5]:
analytic_mrp(p_mrp,r_mrp,1,7)

array([ -9.8994709 ,   4.1005291 ,   6.96472663, -19.8994709 ,
         3.44620811,  12.64373898,   2.64373898])

### Dynamic Progarmming Solution

In [6]:
def dp_mrp(p_transition,reward,gamma):
    v_0=reward
    v_1=reward+gamma*p_transition.dot(v_0)
    while np.sum(np.power(v_1-v_0,2))>1e-3:
        v_0=v_1
        v_1=reward+gamma*p_transition.dot(v_0)
    return v_1

In [7]:
dp_mrp(p_mrp,r_mrp,0)

array([-2., -2., -2., -1.,  1., 10.,  0.])

In [8]:
dp_mrp(p_mrp,r_mrp,0.9)

array([-4.92590951,  0.95204197,  4.09823574, -7.48108474,  1.93518601,
       10.        ,  0.        ])

In [9]:
dp_mrp(p_mrp,r_mrp,1)

array([-12.23255073,   1.49040901,   4.36114002, -21.9831768 ,
         0.89838023,  10.        ,   0.        ])