## MDP Tests

### Imports

In [1]:
from random_mdp_factory import MDPFactory
from enums import MDPTransitionType, MDPRewardType

In [2]:
import numpy as np
import warnings

In [3]:
# Install a global "ignore" filter: with no category/module/message given, it
# matches every warning raised from here on in this kernel session.
# NOTE(review): this also hides warnings coming from the MDP code exercised
# below — consider narrowing it with `category=` / `module=` once the noisy
# warning is identified.
warnings.filterwarnings(action="ignore")

### Transition S Deterministic, Reward S

In [4]:
# Build the MDP used in this section: S_DETERMINISTIC transitions, S rewards.
# NOTE(review): the positional arguments 10, 4, 5 are unnamed here. The first is
# presumably the number of states and the second the number of actions; the
# meaning of the third cannot be told from this notebook — TODO confirm against
# the MDPFactory signature and pass them as keywords if it allows.
factory_sd_s = MDPFactory(MDPTransitionType.S_DETERMINISTIC, MDPRewardType.S, 10, 4, 5)

In [5]:
# Print the generated MDP one labelled line at a time: terminal-state flags,
# then the transition table, then the reward table.
for label, value in (
    ("Terminate :", factory_sd_s.terminate_s_flags),
    ("Transitions :", factory_sd_s.transitions),
    ("Rewards :", factory_sd_s.rewards),
):
    print(label, value)

Terminate : [False  True False False False False False False False False]
Transitions : [7 3 9 1 1 1 0 5 4 8]
Rewards : [ 0  1  0  0 -2 -2 -1 -1 -3 -2]


In [6]:
# Train first, then roll out: train_policy() is called before play() so the
# rollout presumably uses the freshly trained policy (TODO confirm in MDPFactory).
factory_sd_s.train_policy()
# play() prints one line per step. The columns look like
# "action => next_state reward terminated truncated info" — inferred from the
# recorded output, not from the implementation.
factory_sd_s.play()

. => 0 None False False {}
0 => 7 0 False False {}
0 => 5 -1 False False {}
0 => 1 -2 True False {}


### Transition S Probabilistic, Reward S

In [7]:
# Build the MDP used in this section: S_PROBABILISTIC transitions, S rewards.
# NOTE(review): positional arguments 10, 5, 10 are unnamed — presumably
# (number of states, number of actions, <unknown>); TODO confirm against the
# MDPFactory signature.
factory_sp_s = MDPFactory(MDPTransitionType.S_PROBABILISTIC, MDPRewardType.S, 10, 5, 10)

In [8]:
# Print the generated MDP: terminal-state flags and rewards on one line each,
# then the transition table starting on its own line (it is a 2-D matrix here).
for label, value in (
    ("Terminate :", factory_sp_s.terminate_s_flags),
    ("Rewards :", factory_sp_s.rewards),
    ("Transitions :\n", factory_sp_s.transitions),
):
    print(label, value)

Terminate : [False False False  True False False False False False False]
Rewards : [-8 -1 -6  1 -1 -4 -7 -1 -4 -8]
Transitions :
 [[0.09375    0.25       0.09375    0.1875     0.03125    0.125
  0.03125    0.15625    0.         0.03125   ]
 [0.09615385 0.15384615 0.01923077 0.15384615 0.17307692 0.03846154
  0.11538462 0.13461538 0.11538462 0.        ]
 [0.1509434  0.         0.11320755 0.05660377 0.1509434  0.11320755
  0.13207547 0.0754717  0.0754717  0.13207547]
 [0.08571429 0.17142857 0.11428571 0.02857143 0.14285714 0.
  0.11428571 0.11428571 0.2        0.02857143]
 [0.13513514 0.16216216 0.10810811 0.05405405 0.08108108 0.05405405
  0.         0.08108108 0.10810811 0.21621622]
 [0.02222222 0.2        0.         0.04444444 0.13333333 0.13333333
  0.13333333 0.11111111 0.15555556 0.06666667]
 [0.06122449 0.14285714 0.18367347 0.14285714 0.12244898 0.06122449
  0.         0.12244898 0.10204082 0.06122449]
 [0.12195122 0.02439024 0.12195122 0.09756098 0.12195122 0.02439024
  0.12195

In [9]:
# Roll out one episode on the probabilistic MDP; play() prints one line per step.
# NOTE(review): unlike the S-deterministic section, no train_policy() call
# precedes this play(), and the first column of the recorded output is 0 on
# every step — confirm whether playing with an untrained policy is intended.
factory_sp_s.play()

. => 0 None False False {}
0 => 5 -8 False False {}
0 => 5 -4 False False {}
0 => 8 -4 False False {}
0 => 5 -4 False False {}
0 => 1 -4 False False {}
0 => 8 -1 False False {}
0 => 6 -4 False False {}
0 => 7 -7 False False {}
0 => 1 -1 False False {}
0 => 4 -1 False False {}
0 => 2 -1 False False {}
0 => 9 -6 False False {}
0 => 6 -8 False False {}
0 => 7 -7 False False {}
0 => 4 -1 False False {}
0 => 1 -1 False False {}
0 => 4 -1 False False {}
0 => 0 -1 False False {}
0 => 7 -8 False False {}
0 => 6 -1 False False {}
0 => 7 -7 False False {}
0 => 3 -1 True False {}


### Transition SA Deterministic, Reward SA

In [28]:
# Build the MDP used in this section: SA_DETERMINISTIC transitions, SA rewards.
# NOTE(review): the three positional integers are unnamed, and the meaning of
# `p` and `eps` is not visible from this notebook — TODO confirm against the
# MDPFactory signature.
factory_sad_sa = MDPFactory(
    MDPTransitionType.SA_DETERMINISTIC,
    MDPRewardType.SA,
    10,
    5,
    10,
    p=0.01,
    eps=1e-6,
)

# Print the generated MDP; the two tables are blank-line separated and start on
# their own line.
for label, value in (
    ("Terminate :", factory_sad_sa.terminate_s_flags),
    ("\nRewards :\n", factory_sad_sa.rewards),
    ("\nTransitions :\n", factory_sad_sa.transitions),
):
    print(label, value)

Terminate : [False False False False False False False False False  True]

Rewards :
 [[-2 -6 -2  1 -7]
 [-2  0 -8 -7  1]
 [ 1 -4 -2 -4 -4]
 [ 1  1 -3 -3 -4]
 [-4  0 -6 -6 -2]
 [-1 -6 -2 -6 -7]
 [-2 -3 -6  1 -2]
 [ 1 -5 -3 -2 -8]
 [ 0 -1 -2 -5 -5]
 [-8 -5 -3 -5 -2]]

Transitions :
 [[1 6 5 9 2]
 [0 2 4 5 9]
 [9 4 8 4 8]
 [9 9 4 1 8]
 [5 3 6 1 3]
 [6 6 5 4 8]
 [4 2 4 9 0]
 [9 7 0 4 2]
 [5 6 3 6 6]
 [2 5 3 1 4]]


In [29]:
# Roll out BEFORE any train_policy() call on this factory (baseline run).
# NOTE(review): the recorded output alternates between states 0 and 1 with
# action 0 and never reaches a terminal state; it is cut off in the saved
# notebook. Confirm that play() enforces a step limit, and consider clearing or
# truncating this output before sharing.
factory_sad_sa.play()

. => 0 None False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {}
0 => 0 -2 False False {}
0 => 1 -2 False False {

In [30]:
# Train a policy for this MDP (presumably stored on the factory — TODO confirm).
factory_sad_sa.train_policy()
# Kept as the cell's last expression so the notebook displays the returned
# policy (shown in the recorded output as an array with one entry per state).
factory_sad_sa.get_policy()

array([3, 1, 0, 0, 1, 0, 3, 0, 0, 4])

In [31]:
# Roll out again now that train_policy() has been called, for comparison with
# the pre-training rollout of the same factory earlier in this section.
factory_sad_sa.play()

. => 0 None False False {}
3 => 9 1 True False {}
