In [1]:
!pip install pymdptoolbox

Collecting pymdptoolbox
  Downloading pymdptoolbox-4.0-b3.zip (29 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pymdptoolbox
  Building wheel for pymdptoolbox (setup.py) ... [?25l[?25hdone
  Created wheel for pymdptoolbox: filename=pymdptoolbox-4.0b3-py3-none-any.whl size=25656 sha256=4c2f84b85d54f10789e9b64846e6e15e14bb7ab214db38dfa65e180ec5406d2e
  Stored in directory: /root/.cache/pip/wheels/2b/e7/c7/d7abf9e309f3573a934fed2750c70bd75d9e9d901f7f16e183
Successfully built pymdptoolbox
Installing collected packages: pymdptoolbox
Successfully installed pymdptoolbox-4.0b3


In [2]:
import mdptoolbox.example
import mdptoolbox.mdp
import numpy as np

In [3]:
# Build the forest-management example MDP.
#
# Inputs
#   S  : number of states (in this example, the possible ages of the forest)
#   r1 : reward for choosing 'wait' when the forest is in its oldest state
#   r2 : reward for choosing 'cut' when the forest is in its oldest state
#   p  : probability of a wildfire occurring
#
# Outputs
#   P : transition probabilities, a numpy array of shape (A, S, S), where A is
#       the number of actions and S is the number of states
#   R : reward matrix of shape (S, A)
P, R = mdptoolbox.example.forest(
    S=3,
    r1=4,
    r2=2,
    p=0.1,
    is_sparse=False,
)

In [4]:
# Display the full transition array: one (S, S) matrix per action, indexed as P[action][state][next_state].
P

array([[[0.1, 0.9, 0. ],
        [0.1, 0. , 0.9],
        [0.1, 0. , 0.9]],

       [[1. , 0. , 0. ],
        [1. , 0. , 0. ],
        [1. , 0. , 0. ]]])

In [5]:
# Transition matrix for action 0 ('wait'): rows are the current state, columns the next state.
P[0]

array([[0.1, 0.9, 0. ],
       [0.1, 0. , 0.9],
       [0.1, 0. , 0.9]])

In [6]:
# Example: the probability that a forest in its youngest state (0)
# advances to the next age class (1) if we wait (action 0).
print(P[0, 0, 1])

0.9


In [7]:
# Example: the probability that a forest in its oldest state (2)
# burns down, i.e. returns to state 0, if we wait (action 0).
print(P[0, 2, 0])

0.1


Exploring the rewards matrix. The rewards matrix has shape (S, A): one row per state and one column per action.

In [8]:
# Display the reward matrix: rows are states (youngest to oldest), columns are actions (0 = wait, 1 = cut).
R

array([[0., 0.],
       [0., 1.],
       [4., 2.]])

In [9]:
# Reward for waiting (column 0) when the forest is in its oldest state:
# the one-hot mask selects the last state's entry.
np.dot(R[:, 0], [0, 0, 1])

4.0

In [10]:
# Total reward for waiting (column 0) across the two younger states:
# the mask keeps every state except the oldest.
np.dot(R[:, 0], [1, 1, 0])

0.0

In [11]:
# Reward for cutting (column 1) when the forest is in its oldest state.
np.dot(R[:, 1], [0, 0, 1])

2.0

In [12]:
# Reward for cutting (column 1) when the forest is in its second-youngest state.
np.dot(R[:, 1], [0, 1, 0])

1.0

# Finding the optimal "policy"

In [13]:
# Q-learning estimates the policy from randomly simulated transitions, so the
# result can change between runs. Seed NumPy's global RNG (which the solver
# draws from) to make the displayed policy reproducible.
np.random.seed(0)

# discount=0.1 weights future rewards very lightly (short-term strategy).
model = mdptoolbox.mdp.QLearning(P, R, discount=0.1)
model.run()

# One action per state, youngest to oldest: 0 = wait, 1 = cut.
model.policy

(0, 1, 1)

In [14]:
# Recommended action for the youngest state (index 0): 0 = wait, 1 = cut.
model.policy[0]

0

In [15]:
# Recommended action for the second-youngest state (index 1): 0 = wait, 1 = cut.
model.policy[1]

1

In [16]:
# Recommended action for the oldest state (index 2): 0 = wait, 1 = cut.
model.policy[2]

1

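# Applying a discount to our model
(What is a discount? The discount factor is a number between 0 and 1 that sets how much future rewards count relative to immediate ones: values near 1 favor long-term planning, values near 0 favor immediate reward.)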

In [17]:
# A discount factor of 0.99 values future rewards almost as much as immediate
# ones, i.e. the scenario is assumed very likely to continue (long-term strategy).
#
# Q-learning samples transitions at random; seed NumPy's global RNG so the
# policy shown below is reproducible across runs.
np.random.seed(0)
model = mdptoolbox.mdp.QLearning(P, R, discount=0.99)
model.run()
model.policy

(0, 0, 0)

In [18]:
# A discount factor of 0.5 halves the weight of the reward at each further
# step, so the model is much more short-sighted than with 0.99.
# NOTE(review): the original comment described a "1% discount", but the value
# actually used here is 0.5; use discount=0.01 for a strongly short-term run.
#
# Q-learning samples transitions at random; seed NumPy's global RNG so the
# policy shown below is reproducible across runs.
np.random.seed(0)
model = mdptoolbox.mdp.QLearning(P, R, discount=0.5)
model.run()
model.policy

(0, 0, 0)

In [19]:
# Build an example MDP with 5 states and 4 actions and display what the
# generator returns (the output starts with one 5x5 transition matrix per action).
mdptoolbox.example.rand(S=5, A=4)


(array([[[0.        , 0.        , 0.        , 1.        , 0.        ],
         [0.62171401, 0.        , 0.        , 0.37828599, 0.        ],
         [0.26954371, 0.53876999, 0.        , 0.        , 0.19168631],
         [0.        , 0.        , 1.        , 0.        , 0.        ],
         [0.10068004, 0.37400855, 0.3819096 , 0.04990294, 0.09349887]],
 
        [[0.28870937, 0.31474932, 0.        , 0.        , 0.39654131],
         [0.        , 0.        , 0.        , 0.        , 1.        ],
         [0.0970911 , 0.34278019, 0.        , 0.16673506, 0.39339366],
         [0.26739848, 0.        , 0.33516087, 0.02771253, 0.36972812],
         [0.        , 0.        , 1.        , 0.        , 0.        ]],
 
        [[0.        , 0.02094889, 0.        , 0.97905111, 0.        ],
         [0.20911959, 0.1292207 , 0.1318295 , 0.21160817, 0.31822205],
         [0.34537801, 0.59211727, 0.06250472, 0.        , 0.        ],
         [0.15981692, 0.37394382, 0.3073153 , 0.10140783, 0.05751614],


In [20]:
# FIXME: `ec_data` is not defined in any cell visible above, so this cell raises
# NameError on a fresh kernel. Define/load `ec_data` first or remove this cell.
len(ec_data)


NameError: ignored