Experiments on optimal policies in the SMDP version of RiverSwim.

In [416]:
# Load packages and utilities. 
from tabulate import tabulate
import numpy as np
import grid_world_class as gw
import riverswim_class as rs 
import riverswim_class_smdp as rs_s 

import UCRL2_L as ucrl
import UCRL_SMDP as ucrlS
import experiment_utils as utils
import importlib
importlib.reload(gw)
importlib.reload(rs)
importlib.reload(rs_s)
importlib.reload(ucrl)
importlib.reload(ucrlS)
importlib.reload(utils)
import matplotlib.pyplot as plt

In [417]:
# Sanity check: run value iteration on the SMDP RiverSwim with the 'uniform'
# distribution and compare against the two rs.riverswim environments.
env_smdp = rs_s.riverswim(nS=20, T_max=20, distribution='uniform', param=None)  # no parameter is passed for 'uniform'
env_option = rs.riverswim(nS=20, T_max=20)
env_mdp = rs.riverswim(nS=20, T_max=1)  # T_max=1: the standard MDP

# env_option and env_mdp are reused by the following test cells.
for _env in (env_smdp, env_option, env_mdp):
    print(utils.VI(_env))


(33, array([1.6       , 1.54444444, 1.48888889, 1.43333333, 1.37777778,
       1.32222222, 1.26666667, 1.21111111, 1.15555556, 1.1       ,
       1.04444444, 0.98888889, 0.93333333, 0.87777778, 0.82222222,
       0.76666667, 0.71111111, 0.65555556, 0.88163052, 2.14079486]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]), 0.047134601264002096)
(138, array([37.11330937, 38.36867139, 39.78768979, 41.23385495, 42.68917881,
       44.15046135, 45.61644346, 47.08621531, 48.55901143, 50.03418513,
       51.51119383, 52.98958937, 54.46900833, 55.94916214, 57.42982701,
       58.91083411, 60.39206014, 61.87341847, 63.35485095, 64.83632064]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 0.46172766875120885)
(90, array([14.57849718, 15.84774217, 17.28460366, 18.74967216, 20.22296147,
       21.70008175, 23.17927394, 24.6595929 , 26.14050631, 27.62172134,
       29.10308312, 30.58451324, 32.06597373, 33.54744708, 35.0289256 ,
       36.51040608, 37.9918

In [418]:
# Sanity check with the 'constant' holding-time distribution; env_option and
# env_mdp come from the first test cell and are printed again for comparison.
env_smdp = rs_s.riverswim(nS=20, T_max=20, distribution='constant', param=None)

for _env in (env_smdp, env_option, env_mdp):
    print(utils.VI(_env))

(46, array([2.25      , 2.19444444, 2.13888889, 2.08333333, 2.02777778,
       1.97222222, 1.91666667, 1.86111111, 1.80555556, 1.75      ,
       1.69444444, 1.63888889, 1.58333333, 1.52777778, 1.47222222,
       1.41666667, 1.36111111, 1.30555556, 1.25      , 1.80053465]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), 0.045045188190237284)
(138, array([37.11330937, 38.36867139, 39.78768979, 41.23385495, 42.68917881,
       44.15046135, 45.61644346, 47.08621531, 48.55901143, 50.03418513,
       51.51119383, 52.98958937, 54.46900833, 55.94916214, 57.42982701,
       58.91083411, 60.39206014, 61.87341847, 63.35485095, 64.83632064]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 0.46172766875120885)
(90, array([14.57849718, 15.84774217, 17.28460366, 18.74967216, 20.22296147,
       21.70008175, 23.17927394, 24.6595929 , 26.14050631, 27.62172134,
       29.10308312, 30.58451324, 32.06597373, 33.54744708, 35.0289256 ,
       36.51040608, 37.9918

In [419]:
# Sanity check with the (modified) Poisson distribution; env_option and env_mdp
# come from the first test cell and are printed again for comparison.
env_smdp = rs_s.riverswim(nS=20, T_max=20, distribution='poisson', param=7)  # param = 7 (presumably the Poisson rate lambda)

for _env in (env_smdp, env_option, env_mdp):
    print(utils.VI(_env))

No convergence in VI after:  1000  steps!
(1001, array([50.05      , 49.99444444, 49.93888889, 49.88333333, 49.82777778,
       49.77222222, 49.71666667, 50.14037363, 51.42028007, 52.85927573,
       54.33151915, 55.81101675, 57.29207737, 58.77346983, 60.25493249,
       61.73641   , 63.21789066, 64.69937198, 66.18085343, 67.66233491]), array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 0.058328991658743945)
(138, array([37.11330937, 38.36867139, 39.78768979, 41.23385495, 42.68917881,
       44.15046135, 45.61644346, 47.08621531, 48.55901143, 50.03418513,
       51.51119383, 52.98958937, 54.46900833, 55.94916214, 57.42982701,
       58.91083411, 60.39206014, 61.87341847, 63.35485095, 64.83632064]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 0.46172766875120885)
(90, array([14.57849718, 15.84774217, 17.28460366, 18.74967216, 20.22296147,
       21.70008175, 23.17927394, 24.6595929 , 26.14050631, 27.62172134,
       29.10308312, 30.58451324, 32

In [420]:
# Sanity check with the (modified) binomial distribution, param = 0.4;
# env_option and env_mdp come from the first test cell.
env_smdp = rs_s.riverswim(nS=20, T_max=20, distribution='binomial', param=0.4)

for _env in (env_smdp, env_option, env_mdp):
    print(utils.VI(_env))

(75, array([3.7       , 3.64444444, 3.58888889, 3.53333333, 3.47777778,
       3.42222222, 3.36666667, 3.31111111, 3.25555556, 3.2       ,
       3.14444444, 3.08888889, 3.03333333, 2.97777778, 2.92222222,
       2.86666667, 2.81111111, 2.75555556, 3.77578662, 5.22058064]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]), 0.054991997663440984)
(138, array([37.11330937, 38.36867139, 39.78768979, 41.23385495, 42.68917881,
       44.15046135, 45.61644346, 47.08621531, 48.55901143, 50.03418513,
       51.51119383, 52.98958937, 54.46900833, 55.94916214, 57.42982701,
       58.91083411, 60.39206014, 61.87341847, 63.35485095, 64.83632064]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 0.46172766875120885)
(90, array([14.57849718, 15.84774217, 17.28460366, 18.74967216, 20.22296147,
       21.70008175, 23.17927394, 24.6595929 , 26.14050631, 27.62172134,
       29.10308312, 30.58451324, 32.06597373, 33.54744708, 35.0289256 ,
       36.51040608, 37.9918

In [421]:
# Sanity check with the geometric distribution, param = 0.3;
# env_option and env_mdp come from the first test cell.
env_smdp = rs_s.riverswim(nS=20, T_max=20, distribution='geometric', param=0.3)

for _env in (env_smdp, env_option, env_mdp):
    print(utils.VI(_env))

(325, array([18.55225662, 19.75648295, 21.14953303, 22.59275273, 24.05637612,
       25.52949171, 27.00710399, 28.48682836, 29.96752932, 31.44867292,
       32.93001243, 34.41143624, 35.8928952 , 37.37436827, 38.85584677,
       40.33732726, 41.81880844, 43.30028984, 44.78177131, 46.26325279]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 0.13502822156950778)
(138, array([37.11330937, 38.36867139, 39.78768979, 41.23385495, 42.68917881,
       44.15046135, 45.61644346, 47.08621531, 48.55901143, 50.03418513,
       51.51119383, 52.98958937, 54.46900833, 55.94916214, 57.42982701,
       58.91083411, 60.39206014, 61.87341847, 63.35485095, 64.83632064]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 0.46172766875120885)
(90, array([14.57849718, 15.84774217, 17.28460366, 18.74967216, 20.22296147,
       21.70008175, 23.17927394, 24.6595929 , 26.14050631, 27.62172134,
       29.10308312, 30.58451324, 32.06597373, 33.54744708, 35.0289256 ,
       

With the tests done, we now write the key results (optimal gains and optimal policies) to LaTeX tables.

In [422]:
def tables(S,T_max,lambda_p,p1,p2):
    """Print a LaTeX table of VI policies and gains for several RiverSwim variants.

    Value iteration (``utils.VI``) is run on the standard MDP
    (``rs.riverswim`` with ``T_max=1``) and on five SMDP variants
    (``rs_s.riverswim``) that differ only in ``distribution``/``param``.
    Element 2 of each VI result is used as the policy and element 3 as the gain.

    Parameters
    ----------
    S : int
        Number of states (passed as ``nS``); also the number of policy rows.
    T_max : int
        ``T_max`` passed to every SMDP environment.
    lambda_p
        ``param`` for the 'poisson' environment.
    p1
        ``param`` for the 'binomial' environment.
    p2
        ``param`` for the 'geometric' environment.

    Returns
    -------
    None
        The table is printed to stdout: one row per state with 'L' where the
        policy's action is 0 and 'R' otherwise, followed by a final 'Gain' row
        with the gains rounded to 3 decimals.
    """
    headers = [
        'Index/gain',
        'MDP',
        'Unif(' + str(T_max) + ')',
        'Const(' + str(T_max) + ')',
        'Pois(' + str(lambda_p) + ')',
        'Binomial(' + str(p1) + ')',
        'Geom(' + str(p2) + ')',
    ]

    # (distribution, param) for each SMDP column, in the same order as the
    # headers. 'uniform' and 'constant' are called without a parameter.
    smdp_settings = [
        ('uniform', None),
        ('constant', None),
        ('poisson', lambda_p),
        ('binomial', p1),
        ('geometric', p2),
    ]

    # First column: the standard MDP (T_max=1); then one VI run per SMDP variant.
    results = [utils.VI(rs.riverswim(nS=S, T_max=1))]
    for distribution, param in smdp_settings:
        env = rs_s.riverswim(nS=S, T_max=T_max, distribution=distribution, param=param)
        results.append(utils.VI(env))

    # One column per environment, one row per state; action 0 -> 'L', else 'R'.
    policies = np.array([result[2] for result in results]).T
    policies = np.where(policies == 0, 'L', 'R')

    # Last row: label followed by the rounded gain of each environment.
    gains = np.array(['Gain'] + [np.round(result[3], 3) for result in results])
    gains = gains.reshape((1, len(headers)))

    # Appending string arrays to the integer index column turns the whole
    # table into strings, which is what tabulate receives.
    table = np.array(range(S)).reshape((S, 1))
    table = np.append(table, policies, axis=1)
    table = np.append(table, gains, axis=0)
    print(tabulate(table, headers=headers, tablefmt='latex', numalign="center"))



In [423]:
# Baseline configuration.
S, T_max = 20, 20    # number of states, T_max of the SMDP environments
lambda_p = 4         # parameter of the Poisson column
p1, p2 = 0.5, 0.5    # parameters of the binomial and geometric columns

tables(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(20)   & Const(20)   & Pois(4)   & Binomial(0.5)   & Geom(0.5)   \\
\hline
 0            & R     & L          & L           & R         & L               & R           \\
 1            & R     & L          & L           & R         & L               & R           \\
 2            & R     & L          & L           & R         & L               & R           \\
 3            & R     & L          & L           & R         & L               & R           \\
 4            & R     & L          & L           & R         & L               & R           \\
 5            & R     & L          & L           & R         & L               & R           \\
 6            & R     & L          & L           & R         & L               & R           \\
 7            & R     & L          & L           & R         & L               & R           \\
 8            & R     & L          & L           & R         & L               & R           \\
 

In [424]:
# Configuration for which every environment's policy goes right.
S, T_max = 20, 5     # number of states, T_max of the SMDP environments
lambda_p = 4         # parameter of the Poisson column
p1, p2 = 0.1, 0.5    # parameters of the binomial and geometric columns

tables(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(5)   & Const(5)   & Pois(4)   & Binomial(0.1)   & Geom(0.5)   \\
\hline
 0            & R     & R         & R          & R         & R               & R           \\
 1            & R     & R         & R          & R         & R               & R           \\
 2            & R     & R         & R          & R         & R               & R           \\
 3            & R     & R         & R          & R         & R               & R           \\
 4            & R     & R         & R          & R         & R               & R           \\
 5            & R     & R         & R          & R         & R               & R           \\
 6            & R     & R         & R          & R         & R               & R           \\
 7            & R     & R         & R          & R         & R               & R           \\
 8            & R     & R         & R          & R         & R               & R           \\
 9            & R    

In [432]:
# Configuration for which every SMDP policy goes left (the MDP still goes right).
S, T_max = 20, 50    # number of states, T_max of the SMDP environments
lambda_p = 30        # parameter of the Poisson column
p1, p2 = 0.9, 0.01   # parameters of the binomial and geometric columns

tables(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(50)   & Const(50)   & Pois(30)   & Binomial(0.9)   & Geom(0.01)   \\
\hline
 0            & R     & L          & L           & L          & L               & L            \\
 1            & R     & L          & L           & L          & L               & L            \\
 2            & R     & L          & L           & L          & L               & L            \\
 3            & R     & L          & L           & L          & L               & L            \\
 4            & R     & L          & L           & L          & L               & L            \\
 5            & R     & L          & L           & L          & L               & L            \\
 6            & R     & L          & L           & L          & L               & L            \\
 7            & R     & L          & L           & L          & L               & L            \\
 8            & R     & L          & L           & L          & L              

In [436]:
# Intermediate configuration: the policy is no longer the same in every state
# (the geometric column switches from left to right part-way along the river).
# NOTE: value iteration reports non-convergence for several of these settings,
# so the resulting table should be read with care.
S, T_max = 20, 14    # number of states, T_max of the SMDP environments
lambda_p = 7         # parameter of the Poisson column
p1, p2 = 0.55, 0.15  # parameters of the binomial and geometric columns

tables(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

No convergence in VI after:  1000  steps!
No convergence in VI after:  1000  steps!
No convergence in VI after:  1000  steps!
No convergence in VI after:  1000  steps!
\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(14)   & Const(14)   & Pois(7)   & Binomial(0.55)   & Geom(0.15)   \\
\hline
 0            & R     & L          & L           & L         & L                & L            \\
 1            & R     & L          & L           & L         & L                & L            \\
 2            & R     & L          & L           & L         & L                & L            \\
 3            & R     & L          & L           & L         & L                & L            \\
 4            & R     & L          & L           & L         & L                & L            \\
 5            & R     & L          & L           & L         & L                & R            \\
 6            & R     & L          & L           & L         & L                & R            \\
 7       