Experiments on optimal policies in the SMDP version of RiverSwim.

In [149]:
# Load packages and utilities. 
from tabulate import tabulate
import numpy as np
import grid_world_class as gw
import riverswim_class as rs 
import riverswim_class_smdp as rs_s 

import UCRL2_L as ucrl
import UCRL_SMDP as ucrlS
import experiment_utils as utils
import importlib
importlib.reload(gw)
importlib.reload(rs)
importlib.reload(rs_s)
importlib.reload(ucrl)
importlib.reload(ucrlS)
importlib.reload(utils)
import matplotlib.pyplot as plt

With the tests done, we now write the key results (optimal gains and optimal policies) to LaTeX tables. The first set of tables uses `utils.VI` as the solver.

In [150]:
def tables(S,T_max,lambda_p,p1,p2):
    """Print a LaTeX table of optimal policies, iteration counts and gains.

    Solves the standard RiverSwim MDP and five SMDP variants of it with
    ``utils.VI`` and prints one column per model via ``tabulate``: one row
    per state holding the chosen action ('L' where the policy entry equals
    0, 'R' otherwise), followed by a '# Iterations' row and a 'Gain' row
    (gains rounded to 3 decimals).

    Parameters
    ----------
    S : int
        Number of states; passed as ``nS`` to every environment.
    T_max
        Passed as ``T_max`` to every SMDP environment (the MDP baseline is
        always built with ``T_max=1``). Also shown in the 'Unif'/'Const'
        column headers.
    lambda_p
        ``param`` of the 'poisson' SMDP.
    p1
        ``param`` of the 'binomial' SMDP.
    p2
        ``param`` of the 'geometric' SMDP.

    Returns
    -------
    None
        The table is printed, not returned.

    Notes
    -----
    Every solver result is indexed as ``result[0]`` (reported as the number
    of iterations), ``result[2]`` (the policy, one entry per state) and
    ``result[3]`` (the gain).
    """
    headers = ['Index/gain','MDP','Unif('+str(T_max)+')','Const('+str(T_max)+')',
            'Pois('+str(lambda_p)+')','Binomial('+str(p1)+')','Geom('+str(p2)+')']
    n_cols = len(headers)

    # (distribution, param) of each SMDP column, in header order. Only the
    # 'uniform' and 'constant' variants are built without a parameter.
    smdp_specs = [('uniform', None), ('constant', None), ('poisson', lambda_p),
                  ('binomial', p1), ('geometric', p2)]

    # Baseline MDP first, then each SMDP, so results line up with the headers.
    results = [utils.VI(rs.riverswim(nS=S, T_max=1))]
    for distribution, param in smdp_specs:
        env = rs_s.riverswim(nS=S, T_max=T_max, distribution=distribution, param=param)
        results.append(utils.VI(env))

    # One column per model, one row per state; action 0 is shown as 'L'.
    policies = np.array([result[2] for result in results]).T
    policies = np.where(policies == 0, 'L', 'R')

    # Summary rows: a text label followed by one value per model.
    iterations = np.array(['# Iterations'] + [result[0] for result in results]).reshape((1, n_cols))
    gains = np.array(['Gain'] + [np.round(result[3], 3) for result in results]).reshape((1, n_cols))

    # State-index column + policies, then the two summary rows underneath.
    table = np.array(range(S)).reshape((S, 1))
    table = np.append(table, policies, axis=1)
    table = np.append(table, iterations, axis=0)
    table = np.append(table, gains, axis=0)
    print(tabulate(table, headers=headers, tablefmt='latex', numalign="center"))



In [151]:
# Example where we always go right:
S = 20        # number of states
T_max = 4     # T_max of the SMDP environments
lambda_p = 4  # parameter of the Poisson SMDP
p1 = 0.1      # parameter of the binomial SMDP
p2 = 0.75     # parameter of the geometric SMDP
tables(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(4)   & Const(4)   & Pois(4)   & Binomial(0.1)   & Geom(0.75)   \\
\hline
 0            & R     & R         & R          & R         & R               & R            \\
 1            & R     & R         & R          & R         & R               & R            \\
 2            & R     & R         & R          & R         & R               & R            \\
 3            & R     & R         & R          & R         & R               & R            \\
 4            & R     & R         & R          & R         & R               & R            \\
 5            & R     & R         & R          & R         & R               & R            \\
 6            & R     & R         & R          & R         & R               & R            \\
 7            & R     & R         & R          & R         & R               & R            \\
 8            & R     & R         & R          & R         & R               & R            \\
 9         

In [152]:
# Example where we always go left:
S = 20         # number of states
T_max = 50     # T_max of the SMDP environments
lambda_p = 30  # parameter of the Poisson SMDP
p1 = 0.9       # parameter of the binomial SMDP
p2 = 0.01      # parameter of the geometric SMDP

tables(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(50)   & Const(50)   & Pois(30)   & Binomial(0.9)   & Geom(0.01)   \\
\hline
 0            & R     & L          & L           & L          & L               & L            \\
 1            & R     & L          & L           & L          & L               & L            \\
 2            & R     & L          & L           & L          & L               & L            \\
 3            & R     & L          & L           & L          & L               & L            \\
 4            & R     & L          & L           & L          & L               & L            \\
 5            & R     & L          & L           & L          & L               & L            \\
 6            & R     & L          & L           & L          & L               & L            \\
 7            & R     & L          & L           & L          & L               & L            \\
 8            & R     & L          & L           & L          & L              

In [181]:
# Example where we go a mixed direction:
# NOTE(review): the saved output of this cell is headed 'Binomial(0.53)', so it
# was produced with p1 = 0.53 rather than the 0.55 set here; re-run to refresh.
S = 20         # number of states
T_max = 18     # T_max of the SMDP environments
lambda_p = 10  # parameter of the Poisson SMDP
p1 = 0.55      # parameter of the binomial SMDP
p2 = 0.101     # parameter of the geometric SMDP

tables(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(18)   & Const(18)   & Pois(10)   & Binomial(0.53)   & Geom(0.101)   \\
\hline
 0            & R     & L          & L           & L          & L                & L             \\
 1            & R     & L          & L           & L          & L                & L             \\
 2            & R     & L          & L           & L          & L                & L             \\
 3            & R     & L          & L           & L          & L                & L             \\
 4            & R     & L          & L           & L          & L                & L             \\
 5            & R     & L          & L           & L          & L                & L             \\
 6            & R     & L          & L           & L          & L                & L             \\
 7            & R     & L          & L           & L          & L                & L             \\
 8            & R     & L          & L           & L         

The same tables, now computed with policy iteration as the solver:


In [154]:
def tables_pi(S,T_max,lambda_p,p1,p2):
    """Print a LaTeX table of policies, iteration counts and gains (PI solvers).

    Solves the standard RiverSwim MDP with ``utils.PI`` and five SMDP
    variants of it with ``utils.PI_SMDP``, then prints one column per model
    via ``tabulate``: one row per state holding the chosen action ('L' where
    the policy entry equals 0, 'R' otherwise), followed by a '# Iterations'
    row and a 'Gain' row (gains rounded to 3 decimals).

    Parameters
    ----------
    S : int
        Number of states; passed as ``nS`` to every environment.
    T_max
        Passed as ``T_max`` to every SMDP environment (the MDP baseline is
        always built with ``T_max=1``). Also shown in the 'Unif'/'Const'
        column headers.
    lambda_p
        ``param`` of the 'poisson' SMDP.
    p1
        ``param`` of the 'binomial' SMDP.
    p2
        ``param`` of the 'geometric' SMDP.

    Returns
    -------
    None
        The table is printed, not returned.

    Notes
    -----
    Every solver result is indexed as ``result[0]`` (reported as the number
    of iterations), ``result[2]`` (the policy, one entry per state) and
    ``result[3]`` (the gain).
    """
    headers = ['Index/gain','MDP','Unif('+str(T_max)+')','Const('+str(T_max)+')',
            'Pois('+str(lambda_p)+')','Binomial('+str(p1)+')','Geom('+str(p2)+')']
    n_cols = len(headers)

    # (distribution, param) of each SMDP column, in header order. Only the
    # 'uniform' and 'constant' variants are built without a parameter.
    smdp_specs = [('uniform', None), ('constant', None), ('poisson', lambda_p),
                  ('binomial', p1), ('geometric', p2)]

    # Baseline MDP first (plain PI), then each SMDP (PI_SMDP), in header order.
    solved = [utils.PI(rs.riverswim(nS=S, T_max=1))]
    for distribution, param in smdp_specs:
        smdp_env = rs_s.riverswim(nS=S, T_max=T_max, distribution=distribution, param=param)
        solved.append(utils.PI_SMDP(smdp_env))

    # One column per model, one row per state; action 0 is shown as 'L'.
    policies = np.array([result[2] for result in solved]).T
    policies = np.where(policies == 0, 'L', 'R')

    # Summary rows: a text label followed by one value per model.
    iterations = np.array(['# Iterations'] + [result[0] for result in solved]).reshape((1, n_cols))
    gains = np.array(['Gain'] + [np.round(result[3], 3) for result in solved]).reshape((1, n_cols))

    # State-index column + policies, then the two summary rows underneath.
    table = np.array(range(S)).reshape((S, 1))
    table = np.append(table, policies, axis=1)
    table = np.append(table, iterations, axis=0)
    table = np.append(table, gains, axis=0)
    print(tabulate(table, headers=headers, tablefmt='latex', numalign="center"))

In [155]:
# Example where we always go right:
S = 20        # number of states
T_max = 4     # T_max of the SMDP environments
lambda_p = 4  # parameter of the Poisson SMDP
p1 = 0.1      # parameter of the binomial SMDP
p2 = 0.75     # parameter of the geometric SMDP
tables_pi(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(4)   & Const(4)   & Pois(4)   & Binomial(0.1)   & Geom(0.75)   \\
\hline
 0            & R     & R         & R          & R         & R               & R            \\
 1            & R     & R         & R          & R         & R               & R            \\
 2            & R     & R         & R          & R         & R               & R            \\
 3            & R     & R         & R          & R         & R               & R            \\
 4            & R     & R         & R          & R         & R               & R            \\
 5            & R     & R         & R          & R         & R               & R            \\
 6            & R     & R         & R          & R         & R               & R            \\
 7            & R     & R         & R          & R         & R               & R            \\
 8            & R     & R         & R          & R         & R               & R            \\
 9         

In [190]:
# Example where we always go left:
S = 20         # number of states
T_max = 50     # T_max of the SMDP environments
lambda_p = 30  # parameter of the Poisson SMDP
p1 = 0.9       # parameter of the binomial SMDP
p2 = 0.01      # parameter of the geometric SMDP

tables_pi(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(50)   & Const(50)   & Pois(30)   & Binomial(0.9)   & Geom(0.01)   \\
\hline
 0            & R     & L          & L           & L          & L               & L            \\
 1            & R     & L          & L           & L          & L               & L            \\
 2            & R     & L          & L           & L          & L               & L            \\
 3            & R     & L          & L           & L          & L               & L            \\
 4            & R     & L          & L           & L          & L               & L            \\
 5            & R     & L          & L           & L          & L               & L            \\
 6            & R     & L          & L           & L          & L               & L            \\
 7            & R     & L          & L           & L          & L               & L            \\
 8            & R     & L          & L           & L          & L              

In [182]:
# Example where policy is mixed:
S = 20         # number of states
T_max = 18     # T_max of the SMDP environments
lambda_p = 10  # parameter of the Poisson SMDP
p1 = 0.55      # parameter of the binomial SMDP
p2 = 0.101     # parameter of the geometric SMDP


tables_pi(S=S, T_max=T_max, lambda_p=lambda_p, p1=p1, p2=p2)

\begin{tabular}{lllllll}
\hline
 Index/gain   & MDP   & Unif(18)   & Const(18)   & Pois(10)   & Binomial(0.55)   & Geom(0.101)   \\
\hline
 0            & R     & L          & L           & L          & L                & L             \\
 1            & R     & L          & L           & L          & L                & L             \\
 2            & R     & L          & L           & L          & L                & L             \\
 3            & R     & L          & L           & L          & L                & L             \\
 4            & R     & L          & L           & L          & L                & L             \\
 5            & R     & L          & L           & L          & L                & L             \\
 6            & R     & L          & L           & L          & L                & L             \\
 7            & R     & L          & L           & L          & L                & L             \\
 8            & R     & L          & L           & L         