# State Value Functions for River Swim

In [1]:
import numpy as np

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.table import Table

matplotlib.use('Agg')

In [2]:
# generate grid of environment with state values
def draw_image(image):
    fig, ax = plt.subplots()
    ax.set_axis_off()
    tb = Table(ax, bbox=[0, 0, .8, .8])

    nrows, ncols = 1,image.shape[0]
    width, height = 1.0 / ncols, 1.0 / nrows

    # Add cells
    for j, val in np.ndenumerate(image):
        j = j[0]
        # add state labels
        if j == 0:
            val = str(val) + " (S)"
        elif j == (n-1):
            val = str(val) + " (L)"
        else:
            val = str(val) + " (N)"
        
        tb.add_cell(0, j, width, height, text=val,
                    loc='center', facecolor='white')
        

    # Row and column labels...
    for i in range(len(image)):
        tb.add_cell(-1, i, width, height/2, text=i, loc='center',
                    edgecolor='none', facecolor='none')

    ax.add_table(tb)

# model the environment in step function
def my_step(state, action, n, small, large):
    assert action in [0,1,2]
    assert state in range(n)
    
    actions = [(0,1),(1,0),(2,-1)] # (action, value) forward, stay, backward
    results = []
    
    for a, s in actions:
        if state == 0: # first state
            if a == 2: # we will ignore with 0 ACTION_PROB
                next_state = state
                reward = small
            elif a == 1:
                next_state = state + s
                reward = small
            elif a == 0:
                next_state = state + s
                reward = 0
            
        elif state == n-1: # last state
            if a == 0: # we will ignore with 0 ACTION_PROB
                next_state = state
                reward = large
            elif a == 1:
                next_state = state + s
                reward = large
            elif a == 2:
                next_state = state + s
                reward = 0

        else: # middle state
            next_state = state + s
            if next_state == 0:
                reward = small
            elif next_state == n-1:
                reward = large
            else:
                reward = 0
        
        results.append((a, next_state, reward))

    return results

In [3]:
n = 5
nA = 3
drift_backward = 0.05
drift_forward = 0.35
stay_in_place = 1 - (drift_backward + drift_forward)

probs_start = [drift_forward + drift_backward, stay_in_place, 0]
probs_mid = [drift_forward, stay_in_place, drift_backward]
probs_end = [0, stay_in_place, drift_backward + drift_forward]

small = 2
large = 10

DISCOUNT = 0.9

desc = np.append(np.array(["S"]), np.append(np.array(['N'] * (n - 2)),np.array(["L"])))

# State Value Function V(s)

In [7]:
value = np.zeros(n)
itera = 0
while True:
    itera += 1
    new_value = np.zeros_like(value)
    for i in range(n):
        for action in range(nA):
            if i == 0:
                res = my_step(i, action, n, small, large)
                new_value[i] = sum([(a[0] * (a[1][2] + DISCOUNT * value[a[1][1]])) for a in zip(probs_start,res)]) / nA
            
            elif i == n-1:
                res = my_step(i, action, n, small, large)
                new_value[i] = sum([(a[0] * (a[1][2] + DISCOUNT * value[a[1][1]])) for a in zip(probs_end,res)]) / nA
            
            else:
                res = my_step(i, action, n, small, large)
                new_value[i] = sum([(a[0] * (a[1][2] + DISCOUNT * value[a[1][1]])) for a in zip(probs_mid,res)]) / nA
            
    if np.sum(np.abs(value - new_value)) < 1e-4:
        draw_image(np.round(new_value, decimals=2))
        plt.savefig('../images/RS_figure_3_2.png')
        plt.close()
        break
    value = new_value
print(value)

[0.49936182 0.07902224 0.22839677 1.77245849 2.69839819]


# Optimal Value Function

In [8]:
value = np.zeros(n)
itera = 0
while True:
    itera += 1
    new_value = np.zeros_like(value)
    for i in range(n):
        values = []
        for action in range(nA):
            if i == 0:
                res = my_step(i, action, n, small, large)
                val = sum([(a[0] * (a[1][2] + DISCOUNT * value[a[1][1]])) for a in zip(probs_start,res)]) / nA
            elif i == n-1:
                res = my_step(i, action, n, small, large)
                val = sum([(a[0] * (a[1][2] + DISCOUNT * value[a[1][1]])) for a in zip(probs_end,res)]) / nA
            else:
                res = my_step(i, action, n, small, large)
                val = sum([(a[0] * (a[1][2] + DISCOUNT * value[a[1][1]])) for a in zip(probs_mid,res)]) / nA
            
            values.append(val)
            
        new_value[i] = np.max(values)
    if np.sum(np.abs(value - new_value)) < 1e-4:
        draw_image(np.round(new_value, decimals=2))
        plt.savefig('../images/RS_figure_3_5.png')
        plt.close()
        break
    value = new_value
print(value)

[0.49936182 0.07902224 0.22839677 1.77245849 2.69839819]
