In [93]:
import numpy as np

In [105]:
def V(T, u, actions=None):
    '''Compute the maximum attainable total reward from every square.

    Performs T + 1 synchronous sweeps: on each sweep the value of a
    square becomes the best, over the squares it can move to, of that
    move's reward plus the destination's value from the previous sweep.
    Rewards are summed as-is (no discount factor is applied).

    Args:
        T (int): max timestep -> t in {0, 1, ..., T}, i.e. T + 1 sweeps
            are performed. A negative T performs no sweeps.
        u (dict {action : reward}): dictionary mapping
            each action (the square moved to) to its reward
        actions (dict {space : iterable of spaces}, optional): the
            squares reachable from each square. Every destination must
            be a key of both `actions` and `u`. A square with no
            destinations is terminal and keeps the value 0. Defaults to
            a 3x3 grid numbered 1..9 row by row, with moves between
            orthogonal neighbours and square 9 terminal.

    Returns:
        dict {space : value}: the value of each square after the last
        sweep, keyed in the same order as `actions`.
    '''

    if actions is None:
        # Allowable actions on the default 3x3 grid (9 is terminal)
        actions = {1: (2, 4), 2: (1, 3, 5), 3: (2, 6),
                   4: (1, 5, 7), 5: (2, 4, 6, 8), 6: (3, 5, 9),
                   7: (4, 8), 8: (5, 7, 9), 9: ()}

    # Values before any move has been made
    returns = {space: 0 for space in actions}

    for _ in range(T + 1):
        # The comprehension reads only the previous sweep's values, because
        # `returns` is rebound after the whole new dict has been built.
        returns = {
            space: (max(u[dest] + returns[dest] for dest in moves)
                    if moves else returns[space])
            for space, moves in actions.items()
        }

    return returns

# Reward for moving onto each square: -1 everywhere, except square 2
u = dict.fromkeys(range(1, 10), -1)
u[2] = 0.7

# Show the value of every square at horizon T, rounded to one decimal
T = 3
{square: round(best, 1) for square, best in V(T, u).items()}

{1: -0.6, 2: -0.6, 3: -0.6, 4: -0.6, 5: -0.6, 6: -0.6, 7: -2, 8: -0.6, 9: 0}

In [104]:
# Column titles: one column per square
print('T/s', *range(1, 10), sep='\t')

# One row per horizon t; there is nothing to compare the first row against
prev = None
for t in range(30):
    # Values at this horizon, rounded to one decimal, in square order
    returns = V(t, u)
    curr = np.array([round(returns[s], 1) for s in range(1, 10)])
    print(t, *curr, sep='\t')

    # Flag rows whose rounded values equal those of the previous horizon
    if prev is not None and (prev == curr).all():
        print(True)
    prev = curr.copy()

T/s	1	2	3	4	5	6	7	8	9
0	0.7	-1.0	0.7	-1.0	0.7	-1.0	-1.0	-1.0	0.0
1	-0.3	-0.3	-0.3	-0.3	-0.3	-0.3	-2.0	-0.3	0.0
2	0.4	-1.3	0.4	-1.3	0.4	-1.0	-1.3	-1.0	0.0
3	-0.6	-0.6	-0.6	-0.6	-0.6	-0.6	-2.0	-0.6	0.0
4	0.1	-1.6	0.1	-1.6	0.1	-1.0	-1.6	-1.0	0.0
5	-0.9	-0.9	-0.9	-0.9	-0.9	-0.9	-2.0	-0.9	0.0
6	-0.2	-1.9	-0.2	-1.9	-0.2	-1.0	-1.9	-1.0	0.0
7	-1.2	-1.2	-1.2	-1.2	-1.2	-1.0	-2.0	-1.0	0.0
8	-0.5	-2.2	-0.5	-2.2	-0.5	-1.0	-2.0	-1.0	0.0
9	-1.5	-1.5	-1.5	-1.5	-1.5	-1.0	-2.0	-1.0	0.0
10	-0.8	-2.5	-0.8	-2.5	-0.8	-1.0	-2.0	-1.0	0.0
11	-1.8	-1.8	-1.8	-1.8	-1.8	-1.0	-2.0	-1.0	0.0
12	-1.1	-2.8	-1.1	-2.8	-1.1	-1.0	-2.0	-1.0	0.0
13	-2.1	-2.1	-2.0	-2.1	-2.0	-1.0	-2.0	-1.0	0.0
14	-1.4	-3.0	-1.4	-3.0	-1.4	-1.0	-2.0	-1.0	0.0
15	-2.3	-2.4	-2.0	-2.4	-2.0	-1.0	-2.0	-1.0	0.0
16	-1.7	-3.0	-1.7	-3.0	-1.7	-1.0	-2.0	-1.0	0.0
17	-2.3	-2.7	-2.0	-2.7	-2.0	-1.0	-2.0	-1.0	0.0
18	-2.0	-3.0	-2.0	-3.0	-2.0	-1.0	-2.0	-1.0	0.0
19	-2.3	-3.0	-2.0	-3.0	-2.0	-1.0	-2.0	-1.0	0.0
20	-2.3	-3.0	-2.0	-3.0	-2.0	-1.0	-2.0	-1.0	0.0
True
21	-2