In [1]:
import numpy as np
import grid_world
import utils

In [2]:
# The four moves a policy may assign to a state: Up, Down, Left, Right.
ACTION_SPACE = ('U', 'D', 'L', 'R')

# Named seed instead of a bare magic number. Seeding NumPy's global RNG here
# fixes the sequence of values np.random.choice will draw in later cells.
SEED = 123
np.random.seed(SEED)

def generate_random_policy(grid):
    """Assign a randomly chosen action to every non-terminal state.

    Args:
        grid: environment object exposing ``non_terminal_states()``.

    Returns:
        dict mapping each state yielded by ``grid.non_terminal_states()`` to
        one entry of ``ACTION_SPACE`` picked with ``np.random.choice``.
    """
    # The same action set is used for every state, and one draw is made per
    # state in the order non_terminal_states() yields them.
    return {
        state: np.random.choice(ACTION_SPACE)
        for state in grid.non_terminal_states()
    }

def random_policy_iteration(grid, threshold=0, start_state=(2, 0), step_limit=20,
                            seed=123, max_iterations=None, verbose=True):
    """Sample random policies until one earns more than ``threshold``.

    Each iteration resets the agent to ``start_state``, draws a fresh policy
    with ``generate_random_policy`` and follows it until the grid reports
    ``game_over()`` or the step bound is exceeded, summing the values returned
    by ``grid.move``.

    All parameters after ``threshold`` are optional; their defaults reproduce
    the previous hard-coded behaviour.

    Args:
        grid: environment exposing ``set_state``, ``current_state``,
            ``game_over`` and ``move`` (plus whatever
            ``generate_random_policy`` needs).
        threshold: a policy is accepted when its episode reward is strictly
            greater than this value.
        start_state: state the agent is placed in before every episode.
        step_limit: an episode is cut off once more than ``step_limit`` moves
            have been made, i.e. at most ``step_limit + 1`` moves are taken.
        seed: value passed to ``np.random.seed`` before the search starts;
            ``None`` leaves NumPy's global RNG untouched.
        max_iterations: upper bound on the number of policies tried;
            ``None`` keeps searching until a policy beats ``threshold``.
        verbose: when true, print every candidate policy followed by a blank
            line.

    Returns:
        ``(policy, total_reward)`` for the first policy whose reward exceeds
        ``threshold``. If ``max_iterations`` is exhausted first, the
        highest-reward policy seen (earliest on ties) and its reward.

    Raises:
        ValueError: if ``max_iterations`` is given and is smaller than 1.
    """
    if max_iterations is not None and max_iterations < 1:
        raise ValueError("max_iterations must be a positive integer or None")
    if seed is not None:
        np.random.seed(seed)

    best_policy, best_reward = None, None
    attempts = 0
    while max_iterations is None or attempts < max_iterations:
        attempts += 1
        grid.set_state(start_state)
        policy = generate_random_policy(grid)

        # Roll the policy out; the step bound stops policies that never
        # reach a game-over state from looping forever.
        total_reward = 0
        nsteps = 0
        while not grid.game_over() and nsteps <= step_limit:
            total_reward += grid.move(policy[grid.current_state()])
            nsteps += 1

        if verbose:
            utils.print_policy(policy, grid)
            print()

        if total_reward > threshold:
            return policy, total_reward
        # Remember the best candidate in case the iteration budget runs out.
        if best_reward is None or total_reward > best_reward:
            best_policy, best_reward = policy, total_reward

    return best_policy, best_reward

In [3]:
# Build the environment from grid_world's standard layout.
# NOTE(review): obey_prob=1.0 presumably makes every move deterministic and
# step_cost=None presumably means no per-step penalty — confirm in grid_world.
myenv = grid_world.standard_grid(obey_prob=1.0,step_cost=None)
# Sample random policies until one earns more than the default threshold (0).
good_policy, reward = random_policy_iteration(myenv)
print("Best Policy: {0} reward".format(reward))
# Show the accepted policy once more; random_policy_iteration has already
# printed every candidate it tried, so this repeats the last grid above it.
utils.print_policy(good_policy, myenv)

---------------------------
  L  |  D  |  L  |     |
---------------------------
  L  |     |  U  |     |
---------------------------
  L  |  L  |  D  |  R  |

---------------------------
  D  |  L  |  L  |     |
---------------------------
  R  |     |  L  |     |
---------------------------
  D  |  U  |  U  |  R  |

---------------------------
  L  |  D  |  D  |     |
---------------------------
  R  |     |  U  |     |
---------------------------
  D  |  D  |  U  |  U  |

---------------------------
  D  |  L  |  L  |     |
---------------------------
  R  |     |  R  |     |
---------------------------
  U  |  D  |  D  |  D  |

---------------------------
  L  |  D  |  L  |     |
---------------------------
  L  |     |  U  |     |
---------------------------
  R  |  L  |  U  |  U  |

---------------------------
  U  |  U  |  R  |     |
---------------------------
  L  |     |  R  |     |
---------------------------
  D  |  L  |  L  |  R  |

---------------------------
  D  |  D  |

  R  |  D  |  R  |  R  |

---------------------------
  L  |  L  |  U  |     |
---------------------------
  R  |     |  L  |     |
---------------------------
  L  |  U  |  R  |  L  |

---------------------------
  L  |  U  |  U  |     |
---------------------------
  R  |     |  U  |     |
---------------------------
  D  |  U  |  L  |  L  |

---------------------------
  L  |  L  |  L  |     |
---------------------------
  D  |     |  D  |     |
---------------------------
  L  |  R  |  R  |  U  |

---------------------------
  R  |  R  |  L  |     |
---------------------------
  U  |     |  R  |     |
---------------------------
  L  |  R  |  U  |  D  |

---------------------------
  R  |  U  |  U  |     |
---------------------------
  D  |     |  U  |     |
---------------------------
  D  |  L  |  L  |  U  |

---------------------------
  D  |  D  |  L  |     |
---------------------------
  U  |     |  U  |     |
---------------------------
  D  |  L  |  L  |  R  |

--------------

  R  |  U  |  D  |  U  |

---------------------------
  R  |  U  |  U  |     |
---------------------------
  U  |     |  U  |     |
---------------------------
  L  |  R  |  R  |  U  |

---------------------------
  R  |  L  |  D  |     |
---------------------------
  R  |     |  R  |     |
---------------------------
  R  |  R  |  R  |  R  |

---------------------------
  D  |  U  |  L  |     |
---------------------------
  U  |     |  R  |     |
---------------------------
  R  |  D  |  L  |  U  |

---------------------------
  L  |  D  |  U  |     |
---------------------------
  L  |     |  L  |     |
---------------------------
  U  |  U  |  U  |  D  |

---------------------------
  R  |  R  |  D  |     |
---------------------------
  U  |     |  D  |     |
---------------------------
  L  |  U  |  D  |  L  |

---------------------------
  R  |  U  |  L  |     |
---------------------------
  R  |     |  U  |     |
---------------------------
  L  |  D  |  L  |  R  |

--------------

  D  |  R  |  L  |  L  |

---------------------------
  L  |  D  |  R  |     |
---------------------------
  L  |     |  L  |     |
---------------------------
  R  |  L  |  L  |  D  |

---------------------------
  D  |  L  |  R  |     |
---------------------------
  U  |     |  R  |     |
---------------------------
  U  |  D  |  L  |  U  |

---------------------------
  L  |  U  |  L  |     |
---------------------------
  L  |     |  L  |     |
---------------------------
  U  |  U  |  D  |  D  |

---------------------------
  D  |  L  |  L  |     |
---------------------------
  U  |     |  R  |     |
---------------------------
  L  |  U  |  D  |  U  |

---------------------------
  R  |  U  |  R  |     |
---------------------------
  D  |     |  U  |     |
---------------------------
  R  |  R  |  D  |  L  |

---------------------------
  D  |  R  |  R  |     |
---------------------------
  R  |     |  R  |     |
---------------------------
  R  |  L  |  L  |  L  |

--------------

  D  |  R  |  D  |  D  |

---------------------------
  R  |  U  |  R  |     |
---------------------------
  R  |     |  L  |     |
---------------------------
  L  |  R  |  R  |  D  |

---------------------------
  R  |  R  |  R  |     |
---------------------------
  U  |     |  R  |     |
---------------------------
  R  |  D  |  L  |  R  |

---------------------------
  L  |  R  |  R  |     |
---------------------------
  U  |     |  D  |     |
---------------------------
  U  |  D  |  D  |  R  |

---------------------------
  R  |  L  |  R  |     |
---------------------------
  U  |     |  D  |     |
---------------------------
  U  |  L  |  L  |  R  |

---------------------------
  R  |  D  |  L  |     |
---------------------------
  D  |     |  D  |     |
---------------------------
  U  |  R  |  R  |  D  |

---------------------------
  R  |  R  |  D  |     |
---------------------------
  D  |     |  U  |     |
---------------------------
  D  |  R  |  D  |  L  |

--------------

---------------------------
  L  |  R  |  R  |     |
---------------------------
  L  |     |  L  |     |
---------------------------
  U  |  D  |  R  |  L  |

---------------------------
  U  |  D  |  R  |     |
---------------------------
  D  |     |  D  |     |
---------------------------
  D  |  R  |  D  |  D  |

---------------------------
  D  |  D  |  R  |     |
---------------------------
  L  |     |  U  |     |
---------------------------
  L  |  R  |  R  |  D  |

---------------------------
  L  |  U  |  U  |     |
---------------------------
  L  |     |  U  |     |
---------------------------
  D  |  L  |  D  |  L  |

---------------------------
  R  |  U  |  L  |     |
---------------------------
  D  |     |  U  |     |
---------------------------
  D  |  D  |  R  |  R  |

---------------------------
  L  |  D  |  L  |     |
---------------------------
  L  |     |  L  |     |
---------------------------
  U  |  U  |  D  |  R  |

---------------------------
  R  |  D  |

  U  |  R  |  U  |     |
---------------------------
  R  |     |  U  |     |
---------------------------
  D  |  U  |  D  |  D  |

---------------------------
  U  |  D  |  D  |     |
---------------------------
  U  |     |  L  |     |
---------------------------
  R  |  L  |  R  |  L  |

---------------------------
  L  |  D  |  D  |     |
---------------------------
  U  |     |  D  |     |
---------------------------
  R  |  L  |  L  |  R  |

---------------------------
  U  |  L  |  D  |     |
---------------------------
  U  |     |  U  |     |
---------------------------
  L  |  U  |  L  |  D  |

---------------------------
  D  |  L  |  R  |     |
---------------------------
  D  |     |  R  |     |
---------------------------
  D  |  L  |  L  |  L  |

---------------------------
  D  |  R  |  L  |     |
---------------------------
  U  |     |  D  |     |
---------------------------
  D  |  R  |  D  |  L  |

---------------------------
  L  |  L  |  D  |     |
---------------

  R  |  U  |  L  |     |
---------------------------
  D  |     |  L  |     |
---------------------------
  R  |  D  |  D  |  L  |

---------------------------
  U  |  L  |  L  |     |
---------------------------
  R  |     |  R  |     |
---------------------------
  L  |  R  |  D  |  L  |

---------------------------
  L  |  U  |  R  |     |
---------------------------
  L  |     |  R  |     |
---------------------------
  R  |  U  |  U  |  L  |

---------------------------
  R  |  U  |  D  |     |
---------------------------
  R  |     |  U  |     |
---------------------------
  D  |  L  |  R  |  L  |

---------------------------
  L  |  D  |  L  |     |
---------------------------
  U  |     |  D  |     |
---------------------------
  L  |  L  |  D  |  U  |

---------------------------
  L  |  U  |  L  |     |
---------------------------
  D  |     |  U  |     |
---------------------------
  U  |  L  |  R  |  L  |

---------------------------
  R  |  R  |  R  |     |
---------------

In [4]:
# Hand-built larger environment; state coordinates below run from 0 to 4 on both axes.
# NOTE(review): the third argument (3, 0) is presumably the start state, yet
# random_policy_iteration places the agent at (2, 0) before each episode —
# confirm which start is intended.
newenv = grid_world.Grid(5, 5, (3, 0))
# Reward received at two cells; neither has an entry in `actions` below.
# NOTE(review): presumably these are the terminal states — confirm in grid_world.
rewards = {(0, 4): 1, (2, 4): -1}
# Moves listed for each state. Cells (1, 1), (1, 3), (3, 1) and (3, 3) have no
# entry, and print as blanks in the policy grids.
actions = {
    (0, 0): ('D', 'R'),
    (0, 1): ('L', 'R'),
    (0, 2): ('L', 'D', 'R'),
    (0, 3): ('L', 'R'),
    (1, 0): ('U', 'D'),
    (1, 2): ('U', 'D'),
    (1, 4): ('U', 'D'),
    (2, 0): ('U', 'D', 'R'),
    (2, 1): ('L', 'R'),
    (2, 2): ('L', 'R', 'U', 'D'),
    (2, 3): ('L', 'R'),
    (3, 0): ('D', 'U'),
    (3, 2): ('D', 'U'),
    (3, 4): ('D', 'U'),
    (4, 0): ('R', 'U'),
    (4, 1): ('R', 'L'),
    (4, 2): ('L', 'R', 'U'),
    (4, 3): ('R', 'L'),
    (4, 4): ('L', 'U'),
}
# NOTE(review): meaning of the third argument (1) is not visible here —
# possibly the obey probability; confirm against Grid.set.
newenv.set(rewards, actions, 1)
# Same random search as for the standard grid, using the default threshold (0).
good_policy, reward = random_policy_iteration(newenv)
print("Best Policy: {0} reward".format(reward))
utils.print_policy(good_policy, newenv)

---------------------------
  L  |  D  |  L  |  L  |     |
---------------------------
  U  |     |  L  |     |  L  |
---------------------------
  D  |  R  |  L  |  R  |     |
---------------------------
  D  |     |  L  |     |  D  |
---------------------------
  U  |  D  |  L  |  R  |  D  |

---------------------------
  D  |  U  |  R  |  L  |     |
---------------------------
  L  |     |  L  |     |  L  |
---------------------------
  D  |  L  |  U  |  R  |     |
---------------------------
  R  |     |  R  |     |  D  |
---------------------------
  U  |  L  |  U  |  L  |  D  |

---------------------------
  R  |  R  |  R  |  R  |     |
---------------------------
  U  |     |  L  |     |  L  |
---------------------------
  D  |  R  |  L  |  L  |     |
---------------------------
  R  |     |  D  |     |  L  |
---------------------------
  U  |  L  |  R  |  L  |  D  |

---------------------------
  L  |  D  |  U  |  U  |     |
---------------------------
  R  |     |  U  |     | 

  R  |  U  |  L  |  U  |     |
---------------------------
  L  |     |  R  |     |  D  |
---------------------------
  D  |  R  |  U  |  D  |  R  |

---------------------------
  L  |  D  |  R  |  U  |     |
---------------------------
  R  |     |  L  |     |  R  |
---------------------------
  L  |  D  |  L  |  D  |     |
---------------------------
  L  |     |  D  |     |  L  |
---------------------------
  D  |  U  |  D  |  R  |  U  |

---------------------------
  U  |  U  |  D  |  L  |     |
---------------------------
  D  |     |  L  |     |  U  |
---------------------------
  U  |  U  |  D  |  U  |     |
---------------------------
  U  |     |  D  |     |  U  |
---------------------------
  U  |  D  |  R  |  L  |  U  |

---------------------------
  U  |  L  |  L  |  L  |     |
---------------------------
  D  |     |  L  |     |  D  |
---------------------------
  R  |  L  |  L  |  L  |     |
---------------------------
  L  |     |  R  |     |  D  |
----------------------

  R  |     |  D  |     |  D  |
---------------------------
  D  |  U  |  D  |  R  |  L  |

---------------------------
  L  |  R  |  U  |  U  |     |
---------------------------
  D  |     |  D  |     |  L  |
---------------------------
  U  |  L  |  D  |  U  |     |
---------------------------
  R  |     |  R  |     |  U  |
---------------------------
  D  |  D  |  D  |  D  |  D  |

---------------------------
  D  |  U  |  L  |  U  |     |
---------------------------
  U  |     |  R  |     |  L  |
---------------------------
  U  |  U  |  L  |  D  |     |
---------------------------
  R  |     |  D  |     |  R  |
---------------------------
  L  |  U  |  D  |  R  |  L  |

---------------------------
  U  |  L  |  R  |  L  |     |
---------------------------
  D  |     |  U  |     |  U  |
---------------------------
  D  |  U  |  D  |  L  |     |
---------------------------
  D  |     |  D  |     |  U  |
---------------------------
  R  |  D  |  R  |  L  |  U  |

---------------------

  U  |  R  |  R  |  L  |     |
---------------------------
  L  |     |  R  |     |  U  |
---------------------------
  U  |  R  |  L  |  U  |     |
---------------------------
  U  |     |  L  |     |  D  |
---------------------------
  U  |  D  |  D  |  U  |  D  |

---------------------------
  U  |  R  |  L  |  L  |     |
---------------------------
  D  |     |  U  |     |  R  |
---------------------------
  U  |  U  |  U  |  U  |     |
---------------------------
  L  |     |  L  |     |  L  |
---------------------------
  L  |  R  |  R  |  D  |  L  |

---------------------------
  R  |  L  |  D  |  R  |     |
---------------------------
  R  |     |  U  |     |  R  |
---------------------------
  U  |  L  |  D  |  L  |     |
---------------------------
  R  |     |  R  |     |  D  |
---------------------------
  R  |  U  |  L  |  R  |  D  |

---------------------------
  L  |  U  |  U  |  R  |     |
---------------------------
  U  |     |  D  |     |  R  |
----------------------

---------------------------
  L  |  L  |  L  |  U  |     |
---------------------------
  R  |     |  D  |     |  U  |
---------------------------
  L  |  D  |  U  |  D  |  R  |

---------------------------
  U  |  R  |  R  |  L  |     |
---------------------------
  D  |     |  L  |     |  R  |
---------------------------
  D  |  D  |  U  |  L  |     |
---------------------------
  D  |     |  D  |     |  R  |
---------------------------
  L  |  U  |  L  |  D  |  L  |

---------------------------
  L  |  U  |  R  |  L  |     |
---------------------------
  R  |     |  R  |     |  D  |
---------------------------
  U  |  U  |  R  |  U  |     |
---------------------------
  R  |     |  R  |     |  L  |
---------------------------
  D  |  L  |  D  |  R  |  R  |

---------------------------
  D  |  L  |  L  |  D  |     |
---------------------------
  D  |     |  U  |     |  U  |
---------------------------
  L  |  U  |  D  |  L  |     |
---------------------------
  U  |     |  U  |     | 

  L  |     |  U  |     |  U  |
---------------------------
  U  |  R  |  L  |  D  |  R  |

---------------------------
  R  |  U  |  U  |  U  |     |
---------------------------
  R  |     |  R  |     |  D  |
---------------------------
  D  |  U  |  R  |  D  |     |
---------------------------
  L  |     |  L  |     |  U  |
---------------------------
  R  |  D  |  R  |  D  |  R  |

---------------------------
  D  |  D  |  D  |  R  |     |
---------------------------
  U  |     |  L  |     |  U  |
---------------------------
  L  |  R  |  R  |  R  |     |
---------------------------
  R  |     |  R  |     |  L  |
---------------------------
  L  |  U  |  D  |  L  |  D  |

---------------------------
  R  |  R  |  R  |  D  |     |
---------------------------
  R  |     |  R  |     |  R  |
---------------------------
  D  |  R  |  R  |  R  |     |
---------------------------
  U  |     |  U  |     |  R  |
---------------------------
  L  |  R  |  R  |  L  |  D  |

---------------------

  D  |  L  |  R  |  D  |     |
---------------------------
  L  |     |  L  |     |  D  |
---------------------------
  U  |  L  |  U  |  L  |     |
---------------------------
  L  |     |  R  |     |  U  |
---------------------------
  R  |  D  |  R  |  L  |  D  |

---------------------------
  L  |  U  |  U  |  R  |     |
---------------------------
  R  |     |  L  |     |  L  |
---------------------------
  U  |  D  |  D  |  U  |     |
---------------------------
  U  |     |  D  |     |  R  |
---------------------------
  R  |  D  |  L  |  R  |  D  |

---------------------------
  U  |  L  |  U  |  D  |     |
---------------------------
  L  |     |  D  |     |  L  |
---------------------------
  R  |  U  |  U  |  L  |     |
---------------------------
  R  |     |  R  |     |  R  |
---------------------------
  L  |  U  |  R  |  D  |  L  |

---------------------------
  U  |  R  |  D  |  U  |     |
---------------------------
  U  |     |  U  |     |  U  |
----------------------

  L  |     |  D  |     |  L  |
---------------------------
  U  |  L  |  U  |  L  |     |
---------------------------
  L  |     |  L  |     |  L  |
---------------------------
  R  |  R  |  L  |  D  |  L  |

---------------------------
  L  |  U  |  D  |  U  |     |
---------------------------
  U  |     |  R  |     |  R  |
---------------------------
  R  |  R  |  U  |  U  |     |
---------------------------
  L  |     |  R  |     |  R  |
---------------------------
  D  |  U  |  D  |  D  |  U  |

---------------------------
  R  |  R  |  R  |  L  |     |
---------------------------
  D  |     |  L  |     |  L  |
---------------------------
  R  |  R  |  U  |  R  |     |
---------------------------
  U  |     |  R  |     |  L  |
---------------------------
  L  |  D  |  R  |  U  |  U  |

---------------------------
  D  |  D  |  L  |  L  |     |
---------------------------
  U  |     |  L  |     |  R  |
---------------------------
  R  |  D  |  L  |  R  |     |
----------------------

  U  |     |  L  |     |  D  |
---------------------------
  R  |  D  |  U  |  L  |  D  |

---------------------------
  L  |  L  |  R  |  D  |     |
---------------------------
  D  |     |  R  |     |  R  |
---------------------------
  L  |  L  |  R  |  R  |     |
---------------------------
  L  |     |  U  |     |  R  |
---------------------------
  D  |  U  |  R  |  D  |  L  |

---------------------------
  R  |  R  |  L  |  R  |     |
---------------------------
  L  |     |  R  |     |  L  |
---------------------------
  D  |  R  |  L  |  R  |     |
---------------------------
  U  |     |  R  |     |  L  |
---------------------------
  D  |  D  |  D  |  L  |  D  |

---------------------------
  R  |  R  |  D  |  R  |     |
---------------------------
  R  |     |  L  |     |  U  |
---------------------------
  R  |  L  |  D  |  D  |     |
---------------------------
  R  |     |  U  |     |  R  |
---------------------------
  L  |  L  |  R  |  R  |  D  |

---------------------

---------------------------
  R  |  L  |  U  |  U  |     |
---------------------------
  D  |     |  L  |     |  L  |
---------------------------
  L  |  U  |  D  |  R  |     |
---------------------------
  L  |     |  D  |     |  D  |
---------------------------
  R  |  D  |  R  |  R  |  D  |

---------------------------
  L  |  U  |  D  |  D  |     |
---------------------------
  L  |     |  L  |     |  L  |
---------------------------
  R  |  R  |  U  |  D  |     |
---------------------------
  R  |     |  U  |     |  D  |
---------------------------
  D  |  L  |  R  |  R  |  L  |

---------------------------
  R  |  D  |  L  |  R  |     |
---------------------------
  D  |     |  U  |     |  L  |
---------------------------
  D  |  R  |  L  |  D  |     |
---------------------------
  R  |     |  U  |     |  R  |
---------------------------
  R  |  R  |  D  |  U  |  R  |

---------------------------
  R  |  R  |  D  |  U  |     |
---------------------------
  D  |     |  D  |     | 

  D  |     |  R  |     |  D  |
---------------------------
  R  |  R  |  R  |  R  |     |
---------------------------
  R  |     |  U  |     |  U  |
---------------------------
  U  |  L  |  L  |  L  |  D  |

---------------------------
  U  |  L  |  L  |  D  |     |
---------------------------
  U  |     |  L  |     |  U  |
---------------------------
  U  |  D  |  U  |  D  |     |
---------------------------
  R  |     |  R  |     |  U  |
---------------------------
  L  |  L  |  U  |  R  |  U  |

---------------------------
  L  |  R  |  U  |  D  |     |
---------------------------
  R  |     |  U  |     |  L  |
---------------------------
  D  |  D  |  U  |  U  |     |
---------------------------
  D  |     |  D  |     |  U  |
---------------------------
  U  |  L  |  L  |  U  |  D  |

---------------------------
  D  |  U  |  D  |  L  |     |
---------------------------
  L  |     |  L  |     |  L  |
---------------------------
  D  |  L  |  U  |  R  |     |
----------------------

  R  |     |  D  |     |  D  |
---------------------------
  R  |  U  |  L  |  D  |  U  |

---------------------------
  D  |  D  |  R  |  R  |     |
---------------------------
  D  |     |  U  |     |  L  |
---------------------------
  R  |  L  |  D  |  D  |     |
---------------------------
  U  |     |  L  |     |  D  |
---------------------------
  L  |  R  |  L  |  L  |  U  |

---------------------------
  L  |  U  |  D  |  D  |     |
---------------------------
  L  |     |  U  |     |  U  |
---------------------------
  R  |  R  |  R  |  U  |     |
---------------------------
  U  |     |  D  |     |  D  |
---------------------------
  R  |  L  |  R  |  U  |  L  |

---------------------------
  L  |  R  |  R  |  R  |     |
---------------------------
  L  |     |  R  |     |  L  |
---------------------------
  R  |  D  |  L  |  L  |     |
---------------------------
  L  |     |  R  |     |  R  |
---------------------------
  U  |  L  |  L  |  L  |  L  |

---------------------


---------------------------
  D  |  L  |  D  |  R  |     |
---------------------------
  R  |     |  U  |     |  U  |
---------------------------
  D  |  D  |  R  |  U  |     |
---------------------------
  R  |     |  U  |     |  R  |
---------------------------
  U  |  R  |  U  |  D  |  L  |

---------------------------
  D  |  R  |  L  |  U  |     |
---------------------------
  U  |     |  D  |     |  L  |
---------------------------
  L  |  R  |  U  |  L  |     |
---------------------------
  R  |     |  D  |     |  D  |
---------------------------
  U  |  D  |  R  |  D  |  L  |

---------------------------
  L  |  D  |  L  |  D  |     |
---------------------------
  R  |     |  R  |     |  L  |
---------------------------
  L  |  L  |  L  |  D  |     |
---------------------------
  D  |     |  D  |     |  L  |
---------------------------
  R  |  L  |  L  |  U  |  U  |

---------------------------
  D  |  D  |  D  |  R  |     |
---------------------------
  L  |     |  R  |     |

  U  |     |  D  |     |  R  |
---------------------------
  R  |  R  |  U  |  R  |     |
---------------------------
  R  |     |  U  |     |  D  |
---------------------------
  R  |  L  |  D  |  U  |  L  |

---------------------------
  R  |  U  |  U  |  D  |     |
---------------------------
  D  |     |  L  |     |  L  |
---------------------------
  D  |  D  |  L  |  U  |     |
---------------------------
  U  |     |  R  |     |  U  |
---------------------------
  U  |  D  |  D  |  L  |  L  |

---------------------------
  L  |  U  |  R  |  U  |     |
---------------------------
  D  |     |  U  |     |  L  |
---------------------------
  D  |  U  |  U  |  L  |     |
---------------------------
  R  |     |  D  |     |  U  |
---------------------------
  D  |  D  |  L  |  D  |  L  |

---------------------------
  L  |  D  |  L  |  L  |     |
---------------------------
  D  |     |  L  |     |  R  |
---------------------------
  D  |  R  |  U  |  U  |     |
----------------------

  L  |  D  |  L  |  U  |     |
---------------------------
  L  |     |  R  |     |  R  |
---------------------------
  L  |  R  |  L  |  R  |  R  |

---------------------------
  D  |  U  |  U  |  R  |     |
---------------------------
  D  |     |  R  |     |  L  |
---------------------------
  R  |  R  |  U  |  L  |     |
---------------------------
  L  |     |  U  |     |  U  |
---------------------------
  L  |  D  |  L  |  U  |  R  |

---------------------------
  D  |  R  |  D  |  R  |     |
---------------------------
  U  |     |  R  |     |  D  |
---------------------------
  D  |  R  |  L  |  U  |     |
---------------------------
  R  |     |  D  |     |  L  |
---------------------------
  D  |  D  |  U  |  R  |  R  |

---------------------------
  U  |  D  |  D  |  D  |     |
---------------------------
  D  |     |  R  |     |  D  |
---------------------------
  L  |  R  |  U  |  D  |     |
---------------------------
  R  |     |  L  |     |  L  |
----------------------

  L  |     |  D  |     |  U  |
---------------------------
  R  |  L  |  L  |  L  |  L  |

---------------------------
  R  |  D  |  L  |  R  |     |
---------------------------
  R  |     |  R  |     |  U  |
---------------------------
  L  |  R  |  D  |  U  |     |
---------------------------
  D  |     |  U  |     |  R  |
---------------------------
  U  |  L  |  L  |  L  |  D  |

---------------------------
  L  |  L  |  D  |  U  |     |
---------------------------
  D  |     |  R  |     |  D  |
---------------------------
  D  |  U  |  U  |  R  |     |
---------------------------
  D  |     |  U  |     |  D  |
---------------------------
  D  |  D  |  U  |  U  |  R  |

---------------------------
  D  |  U  |  U  |  D  |     |
---------------------------
  U  |     |  L  |     |  D  |
---------------------------
  L  |  D  |  R  |  D  |     |
---------------------------
  D  |     |  R  |     |  R  |
---------------------------
  R  |  U  |  L  |  D  |  R  |

---------------------

  D  |  L  |  R  |  D  |  D  |

---------------------------
  L  |  L  |  R  |  R  |     |
---------------------------
  U  |     |  U  |     |  L  |
---------------------------
  R  |  D  |  U  |  D  |     |
---------------------------
  D  |     |  L  |     |  R  |
---------------------------
  R  |  L  |  D  |  U  |  R  |

---------------------------
  L  |  L  |  L  |  D  |     |
---------------------------
  D  |     |  L  |     |  R  |
---------------------------
  U  |  L  |  U  |  U  |     |
---------------------------
  R  |     |  R  |     |  U  |
---------------------------
  R  |  U  |  D  |  D  |  U  |

---------------------------
  L  |  D  |  L  |  U  |     |
---------------------------
  D  |     |  D  |     |  R  |
---------------------------
  L  |  R  |  L  |  R  |     |
---------------------------
  L  |     |  R  |     |  U  |
---------------------------
  U  |  R  |  U  |  L  |  R  |

---------------------------
  U  |  L  |  D  |  U  |     |
---------------------

---------------------------
  D  |  R  |  L  |  D  |     |
---------------------------
  U  |     |  D  |     |  D  |
---------------------------
  L  |  R  |  D  |  R  |     |
---------------------------
  D  |     |  D  |     |  R  |
---------------------------
  D  |  R  |  R  |  L  |  L  |

---------------------------
  U  |  R  |  D  |  R  |     |
---------------------------
  L  |     |  R  |     |  U  |
---------------------------
  D  |  U  |  L  |  U  |     |
---------------------------
  R  |     |  U  |     |  D  |
---------------------------
  L  |  L  |  U  |  R  |  U  |

---------------------------
  D  |  L  |  U  |  U  |     |
---------------------------
  R  |     |  D  |     |  U  |
---------------------------
  L  |  L  |  U  |  D  |     |
---------------------------
  D  |     |  D  |     |  U  |
---------------------------
  U  |  R  |  L  |  U  |  L  |

---------------------------
  L  |  U  |  L  |  L  |     |
---------------------------
  R  |     |  D  |     | 

  L  |  U  |  R  |  U  |     |
---------------------------
  U  |     |  U  |     |  U  |
---------------------------
  L  |  U  |  L  |  U  |     |
---------------------------
  D  |     |  R  |     |  U  |
---------------------------
  R  |  L  |  U  |  U  |  D  |

---------------------------
  R  |  R  |  R  |  L  |     |
---------------------------
  D  |     |  U  |     |  R  |
---------------------------
  D  |  L  |  R  |  D  |     |
---------------------------
  L  |     |  L  |     |  R  |
---------------------------
  L  |  R  |  R  |  R  |  L  |

---------------------------
  L  |  U  |  R  |  L  |     |
---------------------------
  D  |     |  R  |     |  L  |
---------------------------
  U  |  L  |  U  |  R  |     |
---------------------------
  R  |     |  U  |     |  D  |
---------------------------
  R  |  D  |  L  |  L  |  U  |

---------------------------
  R  |  U  |  U  |  R  |     |
---------------------------
  U  |     |  R  |     |  R  |
----------------------

  U  |     |  U  |     |  L  |
---------------------------
  R  |  U  |  U  |  U  |     |
---------------------------
  U  |     |  R  |     |  L  |
---------------------------
  D  |  U  |  R  |  L  |  R  |

---------------------------
  U  |  D  |  U  |  L  |     |
---------------------------
  U  |     |  R  |     |  R  |
---------------------------
  L  |  L  |  D  |  U  |     |
---------------------------
  U  |     |  U  |     |  D  |
---------------------------
  D  |  R  |  L  |  L  |  L  |

---------------------------
  U  |  L  |  U  |  U  |     |
---------------------------
  U  |     |  L  |     |  U  |
---------------------------
  U  |  L  |  R  |  L  |     |
---------------------------
  R  |     |  L  |     |  D  |
---------------------------
  L  |  R  |  D  |  U  |  R  |

---------------------------
  D  |  R  |  R  |  L  |     |
---------------------------
  R  |     |  R  |     |  R  |
---------------------------
  R  |  U  |  R  |  U  |     |
----------------------

  U  |  D  |  U  |  L  |     |
---------------------------
  D  |     |  R  |     |  R  |
---------------------------
  U  |  U  |  L  |  D  |  R  |

---------------------------
  L  |  U  |  L  |  R  |     |
---------------------------
  R  |     |  U  |     |  L  |
---------------------------
  D  |  U  |  L  |  U  |     |
---------------------------
  R  |     |  L  |     |  L  |
---------------------------
  R  |  L  |  U  |  U  |  U  |

---------------------------
  R  |  R  |  U  |  D  |     |
---------------------------
  L  |     |  L  |     |  D  |
---------------------------
  D  |  L  |  R  |  R  |     |
---------------------------
  L  |     |  D  |     |  U  |
---------------------------
  U  |  D  |  D  |  R  |  R  |

---------------------------
  U  |  L  |  D  |  D  |     |
---------------------------
  L  |     |  D  |     |  R  |
---------------------------
  L  |  U  |  L  |  L  |     |
---------------------------
  U  |     |  U  |     |  U  |
----------------------

  L  |     |  R  |     |  L  |
---------------------------
  L  |  R  |  L  |  U  |  R  |

---------------------------
  L  |  U  |  U  |  D  |     |
---------------------------
  L  |     |  R  |     |  L  |
---------------------------
  L  |  D  |  L  |  L  |     |
---------------------------
  R  |     |  L  |     |  D  |
---------------------------
  D  |  D  |  R  |  U  |  R  |

---------------------------
  R  |  L  |  U  |  R  |     |
---------------------------
  D  |     |  R  |     |  L  |
---------------------------
  R  |  R  |  D  |  U  |     |
---------------------------
  L  |     |  D  |     |  L  |
---------------------------
  U  |  L  |  R  |  D  |  D  |

---------------------------
  U  |  L  |  R  |  U  |     |
---------------------------
  D  |     |  U  |     |  L  |
---------------------------
  R  |  D  |  L  |  D  |     |
---------------------------
  U  |     |  D  |     |  D  |
---------------------------
  R  |  D  |  D  |  D  |  R  |

---------------------

---------------------------
  U  |  D  |  D  |  U  |  U  |

---------------------------
  R  |  U  |  L  |  D  |     |
---------------------------
  R  |     |  D  |     |  R  |
---------------------------
  D  |  U  |  U  |  L  |     |
---------------------------
  U  |     |  L  |     |  R  |
---------------------------
  U  |  L  |  U  |  U  |  U  |

---------------------------
  R  |  D  |  L  |  R  |     |
---------------------------
  U  |     |  R  |     |  D  |
---------------------------
  L  |  U  |  D  |  D  |     |
---------------------------
  R  |     |  U  |     |  D  |
---------------------------
  R  |  L  |  L  |  U  |  R  |

---------------------------
  L  |  U  |  U  |  D  |     |
---------------------------
  D  |     |  U  |     |  R  |
---------------------------
  U  |  R  |  R  |  R  |     |
---------------------------
  L  |     |  U  |     |  R  |
---------------------------
  U  |  U  |  R  |  R  |  L  |

---------------------------
  L  |  L  |  L  |  L  |

  R  |  R  |  L  |  R  |  U  |

---------------------------
  D  |  L  |  U  |  D  |     |
---------------------------
  U  |     |  U  |     |  L  |
---------------------------
  U  |  U  |  D  |  L  |     |
---------------------------
  U  |     |  R  |     |  D  |
---------------------------
  D  |  D  |  U  |  R  |  R  |

---------------------------
  L  |  R  |  U  |  D  |     |
---------------------------
  D  |     |  R  |     |  L  |
---------------------------
  D  |  D  |  L  |  U  |     |
---------------------------
  D  |     |  U  |     |  U  |
---------------------------
  L  |  L  |  U  |  D  |  U  |

---------------------------
  R  |  D  |  D  |  R  |     |
---------------------------
  U  |     |  D  |     |  R  |
---------------------------
  L  |  R  |  L  |  R  |     |
---------------------------
  U  |     |  D  |     |  L  |
---------------------------
  D  |  L  |  R  |  R  |  U  |

---------------------------
  L  |  U  |  D  |  D  |     |
---------------------

  R  |     |  U  |     |  U  |
---------------------------
  L  |  L  |  U  |  R  |     |
---------------------------
  D  |     |  R  |     |  R  |
---------------------------
  D  |  D  |  U  |  D  |  U  |

---------------------------
  D  |  L  |  U  |  U  |     |
---------------------------
  R  |     |  L  |     |  R  |
---------------------------
  R  |  L  |  R  |  L  |     |
---------------------------
  R  |     |  U  |     |  U  |
---------------------------
  L  |  U  |  L  |  U  |  D  |

---------------------------
  U  |  R  |  U  |  D  |     |
---------------------------
  R  |     |  U  |     |  R  |
---------------------------
  R  |  U  |  D  |  R  |     |
---------------------------
  U  |     |  R  |     |  L  |
---------------------------
  D  |  R  |  D  |  U  |  R  |

---------------------------
  U  |  U  |  L  |  L  |     |
---------------------------
  U  |     |  U  |     |  R  |
---------------------------
  L  |  R  |  U  |  R  |     |
----------------------

  R  |     |  D  |     |  U  |
---------------------------
  D  |  L  |  D  |  L  |  R  |

---------------------------
  R  |  R  |  U  |  L  |     |
---------------------------
  L  |     |  R  |     |  U  |
---------------------------
  R  |  R  |  U  |  D  |     |
---------------------------
  U  |     |  R  |     |  R  |
---------------------------
  R  |  R  |  D  |  R  |  R  |

---------------------------
  L  |  R  |  U  |  R  |     |
---------------------------
  D  |     |  U  |     |  L  |
---------------------------
  U  |  D  |  L  |  U  |     |
---------------------------
  D  |     |  U  |     |  D  |
---------------------------
  D  |  R  |  R  |  R  |  L  |

---------------------------
  U  |  L  |  R  |  L  |     |
---------------------------
  L  |     |  L  |     |  U  |
---------------------------
  D  |  R  |  L  |  D  |     |
---------------------------
  L  |     |  R  |     |  L  |
---------------------------
  U  |  R  |  U  |  D  |  U  |

---------------------

  U  |  L  |  L  |  L  |     |
---------------------------
  R  |     |  U  |     |  L  |
---------------------------
  U  |  L  |  L  |  D  |     |
---------------------------
  R  |     |  R  |     |  L  |
---------------------------
  U  |  R  |  R  |  D  |  L  |

---------------------------
  D  |  R  |  R  |  U  |     |
---------------------------
  U  |     |  U  |     |  L  |
---------------------------
  D  |  R  |  U  |  L  |     |
---------------------------
  U  |     |  R  |     |  R  |
---------------------------
  R  |  D  |  R  |  D  |  D  |

---------------------------
  L  |  R  |  U  |  L  |     |
---------------------------
  R  |     |  L  |     |  L  |
---------------------------
  L  |  D  |  U  |  U  |     |
---------------------------
  R  |     |  L  |     |  R  |
---------------------------
  U  |  D  |  R  |  D  |  U  |

---------------------------
  D  |  L  |  R  |  L  |     |
---------------------------
  D  |     |  D  |     |  D  |
----------------------

---------------------------
  U  |  D  |  D  |  U  |     |
---------------------------
  R  |     |  U  |     |  R  |
---------------------------
  R  |  D  |  U  |  U  |  R  |

---------------------------
  U  |  R  |  D  |  D  |     |
---------------------------
  U  |     |  U  |     |  D  |
---------------------------
  D  |  D  |  R  |  U  |     |
---------------------------
  D  |     |  D  |     |  R  |
---------------------------
  R  |  L  |  R  |  U  |  D  |

---------------------------
  D  |  D  |  D  |  D  |     |
---------------------------
  R  |     |  L  |     |  R  |
---------------------------
  L  |  L  |  U  |  R  |     |
---------------------------
  D  |     |  U  |     |  L  |
---------------------------
  R  |  D  |  R  |  L  |  U  |

---------------------------
  L  |  D  |  L  |  D  |     |
---------------------------
  R  |     |  R  |     |  R  |
---------------------------
  U  |  R  |  U  |  R  |     |
---------------------------
  U  |     |  R  |     | 

  L  |  L  |  L  |  L  |  R  |

---------------------------
  R  |  R  |  L  |  R  |     |
---------------------------
  L  |     |  L  |     |  R  |
---------------------------
  U  |  L  |  R  |  L  |     |
---------------------------
  U  |     |  L  |     |  U  |
---------------------------
  L  |  D  |  U  |  L  |  L  |

---------------------------
  R  |  R  |  U  |  R  |     |
---------------------------
  U  |     |  U  |     |  L  |
---------------------------
  D  |  U  |  U  |  R  |     |
---------------------------
  U  |     |  D  |     |  R  |
---------------------------
  R  |  U  |  U  |  U  |  R  |

---------------------------
  R  |  D  |  U  |  L  |     |
---------------------------
  L  |     |  L  |     |  R  |
---------------------------
  L  |  D  |  L  |  L  |     |
---------------------------
  D  |     |  D  |     |  R  |
---------------------------
  D  |  L  |  R  |  L  |  L  |

---------------------------
  D  |  D  |  U  |  D  |     |
---------------------

  D  |     |  L  |     |  U  |
---------------------------
  L  |  U  |  U  |  U  |     |
---------------------------
  L  |     |  U  |     |  L  |
---------------------------
  U  |  D  |  R  |  U  |  D  |

---------------------------
  U  |  R  |  D  |  R  |     |
---------------------------
  L  |     |  R  |     |  L  |
---------------------------
  L  |  R  |  U  |  L  |     |
---------------------------
  L  |     |  R  |     |  D  |
---------------------------
  U  |  L  |  U  |  L  |  R  |

---------------------------
  R  |  U  |  U  |  R  |     |
---------------------------
  L  |     |  R  |     |  L  |
---------------------------
  L  |  D  |  L  |  U  |     |
---------------------------
  R  |     |  R  |     |  L  |
---------------------------
  R  |  D  |  R  |  D  |  U  |

---------------------------
  R  |  U  |  U  |  L  |     |
---------------------------
  D  |     |  U  |     |  R  |
---------------------------
  U  |  L  |  D  |  L  |     |
----------------------

  L  |     |  L  |     |  D  |
---------------------------
  U  |  D  |  L  |  U  |  L  |

---------------------------
  L  |  L  |  U  |  L  |     |
---------------------------
  L  |     |  L  |     |  L  |
---------------------------
  L  |  R  |  R  |  D  |     |
---------------------------
  U  |     |  D  |     |  U  |
---------------------------
  R  |  L  |  U  |  L  |  U  |

---------------------------
  R  |  U  |  D  |  R  |     |
---------------------------
  U  |     |  R  |     |  U  |
---------------------------
  U  |  U  |  U  |  L  |     |
---------------------------
  R  |     |  L  |     |  R  |
---------------------------
  D  |  D  |  R  |  U  |  D  |

---------------------------
  U  |  R  |  R  |  U  |     |
---------------------------
  R  |     |  U  |     |  L  |
---------------------------
  L  |  U  |  L  |  R  |     |
---------------------------
  L  |     |  L  |     |  U  |
---------------------------
  L  |  U  |  D  |  U  |  U  |

---------------------

  D  |  U  |  U  |  R  |     |
---------------------------
  L  |     |  D  |     |  L  |
---------------------------
  D  |  L  |  R  |  U  |     |
---------------------------
  U  |     |  D  |     |  D  |
---------------------------
  L  |  D  |  R  |  U  |  U  |

---------------------------
  U  |  R  |  R  |  L  |     |
---------------------------
  R  |     |  L  |     |  L  |
---------------------------
  D  |  D  |  D  |  R  |     |
---------------------------
  U  |     |  U  |     |  D  |
---------------------------
  U  |  L  |  L  |  R  |  D  |

---------------------------
  U  |  L  |  U  |  U  |     |
---------------------------
  U  |     |  U  |     |  L  |
---------------------------
  L  |  R  |  D  |  U  |     |
---------------------------
  D  |     |  R  |     |  U  |
---------------------------
  D  |  D  |  R  |  U  |  R  |

---------------------------
  R  |  D  |  U  |  U  |     |
---------------------------
  R  |     |  D  |     |  L  |
----------------------

  R  |  R  |  L  |  U  |     |
---------------------------
  U  |     |  U  |     |  D  |
---------------------------
  L  |  L  |  U  |  L  |  L  |

---------------------------
  D  |  R  |  D  |  D  |     |
---------------------------
  R  |     |  R  |     |  U  |
---------------------------
  D  |  R  |  U  |  D  |     |
---------------------------
  U  |     |  U  |     |  U  |
---------------------------
  L  |  R  |  R  |  U  |  L  |

---------------------------
  L  |  U  |  U  |  D  |     |
---------------------------
  R  |     |  L  |     |  D  |
---------------------------
  U  |  R  |  D  |  U  |     |
---------------------------
  D  |     |  U  |     |  D  |
---------------------------
  D  |  R  |  R  |  R  |  L  |

---------------------------
  U  |  U  |  U  |  R  |     |
---------------------------
  U  |     |  L  |     |  D  |
---------------------------
  U  |  D  |  L  |  U  |     |
---------------------------
  R  |     |  U  |     |  L  |
----------------------

---------------------------
  R  |  R  |  L  |  D  |  L  |

---------------------------
  D  |  D  |  D  |  D  |     |
---------------------------
  U  |     |  L  |     |  U  |
---------------------------
  D  |  U  |  L  |  L  |     |
---------------------------
  L  |     |  R  |     |  L  |
---------------------------
  R  |  L  |  D  |  R  |  R  |

---------------------------
  L  |  L  |  U  |  U  |     |
---------------------------
  D  |     |  D  |     |  R  |
---------------------------
  U  |  D  |  D  |  U  |     |
---------------------------
  D  |     |  R  |     |  R  |
---------------------------
  U  |  L  |  D  |  R  |  D  |

---------------------------
  U  |  L  |  L  |  R  |     |
---------------------------
  U  |     |  L  |     |  R  |
---------------------------
  U  |  L  |  U  |  U  |     |
---------------------------
  R  |     |  R  |     |  D  |
---------------------------
  D  |  U  |  L  |  U  |  D  |

---------------------------
  R  |  R  |  U  |  R  |

  L  |  D  |  R  |  R  |     |
---------------------------
  U  |     |  R  |     |  R  |
---------------------------
  R  |  U  |  D  |  U  |     |
---------------------------
  R  |     |  L  |     |  D  |
---------------------------
  L  |  L  |  R  |  R  |  U  |

---------------------------
  U  |  L  |  D  |  R  |     |
---------------------------
  R  |     |  L  |     |  D  |
---------------------------
  U  |  L  |  D  |  L  |     |
---------------------------
  L  |     |  D  |     |  U  |
---------------------------
  R  |  D  |  U  |  L  |  D  |

---------------------------
  D  |  U  |  R  |  R  |     |
---------------------------
  D  |     |  D  |     |  L  |
---------------------------
  R  |  R  |  D  |  R  |     |
---------------------------
  L  |     |  R  |     |  L  |
---------------------------
  U  |  R  |  L  |  R  |  D  |

---------------------------
  U  |  U  |  R  |  U  |     |
---------------------------
  D  |     |  U  |     |  L  |
----------------------

  R  |     |  L  |     |  D  |
---------------------------
  R  |  L  |  D  |  L  |  D  |

---------------------------
  L  |  U  |  R  |  R  |     |
---------------------------
  R  |     |  L  |     |  L  |
---------------------------
  D  |  U  |  L  |  L  |     |
---------------------------
  L  |     |  D  |     |  D  |
---------------------------
  U  |  L  |  L  |  L  |  R  |

---------------------------
  U  |  U  |  D  |  U  |     |
---------------------------
  D  |     |  D  |     |  R  |
---------------------------
  R  |  L  |  R  |  U  |     |
---------------------------
  U  |     |  D  |     |  R  |
---------------------------
  R  |  R  |  R  |  L  |  U  |

---------------------------
  U  |  L  |  U  |  U  |     |
---------------------------
  L  |     |  D  |     |  L  |
---------------------------
  U  |  R  |  D  |  L  |     |
---------------------------
  D  |     |  R  |     |  R  |
---------------------------
  R  |  D  |  L  |  R  |  U  |

---------------------

  D  |  R  |  L  |  L  |  D  |

---------------------------
  D  |  R  |  D  |  D  |     |
---------------------------
  D  |     |  D  |     |  D  |
---------------------------
  R  |  R  |  D  |  U  |     |
---------------------------
  D  |     |  D  |     |  R  |
---------------------------
  R  |  U  |  R  |  L  |  U  |

---------------------------
  U  |  D  |  U  |  U  |     |
---------------------------
  R  |     |  R  |     |  R  |
---------------------------
  U  |  D  |  U  |  L  |     |
---------------------------
  R  |     |  D  |     |  L  |
---------------------------
  U  |  R  |  L  |  D  |  R  |

---------------------------
  L  |  D  |  U  |  U  |     |
---------------------------
  R  |     |  D  |     |  D  |
---------------------------
  R  |  L  |  R  |  U  |     |
---------------------------
  D  |     |  R  |     |  D  |
---------------------------
  U  |  R  |  R  |  U  |  L  |

---------------------------
  D  |  L  |  R  |  L  |     |
---------------------

In [5]:
def best_action_value(grid, V, s, gamma=0.9):
    """Return the greedy action at state ``s`` and its one-step backed-up value.

    The grid is first moved to ``s`` (so the caller's grid state is changed),
    then every action in ``ACTION_SPACE`` is scored as

        sum(p * reward) + gamma * sum(p * V[next_state])

    over the ``(p, reward, next_state)`` triples returned by
    ``grid.get_transition_probs(action)``.

    Args:
        grid: environment exposing ``set_state`` and ``get_transition_probs``.
        V: mapping from state to its current value estimate.
        s: state to evaluate.
        gamma: discount factor applied to the expected next-state value.

    Returns:
        ``(action, value)`` for the highest-scoring action. On ties the action
        that appears first in ``ACTION_SPACE`` wins. If no action scores above
        ``-inf`` the result is ``(None, float('-inf'))``.
    """
    grid.set_state(s)

    def backed_up_value(candidate):
        # Expected immediate reward and expected successor value, accumulated
        # in a single pass over the transition triples.
        expected_reward = 0
        expected_next = 0
        for prob, reward, next_state in grid.get_transition_probs(candidate):
            expected_reward += prob * reward
            expected_next += prob * V[next_state]
        return expected_reward + gamma * expected_next

    best_action, best_value = None, float('-inf')
    for candidate in ACTION_SPACE:
        candidate_value = backed_up_value(candidate)
        # Strict comparison keeps the earliest action when values tie.
        if candidate_value > best_value:
            best_action, best_value = candidate, candidate_value

    return best_action, best_value

def value_iteration(grid, threshold = 1e-3, gamma = 0.9):
    """Run value iteration on ``grid`` and extract the greedy policy.

    Values start at 0 for every state in ``grid.all_states()``. Each sweep
    visits the states in ``grid.non_terminal_states()`` and overwrites
    ``V[s]`` in place with the best one-step backed-up value, so later states
    in the same sweep already see the updated entries. Sweeping stops once the
    largest absolute change in a sweep is strictly below ``threshold``.

    Args:
        grid: environment exposing ``all_states``, ``non_terminal_states`` and
            whatever ``best_action_value`` needs (it moves the grid's state).
        threshold: positive convergence tolerance on the per-sweep change.
        gamma: discount factor forwarded to ``best_action_value``.

    Returns:
        ``(V, policy)`` where ``V`` maps every state to its value and
        ``policy`` maps every non-terminal state to its greedy action.

    Raises:
        ValueError: if ``threshold`` is not strictly positive. The per-sweep
            change is never negative, so such a threshold could never be met
            and the loop would not terminate.
    """
    if threshold <= 0:
        raise ValueError("threshold must be > 0, got {0!r}".format(threshold))

    V = {s: 0 for s in grid.all_states()}

    while True:
        delta = 0
        for s in grid.non_terminal_states():
            previous = V[s]
            _, V[s] = best_action_value(grid, V, s, gamma)
            delta = max(delta, abs(previous - V[s]))

        if delta < threshold:
            break

    # Build the greedy policy directly. No random placeholder policy is
    # created, so the global NumPy RNG stream is left untouched, and
    # best_action_value positions the grid itself.
    policy = {}
    for s in grid.non_terminal_states():
        policy[s], _ = best_action_value(grid, V, s, gamma)

    return V, policy

In [6]:
# Run value iteration on myenv (built in In [3] with obey_prob=1.0 and
# step_cost=None), then print the state values followed by the greedy policy.
best_value, best_policy = value_iteration(myenv)
utils.print_values(best_value, myenv)
utils.print_policy(best_policy, myenv)

---------------------------
 0.81| 0.90| 1.00| 0.00|
---------------------------
 0.73| 0.00| 0.90| 0.00|
---------------------------
 0.66| 0.73| 0.81| 0.73|
---------------------------
  R  |  R  |  R  |     |
---------------------------
  U  |     |  U  |     |
---------------------------
  U  |  R  |  U  |  L  |


In [7]:
# Same solve-and-print sequence for newenv; best_value / best_policy from the
# previous cell are overwritten.
# NOTE(review): newenv is not defined in the cells visible here - presumably it
# is created in an earlier cell; confirm it exists on Restart & Run All.
best_value, best_policy = value_iteration(newenv)
utils.print_values(best_value, newenv)
utils.print_policy(best_policy, newenv)

---------------------------
 0.73| 0.81| 0.90| 1.00| 0.00|
---------------------------
 0.66| 0.00| 0.81| 0.00| 1.00|
---------------------------
 0.59| 0.66| 0.73| 0.66| 0.00|
---------------------------
 0.53| 0.00| 0.66| 0.00| 0.43|
---------------------------
 0.48| 0.53| 0.59| 0.53| 0.48|
---------------------------
  R  |  R  |  R  |  R  |     |
---------------------------
  U  |     |  U  |     |  U  |
---------------------------
  U  |  R  |  U  |  L  |     |
---------------------------
  U  |     |  U  |     |  D  |
---------------------------
  U  |  R  |  U  |  L  |  L  |


In [8]:
# Grid built with obey_prob=0.8 and step_cost=-0.1.
# random_policy_iteration re-seeds NumPy with 123, then keeps sampling random
# policies (printing each one) until an episode started from (2, 0) ends with
# total reward > 0, the default threshold.
mydrunkenenv = grid_world.standard_grid(obey_prob=0.8,step_cost=-0.1)
good_policy, reward = random_policy_iteration(mydrunkenenv)
print("Best Policy: {0} reward".format(reward))
utils.print_policy(good_policy, mydrunkenenv)

---------------------------
  L  |  D  |  L  |     |
---------------------------
  L  |     |  U  |     |
---------------------------
  L  |  L  |  D  |  R  |

---------------------------
  D  |  L  |  L  |     |
---------------------------
  R  |     |  L  |     |
---------------------------
  D  |  U  |  U  |  R  |

---------------------------
  R  |  U  |  D  |     |
---------------------------
  D  |     |  U  |     |
---------------------------
  U  |  R  |  D  |  L  |

---------------------------
  U  |  R  |  D  |     |
---------------------------
  D  |     |  R  |     |
---------------------------
  U  |  L  |  R  |  D  |

---------------------------
  R  |  D  |  R  |     |
---------------------------
  D  |     |  U  |     |
---------------------------
  R  |  D  |  R  |  D  |

---------------------------
  L  |  D  |  R  |     |
---------------------------
  L  |     |  U  |     |
---------------------------
  U  |  R  |  R  |  U  |

---------------------------
  D  |  R  |

In [9]:
# Value iteration on the obey_prob=0.8 / step_cost=-0.1 grid from In [8];
# prints the state values and then the greedy policy.
best_value, best_policy = value_iteration(mydrunkenenv)
utils.print_values(best_value, mydrunkenenv)
utils.print_policy(best_policy, mydrunkenenv)

---------------------------
 0.45| 0.67| 0.91| 0.00|
---------------------------
 0.27| 0.00| 0.51| 0.00|
---------------------------
 0.12| 0.12| 0.28| 0.01|
---------------------------
  R  |  R  |  R  |     |
---------------------------
  U  |     |  U  |     |
---------------------------
  U  |  R  |  U  |  L  |


In [10]:
# Grid built with obey_prob=0.8 and step_cost=-2.
# The acceptance threshold is lowered to -8: random policies are sampled (and
# printed) until one episode from (2, 0) ends with total reward > -8.
mysuicideenv = grid_world.standard_grid(obey_prob=0.8,step_cost=-2)
good_policy, reward = random_policy_iteration(mysuicideenv, threshold = -8)
print("Best Policy: {0} reward".format(reward))
utils.print_policy(good_policy, mysuicideenv)

---------------------------
  L  |  D  |  L  |     |
---------------------------
  L  |     |  U  |     |
---------------------------
  L  |  L  |  D  |  R  |

---------------------------
  D  |  L  |  L  |     |
---------------------------
  R  |     |  L  |     |
---------------------------
  D  |  U  |  U  |  R  |

---------------------------
  R  |  U  |  D  |     |
---------------------------
  D  |     |  U  |     |
---------------------------
  U  |  R  |  D  |  L  |

---------------------------
  U  |  R  |  D  |     |
---------------------------
  D  |     |  R  |     |
---------------------------
  U  |  L  |  R  |  D  |

---------------------------
  R  |  D  |  R  |     |
---------------------------
  D  |     |  U  |     |
---------------------------
  R  |  D  |  R  |  D  |

---------------------------
  L  |  D  |  R  |     |
---------------------------
  L  |     |  U  |     |
---------------------------
  U  |  R  |  R  |  U  |

---------------------------
  D  |  R  |

In [11]:
# Value iteration on the obey_prob=0.8 / step_cost=-2 grid from In [10];
# prints the state values and then the greedy policy.
best_value, best_policy = value_iteration(mysuicideenv)
utils.print_values(best_value, mysuicideenv)
utils.print_policy(best_policy, mysuicideenv)

---------------------------
-4.56|-2.18| 0.29| 0.00|
---------------------------
-6.45| 0.00|-1.50| 0.00|
---------------------------
-7.32|-5.67|-3.68|-1.68|
---------------------------
  R  |  R  |  R  |     |
---------------------------
  U  |     |  R  |     |
---------------------------
  R  |  R  |  R  |  U  |
