In [None]:
import numpy as np
import maze as mz 
import matplotlib.pyplot as plt

In [None]:
# Maze layout as a 2-D numpy array, using the convention
#   0 = empty cell
#   1 = obstacle
#   2 = exit of the maze
maze = np.array(
    [
        [0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0, 1, 1, 1],
        [0, 0, 1, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 0],
        [0, 0, 0, 0, 1, 2, 0, 0],
    ]
)

# Draw the layout for visual inspection.
mz.draw_maze(maze)

# Build the environment object that the solvers and simulations below operate on.
# NOTE(review): the second positional argument is presumably the same flag that
# env.set_mini_stay(...) toggles later in the notebook -- confirm against maze.py.
env = mz.Maze(maze, False)
# env.show()

In [None]:
# Time horizon used for the finite-horizon problem.
horizon = 20

# Solve the finite-horizon MDP by dynamic programming; the returned policy is
# consumed by the simulation cell that follows.
V, policy = mz.dynamic_programming(env, horizon)

In [None]:
# Simulate the dynamic-programming policy starting from position A.
# NOTE(review): the 4-tuple presumably encodes two (row, col) positions
# (player, then minotaur) -- confirm against maze.py.
start = (0, 0, 6, 5)
method = 'DynProg'
path = env.simulate(start, policy, method)

# Print the resulting path; uncomment the next line to animate it instead.
# mz.animate_solution(maze, path)
print(path)

In [None]:
## Compute the probability of getting out successfully for varying time horizons
horizons = np.arange(1, 31)

# Put the environment in the "cannot stay" configuration explicitly. Without
# this, re-running the cell would compute the first curve with mini_stay still
# set to True from the previous execution of this very cell.
env.set_mini_stay(False)
probab_exiting_mini_no_stay = mz.compute_POS(
    env, start, n_iterations=10000, horizon=horizons.tolist(), method='DynProg'
)

# Switch mini_stay on and estimate again with otherwise identical settings.
env.set_mini_stay(True)
probab_exiting_mini_stay = mz.compute_POS(
    env, start, n_iterations=10000, horizon=horizons.tolist(), method='DynProg'
)

# Create the figure explicitly. The original `fig1 = plt.figure` was missing
# the call parentheses, so fig1 was bound to the function object rather than
# to a Figure.
fig1, ax1 = plt.subplots()
ax1.scatter(horizons, probab_exiting_mini_no_stay, label='Minitaur cannot stay')
ax1.scatter(horizons, probab_exiting_mini_stay, label='Minitaur can stay')
ax1.set_title("Probability of getting out alive!")
ax1.set_xlabel('Time Horizon T')
ax1.set_ylabel('Probability of Success')
ax1.legend()
ax1.grid(True)

In [None]:
# Infinite-horizon discounted MDP, solved with mini_stay switched off.
env.set_mini_stay(False)

# Discount factor chosen so that the mean 1 / (1 - gamma) equals 30.
gamma = 29 / 30
# Accuracy threshold passed to value iteration.
epsilon = 1e-4

V, policy = mz.value_iteration(env, gamma, epsilon)


In [None]:
# Simulate the value-iteration policy from the same start state and animate it.
start = (0, 0, 6, 5)
method = 'ValIter'
path = env.simulate(start, policy, method)

# Show the resulting path on the maze.
mz.animate_solution(maze, path)

In [None]:
# Estimate the probability of getting out alive under the value-iteration
# policy, using 10000 iterations.
# NOTE(review): an empty horizon list is passed here; presumably the horizon is
# not used when method is 'ValIter' -- confirm against mz.compute_POS.
start = (0, 0, 6, 5)
probab_exiting = mz.compute_POS(
    env,
    start,
    n_iterations=10000,
    horizon=[],
    method='ValIter',
)
print('Probability of getting out alive: ', probab_exiting)