In [2]:
import numpy as np
from mazeN_class import *

# Single-letter policy symbols: 'U' = up, 'D' = down, 'L' = left, 'R' = right.
actions = ['U', 'D', 'L', 'R'] # UP , DOWN , LEFT , RIGHT

def is_valid_move(x, y, n, maze):
    """Tell whether cell (x, y) can be entered.

    The cell must lie inside the n-by-n grid and its maze entry must differ
    from 0 (0 marks a wall). Returns True or False.
    """
    inside_grid = 0 <= x < n and 0 <= y < n
    if not inside_grid:
        return False
    if maze[x][y] != 0:
        return True
    return False

def value_iteration(maze, gamma=0.9, epsilon=1e-6):
    """Run value iteration on a grid maze and extract the greedy policy.

    Cell codes: 0 is a wall, 2 is the start, 3 is the goal; every other
    non-zero code is an open cell. Moves are deterministic (up, down, left,
    right) and may only enter non-wall cells inside the grid.

    Reward model: the reward depends on the cell being left. A move out of
    the goal cell earns 11, a move out of any other cell costs 1. The goal
    cell is swept like every other open cell (it is not treated as a
    terminal state), so its value includes discounted future reward.

    Args:
        maze: 2-D grid (list of rows or array) of cell codes. It does not
            have to be square.
        gamma: discount factor applied to the successor cell's value.
        epsilon: sweeping stops once the largest value change in a full
            sweep is below this threshold.

    Returns:
        A ``(V, policy)`` tuple. ``V`` is a float array with one value per
        cell (walls stay at 0). ``policy`` is an array of one-character
        strings: 'U', 'D', 'L' or 'R' for the greedy move, 'S' at the goal,
        'X' for walls, and '' for an open cell that has no open neighbour.

    Raises:
        ValueError: if the maze has no start (2) or no goal (3) cell.
    """
    goal_reward = 11
    step_reward = -1
    # (symbol, row offset, column offset); the order is also the tie-break
    # order because only a strictly better move replaces the current best.
    moves = (('U', -1, 0), ('D', 1, 0), ('L', 0, -1), ('R', 0, 1))

    rows = len(maze)
    cols = len(maze[0]) if rows else 0
    cells = [(r, c) for r in range(rows) for c in range(cols)]

    has_start = any(maze[r][c] == 2 for r, c in cells)
    has_goal = any(maze[r][c] == 3 for r, c in cells)
    if not (has_start and has_goal):
        raise ValueError("Can't find both start and goal cells!")

    V = np.zeros((rows, cols))

    def is_open(r, c):
        # Inside the grid and not a wall.
        return 0 <= r < rows and 0 <= c < cols and maze[r][c] != 0

    def best_move(r, c):
        # Greedy one-step lookahead from (r, c) against the current V.
        # Returns (None, None) when no neighbouring cell can be entered.
        reward = goal_reward if maze[r][c] == 3 else step_reward
        best_symbol, best_q = None, None
        for symbol, dr, dc in moves:
            nr, nc = r + dr, c + dc
            if not is_open(nr, nc):
                continue
            q_value = reward + gamma * V[nr][nc]
            if best_q is None or q_value > best_q:
                best_symbol, best_q = symbol, q_value
        return best_symbol, best_q

    open_cells = [(r, c) for r, c in cells if maze[r][c] != 0]

    # In-place sweeps: each update immediately sees values written earlier
    # in the same sweep.
    while True:
        delta = 0.0
        for r, c in open_cells:
            symbol, q_value = best_move(r, c)
            if symbol is None:
                # Boxed-in cell: nothing to back up, keep its value at 0
                # instead of letting it collapse to -inf.
                continue
            delta = max(delta, abs(q_value - V[r][c]))
            V[r][c] = q_value
        if delta < epsilon:
            break

    policy = np.full((rows, cols), '', dtype='<U1')
    for r, c in cells:
        code = maze[r][c]
        if code == 0:
            policy[r][c] = 'X'
        elif code == 3:
            policy[r][c] = 'S'  # S -> stop
        else:
            symbol, _ = best_move(r, c)
            if symbol is not None:
                policy[r][c] = symbol

    return V, policy

def policy_modified(policy):
    """Translate a grid of numeric action codes into one-letter symbols.

    Mapping: 0 -> 'U', 1 -> 'D', 2 -> 'L', 3 -> 'R', 4 -> 'X'. Entries that
    equal none of these codes are left as the empty string.

    Args:
        policy: 2-D grid (list of rows or array) of action codes.

    Returns:
        An array of one-character strings with the same number of rows and
        columns as ``policy``.
    """
    symbols = ('U', 'D', 'L', 'R', 'X')
    rows = len(policy)
    cols = len(policy[0]) if rows else 0
    x = np.zeros((rows, cols), dtype=str)
    for i in range(rows):
        for j in range(cols):
            for code, symbol in enumerate(symbols):
                if policy[i][j] == code:
                    # Assignment (the previous code compared with `==`,
                    # which left every entry empty).
                    x[i][j] = symbol
                    break
    return x

def draw_maze(canvas, row, col, color):
    """Paint one grid cell as a filled square on ``canvas``.

    The square's top-left corner is at (col * CELL_SIZE, row * CELL_SIZE)
    and each side is CELL_SIZE long; ``color`` is used as the fill.
    """
    left, top = col * CELL_SIZE, row * CELL_SIZE
    right, bottom = left + CELL_SIZE, top + CELL_SIZE
    canvas.create_rectangle(left, top, right, bottom, fill=color)

def visualize_values(values, maze):
    """Show the maze in a 'Value function' window with each cell's value.

    Cells coded 0, 1, 2 and 3 are painted black, white, red and green; other
    codes keep the grey canvas background. Every cell then gets its value
    printed with three decimals at its centre. The call runs the window's
    main loop before returning.

    NOTE: this file defines ``visualize_values`` again further down; that
    later definition is the one the name refers to once the whole file has
    been executed.
    """
    side = len(maze[0])
    palette = ((0, 'black'), (1, 'white'), (2, 'red'), (3, 'green'))

    window = Tk()
    window.title('Value function')
    extent = side * CELL_SIZE
    board = Canvas(window, width=extent, height=extent, bg='grey')

    # Background: one coloured square per recognised cell code.
    for r in range(side):
        for c in range(side):
            cell = maze[r][c]
            fill = next((shade for code, shade in palette if cell == code), None)
            if fill is not None:
                draw_maze(board, r, c, fill)

    # Foreground: the numeric value centred in each cell.
    half = CELL_SIZE // 2
    for r in range(side):
        for c in range(side):
            label = "{:.3f}".format(values[r][c])
            board.create_text(c * CELL_SIZE + half, r * CELL_SIZE + half,
                              text=label, font=('Arial', 10))

    board.pack()
    window.mainloop()

def visualize_policy(policy, maze):
    """Show the maze in a 'Policy' window with an arrow per policy move.

    Cells coded 0, 1, 2 and 3 are painted black, white, red and green. Cells
    whose policy symbol is 'U', 'D', 'L' or 'R' get the matching arrow drawn
    at their centre; cells with any other symbol get no arrow. The call runs
    the window's main loop before returning.
    """
    side = len(policy[0])
    palette = ((0, 'black'), (1, 'white'), (2, 'red'), (3, 'green'))
    glyphs = (
        ('U', '\u2191'),
        ('D', '\u2193'),
        ('L', '\u2190'),
        ('R', '\u2192'),
    )

    window = Tk()
    window.title('Policy')
    extent = side * CELL_SIZE
    board = Canvas(window, width=extent, height=extent, bg='grey')

    # Background: one coloured square per recognised cell code.
    for r in range(side):
        for c in range(side):
            cell = maze[r][c]
            fill = next((shade for code, shade in palette if cell == code), None)
            if fill is not None:
                draw_maze(board, r, c, fill)

    # Foreground: arrows for the cells that have a move assigned.
    half = CELL_SIZE // 2
    for r in range(side):
        for c in range(side):
            symbol = policy[r][c]
            arrow = next((glyph for key, glyph in glyphs if symbol == key), None)
            if arrow is None:
                continue
            board.create_text(c * CELL_SIZE + half, r * CELL_SIZE + half,
                              text=arrow, font=('Arial', 20))

    board.pack()
    window.mainloop()
            
def visualize_values(values, maze):
    """Show the maze in a 'Value functions' window with each cell's value.

    Cells coded 0, 1, 2 and 3 are painted black, white, red and green; other
    codes keep the grey canvas background. Every cell then gets its value
    printed with three decimals at its centre. The call runs the window's
    main loop before returning.
    """
    side = len(maze[0])
    palette = ((0, 'black'), (1, 'white'), (2, 'red'), (3, 'green'))

    window = Tk()
    window.title('Value functions')
    extent = side * CELL_SIZE
    board = Canvas(window, width=extent, height=extent, bg='grey')

    # Background: one coloured square per recognised cell code.
    for r in range(side):
        for c in range(side):
            cell = maze[r][c]
            fill = next((shade for code, shade in palette if cell == code), None)
            if fill is not None:
                draw_maze(board, r, c, fill)

    # Foreground: the numeric value centred in each cell.
    half = CELL_SIZE // 2
    for r in range(side):
        for c in range(side):
            label = "{:.3f}".format(values[r][c])
            board.create_text(c * CELL_SIZE + half, r * CELL_SIZE + half,
                              text=label, font=('Arial', 14))

    board.pack()
    window.mainloop()

def solution_path(policy, start, goal):
    """Follow ``policy`` from ``start`` until a stop cell and report the path.

    Prints the start coordinates, walks the grid one move at a time, prints
    the list of visited cells and returns it.

    Args:
        policy: 2-D grid of symbols; 'U', 'D', 'L', 'R' move one cell up,
            down, left or right, and 'S' ends the walk.
        start: (row, col) of the first cell.
        goal: (row, col) of the goal. Kept for interface compatibility; the
            walk ends at the first 'S' cell and does not consult it.

    Returns:
        The list of (row, col) cells visited, starting with ``start`` and
        ending with the 'S' cell.

    Raises:
        ValueError: if the walk leaves the grid, reaches a cell whose symbol
            is not a move or 'S', or comes back to a cell it already
            visited (each of which would otherwise never terminate or index
            the wrong cell).
    """
    steps = {'U': (-1, 0), 'D': (1, 0), 'L': (0, -1), 'R': (0, 1)}
    rows = len(policy)
    cols = len(policy[0]) if rows else 0

    current_row = start[0]
    current_col = start[1]
    print(current_row, current_col)
    path = [(current_row, current_col)]
    visited = {(current_row, current_col)}

    while True:
        if not (0 <= current_row < rows and 0 <= current_col < cols):
            raise ValueError(
                "Policy leads outside the grid at "
                f"({current_row}, {current_col})"
            )
        symbol = str(policy[current_row][current_col])
        if symbol == 'S':
            break
        if symbol not in steps:
            raise ValueError(
                f"No move defined at ({current_row}, {current_col}): "
                f"{symbol!r}"
            )
        d_row, d_col = steps[symbol]
        current_row = current_row + d_row
        current_col = current_col + d_col
        # The policy is deterministic, so revisiting a cell means the walk
        # would repeat forever.
        if (current_row, current_col) in visited:
            raise ValueError(
                f"Policy loops back to ({current_row}, {current_col})"
            )
        visited.add((current_row, current_col))
        path.append((current_row, current_col))

    print(path)
    return path

# Driver: generate a maze, solve it with value iteration, then display and
# print the results.

# Side length of the maze, in cells.
n = 10
        
# Main window and canvas that are handed to the maze generator.
root = Tk()
root.title('Maze Generator')
canvas_side = n * CELL_SIZE
canva = Canvas(root, width = canvas_side, height = canvas_side, bg = 'grey')
canva.pack()

# generate_maze() yields the grid plus the start and goal coordinates; the
# two cells are then tagged with the codes value_iteration looks for
# (2 = start, 3 = goal).
mazen = Mazen(n, canva)
maze, start, goal = mazen.generate_maze()    
maze[start[0]][start[1]] = 2
maze[goal[0]][goal[1]] = 3


# Dump the grid row by row together with the start and goal positions.
print("Maze ------------")
for i in range(n):
    print(maze[i])
print("start: ", start)
print("goal: ", goal)

# Solve the maze; value_iteration returns the value table and the policy grid.
optimal_value_function_with_policy, optimal_policy_with_policy = value_iteration(maze)
# optimal_policy_with_policy = policy_modified(optimal_policy_with_policy)
print("Optimal Value Function:")
print(optimal_value_function_with_policy)
print("\nOptimal Policy:")
print(optimal_policy_with_policy)
# Each visualize_* call creates its own Tk window and runs mainloop() on it
# before the next statement executes.
visualize_policy(optimal_policy_with_policy, maze)
visualize_values(optimal_value_function_with_policy, maze)
# Print the cells visited when following the policy from the start cell.
solution_path(optimal_policy_with_policy, start, goal)
# mazen.visualize_policy(maze, optimal_policy_with_policy)
root.mainloop()


Maze ------------
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 1, 1, 1, 0, 0, 1, 1, 1, 2]
[0, 3, 0, 1, 1, 1, 1, 0, 1, 0]
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 1, 1, 1, 1, 1, 0, 1, 0, 0]
[0, 0, 0, 0, 0, 1, 0, 1, 1, 0]
[0, 1, 1, 0, 0, 1, 0, 0, 1, 0]
[0, 0, 1, 0, 1, 1, 1, 0, 1, 0]
[0, 1, 1, 1, 1, 0, 1, 1, 1, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
start:  (1, 9)
goal:  (2, 1)
Optimal Value Function:
[[ 0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.        ]
 [ 0.         46.84210106 41.15789095 36.04210186  0.          0.
  17.18740025 14.46866023 12.0217942   9.81961478]
 [ 0.         53.15789095  0.         31.43789167 27.2941025  23.56469225
  20.20822303  0.          9.81961478  0.        ]
 [ 0.         46.84210186  0.          0.          0.          0.
   0.          0.          0.          0.        ]
 [ 0.         41.15789167 36.0421025  31.43789225 27.29410303 23.56469272
   0.         -0.52034045  0.          0.        ]
 [ 0.          0. 