In [1]:
import pygame
import numpy as np
import random
import sys

# --- Configuration ---
# The window is sized to hold exactly MAZE_ROWS x MAZE_COLS square cells,
# each CELL_SIZE pixels on a side.
CELL_SIZE = 40
MAZE_ROWS = 10
MAZE_COLS = 10
WIDTH, HEIGHT = MAZE_COLS * CELL_SIZE, MAZE_ROWS * CELL_SIZE

# Maze layout, one string per row and one digit per cell:
#   1 = wall, 0 = free space, 3 = goal
_MAZE_LAYOUT = (
    "1111111111",
    "1000100001",
    "1010101101",
    "1010000101",
    "1011110101",
    "1000000101",
    "1111101101",
    "1000100001",
    "1010001031",  # goal sits at (row 8, col 8)
    "1111111111",
)

# Expanded into the list-of-lists-of-ints form the rest of the file indexes
# as maze_grid[row][col].
maze_grid = [[int(cell) for cell in row] for row in _MAZE_LAYOUT]


# --- Maze Environment ---
class Maze:
    """Grid maze that can render itself and simulate single-cell moves.

    Cell codes stored in ``grid``: 1 is a wall, 0 is free space, 3 is the goal.
    Positions are ``(row, col)`` pairs indexing ``grid[row][col]``.
    """

    # (row shift, col shift) for each action code: 0=Up, 1=Right, 2=Down, 3=Left
    _ACTION_DELTAS = {
        0: (-1, 0),
        1: (0, 1),
        2: (1, 0),
        3: (0, -1),
    }

    def __init__(self, grid, cell_size):
        """Store the grid and cell size; dimensions come from the grid itself."""
        self.grid = grid
        self.cell_size = cell_size
        self.rows = len(grid)
        self.cols = len(grid[0])

    def draw(self, screen, agent_pos):
        """Paint the cells, then the grid lines, then the agent onto ``screen``.

        ``agent_pos`` is indexed as ``(row, col)``.
        """
        size = self.cell_size

        # Fill colour per cell code; cells with any other code are left as-is.
        fill_by_code = {
            1: (0, 0, 0),        # wall: black
            0: (255, 255, 255),  # free: white
            3: (0, 255, 0),      # goal: green
        }
        for row_idx in range(self.rows):
            for col_idx in range(self.cols):
                fill = fill_by_code.get(self.grid[row_idx][col_idx])
                if fill is None:
                    continue
                cell_rect = pygame.Rect(col_idx * size, row_idx * size, size, size)
                pygame.draw.rect(screen, fill, cell_rect)

        # Light grey grid lines for clarity
        line_color = (200, 200, 200)
        right_edge = self.cols * size
        bottom_edge = self.rows * size
        for row_idx in range(self.rows):
            y = row_idx * size
            pygame.draw.line(screen, line_color, (0, y), (right_edge, y))
        for col_idx in range(self.cols):
            x = col_idx * size
            pygame.draw.line(screen, line_color, (x, 0), (x, bottom_edge))

        # The agent is a red square covering its whole cell
        agent_rect = pygame.Rect(agent_pos[1] * size, agent_pos[0] * size, size, size)
        pygame.draw.rect(screen, (255, 0, 0), agent_rect)

    def step(self, pos, action):
        """
        Apply ``action`` from position ``pos`` and return a 3-tuple:
          - next position (``pos`` itself when the move is blocked)
          - reward: -1 for leaving the grid or hitting a wall, 10 for landing
            on the goal, -0.1 otherwise
          - done (True only when the goal cell is reached)
        Action mapping: 0=Up, 1=Right, 2=Down, 3=Left; any other value
        leaves the position unchanged.
        """
        row, col = pos
        row_shift, col_shift = self._ACTION_DELTAS.get(action, (0, 0))
        target_row = row + row_shift
        target_col = col + col_shift

        # Moves that would leave the grid are rejected
        if not (0 <= target_row < self.rows and 0 <= target_col < self.cols):
            return pos, -1, False

        target_cell = self.grid[target_row][target_col]

        # Walls block the move as well
        if target_cell == 1:
            return pos, -1, False

        moved_to = (target_row, target_col)
        if target_cell == 3:
            # Goal reached: episode ends
            return moved_to, 10, True

        # Small per-step penalty encourages faster solutions
        return moved_to, -0.1, False


# --- Q-learning Agent ---
class Agent:
    """Tabular Q-learning agent with an epsilon-greedy policy.

    States are ``(row, col)`` positions and there are four actions (0-3),
    so the Q-table has shape ``(maze.rows, maze.cols, 4)``.
    """

    def __init__(self, maze, epsilon=0.1, alpha=0.5, gamma=0.9):
        """Keep the hyper-parameters and start with an all-zero Q-table.

        epsilon is the exploration rate, alpha the learning rate and gamma
        the discount factor.
        """
        self.maze = maze
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        # Starting position (must be a free cell)
        self.start_pos = (1, 1)
        self.position = self.start_pos
        # One Q-value per (row, col, action)
        self.q_table = np.zeros((maze.rows, maze.cols, 4))

    def choose_action(self, state):
        """Return a random action with probability epsilon, else the greedy one."""
        row, col = state
        exploring = random.random() < self.epsilon
        if exploring:
            return random.randint(0, 3)
        return int(self.q_table[row, col].argmax())

    def update_q(self, state, action, reward, next_state):
        """Apply one Q-learning update for the observed transition."""
        row, col = state
        next_row, next_col = next_state
        # Target: immediate reward plus the discounted best value of the next state
        td_target = reward + self.gamma * self.q_table[next_row, next_col].max()
        td_error = td_target - self.q_table[row, col, action]
        self.q_table[row, col, action] += self.alpha * td_error

    def reset(self):
        """Put the agent back on its starting position."""
        self.position = self.start_pos


# --- Main Loop ---
def _exit_on_quit():
    """Drain pending pygame events; shut down and exit if the window was closed."""
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()


def main():
    """Train a Q-learning agent on the maze, then replay its greedy policy.

    Training runs ``num_episodes`` episodes of at most ``max_steps`` steps,
    redrawing the window after every step. The replay then follows the
    learned policy with exploration disabled, outlining visited cells in
    blue. Closing the window at any point exits the program via
    ``sys.exit()``.
    """
    pygame.init()
    screen = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("RL Maze Navigation")
    clock = pygame.time.Clock()

    maze = Maze(maze_grid, CELL_SIZE)
    agent = Agent(maze, epsilon=0.1, alpha=0.5, gamma=0.9)

    num_episodes = 500
    max_steps = 200

    print("Training...")
    for episode in range(num_episodes):
        agent.reset()
        state = agent.position
        done = False
        step = 0

        while not done and step < max_steps:
            # Allow the window to be closed
            _exit_on_quit()

            action = agent.choose_action(state)
            next_state, reward, done = maze.step(state, action)
            agent.update_q(state, action, reward, next_state)
            state = next_state
            agent.position = state

            # Optional: Visualize training episodes (can slow training)
            screen.fill((0, 0, 0))
            maze.draw(screen, agent.position)
            pygame.display.flip()
            clock.tick(60)
            step += 1

        print(f"Episode {episode+1}/{num_episodes}, steps: {step}")

    print("Training completed. Now demonstrating the learned policy...")

    # --- Demonstration: Let the agent run with learned policy (no exploration) ---
    agent.epsilon = 0  # Disable exploration
    agent.reset()
    state = agent.position
    path = [state]
    done = False
    demo_steps = 0

    # With exploration off the policy is deterministic and the Q-table is no
    # longer updated, so a policy that bumps a wall or cycles would repeat
    # forever. Bound the replay with the same step limit as a training episode.
    while not done and demo_steps < max_steps:
        _exit_on_quit()

        action = agent.choose_action(state)
        next_state, reward, done = maze.step(state, action)
        state = next_state
        path.append(state)
        demo_steps += 1

        screen.fill((0, 0, 0))
        maze.draw(screen, state)

        # Draw the path taken so far (blue outline)
        for pos in path:
            r, c = pos
            rect = pygame.Rect(c * CELL_SIZE, r * CELL_SIZE, CELL_SIZE, CELL_SIZE)
            pygame.draw.rect(screen, (0, 0, 255), rect, 3)

        pygame.display.flip()
        clock.tick(5)

    # Only claim success when the replay actually ended on the goal.
    if done:
        print("Goal reached! Press the close button to exit.")
    else:
        print(f"Goal not reached within {max_steps} steps. "
              "Press the close button to exit.")

    # Wait until user closes the window; the tick keeps this idle loop from
    # spinning at full CPU speed.
    while True:
        _exit_on_quit()
        clock.tick(30)


# Start the training run and demo only when executed directly, not on import.
if __name__ == "__main__":
    main()


pygame 2.6.1 (SDL 2.28.4, Python 3.9.13)
Hello from the pygame community. https://www.pygame.org/contribute.html
Training...
Episode 1/500, steps: 200
Episode 2/500, steps: 19
Episode 3/500, steps: 70
Episode 4/500, steps: 134
Episode 5/500, steps: 93
Episode 6/500, steps: 35
Episode 7/500, steps: 50
Episode 8/500, steps: 28
Episode 9/500, steps: 55
Episode 10/500, steps: 28
Episode 11/500, steps: 18
Episode 12/500, steps: 33


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
