Implement reinforcement learning (tabular Q-learning) on an example maze environment that an agent must explore to find a path from the start cell to the goal cell.

In [1]:
# Simple Maze Solver - Q-Learning


import numpy as np

In [2]:
# Maze grid: one string per row, where '0' is an open cell and '1' is a wall
maze = np.array([
    [int(ch) for ch in line]
    for line in ("00100", "00101", "00000", "11101", "00000")
])

# Start and goal cells as (row, col) coordinates
start, goal = (0, 0), (4, 4)

# Grid dimensions, used for bounds checks and for sizing the Q-table
rows = maze.shape[0]
cols = maze.shape[1]


In [3]:
# Q-table: one value per (row, col, action) triple, all starting at zero.
# Action index meaning: 0=up, 1=down, 2=left, 3=right
q_table = np.zeros(shape=(rows, cols, 4), dtype=float)

In [4]:
# Q-learning hyperparameters
alpha, gamma = 0.1, 0.9  # learning rate (update step size), discount factor
epsilon = 0.9            # exploration rate: probability of taking a random action
episodes = 500           # number of training episodes


In [5]:
# Moves as (row_delta, col_delta); the list position is the action index
actions = [
    (-1, 0),  # 0: up
    (1, 0),   # 1: down
    (0, -1),  # 2: left
    (0, 1),   # 3: right
]

In [6]:
# Helper functions
def is_valid(pos):
    """Tell whether ``pos`` (row, col) lies inside the grid on an open cell.

    Returns a falsy value when the position is outside the grid or on a wall
    (a maze value other than 0), and a truthy value otherwise.
    """
    r, c = pos
    # Reject out-of-range coordinates before indexing the maze, so negative
    # indices never wrap around to the opposite edge.
    if not (0 <= r < rows):
        return False
    if not (0 <= c < cols):
        return False
    return maze[r][c] == 0

def get_next_pos(current, action_idx):
    """Return the cell reached by applying action ``action_idx`` at ``current``.

    The move is looked up in ``actions`` as a (row_delta, col_delta) pair.
    If the resulting cell is not valid (outside the grid or a wall), the
    agent stays where it is and ``current`` is returned unchanged.
    """
    d_row, d_col = actions[action_idx]
    candidate = (current[0] + d_row, current[1] + d_col)
    if is_valid(candidate):
        return candidate
    return current

def get_reward(pos):
    """Return the reward for landing on ``pos``: 100 at the goal, -1 elsewhere."""
    if pos == goal:
        return 100
    return -1

In [7]:
# Training: tabular Q-learning with an epsilon-greedy behaviour policy
print("Training agent...")
for episode in range(episodes):
    pos = start  # every episode restarts from the start cell

    while pos != goal:
        r, c = pos

        # Epsilon-greedy: random action with probability epsilon, else the greedy one
        explore = np.random.random() < epsilon
        action = np.random.randint(4) if explore else np.argmax(q_table[r, c])

        # Apply the move; an invalid move leaves the agent in place
        next_pos = get_next_pos(pos, action)
        reward = get_reward(next_pos)

        # Move Q(s, a) a fraction alpha towards reward + gamma * max_a' Q(s', a')
        current_q = q_table[r, c, action]
        max_next_q = np.max(q_table[next_pos[0], next_pos[1]])
        td_error = reward + gamma * max_next_q - current_q
        q_table[r, c, action] = current_q + alpha * td_error

        pos = next_pos

    # Decay exploration after each episode, never dropping below 0.1
    epsilon = max(0.1, epsilon * 0.995)

print("Training complete!\n")

Training agent...
Training complete!



In [8]:
# Test the agent: follow the greedy (argmax) policy from start towards goal
pos = start
path = [pos]

# The greedy policy is deterministic, so a rollout that has not reached the
# goal after rows * cols steps must have revisited a cell and would loop
# forever. Cap the rollout instead of hanging the kernel on an unconverged
# Q-table.
max_steps = rows * cols

while pos != goal and len(path) - 1 < max_steps:
    action = np.argmax(q_table[pos[0], pos[1]])
    pos = get_next_pos(pos, action)
    path.append(pos)

if pos == goal:
    print(f"Path found in {len(path)-1} steps:")
else:
    print(f"No path found within {max_steps} steps (greedy policy is stuck); partial path:")
print(path)

Path found in 8 steps:
[(0, 0), (1, 0), (1, 1), (2, 1), (2, 2), (2, 3), (3, 3), (4, 3), (4, 4)]


In [9]:
# Visualize solution: draw the maze as text with the greedy path overlaid
print("\nMaze Solution:")
for i in range(rows):
    symbols = []
    for j in range(cols):
        cell = (i, j)
        # Priority order: start, goal, path cell, wall, then empty cell
        if cell == start:
            symbol = "S"
        elif cell == goal:
            symbol = "G"
        elif cell in path:
            symbol = "*"
        elif maze[i][j] == 1:
            symbol = "#"
        else:
            symbol = "."
        symbols.append(symbol)
    # Every symbol is followed by a single space, including the last one in the row
    print("".join(f"{s} " for s in symbols))

print("\nS=Start, G=Goal, *=Path, #=Wall, .=Empty")


Maze Solution:
S . # . . 
* * # . # 
. * * * . 
# # # * # 
. . . * G 

S=Start, G=Goal, *=Path, #=Wall, .=Empty
