In [33]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pygame
import time
import random

In [50]:
class MazeXEnv(gym.Env):
    """Grid-maze environment with checkpoints, drawn in a pygame window.

    The maze is a 15x20 array in which 0 marks a free cell and 1 marks a wall.
    The agent starts at ``start`` and the episode ends when it steps onto
    ``goal``. Positions are ``(row, col)`` pairs.

    Actions (``spaces.Discrete(4)``), expressed as grid moves:
        0 -- "straight": column + 1
        1 -- "right":    row + 1
        2 -- "left":     row - 1
        3 -- "backward": column - 1

    Rewards: -1 per call to ``step``, +30 whenever the agent moves onto a
    checkpoint, +100 when it moves onto the goal.

    Note: ``reset`` and ``step`` keep this notebook's original return shapes
    (a bare observation and a 4-tuple respectively), which are not the
    ``(obs, info)`` / 5-tuple shapes of the current Gymnasium API.
    """

    metadata = {"render_modes": ["human"]}

    # Action code -> (row delta, column delta).
    _MOVES = {
        0: (0, 1),   # straight
        1: (1, 0),   # right
        2: (-1, 0),  # left
        3: (0, -1),  # backward
    }

    def __init__(self):
        """Build the maze, the spaces and the pygame window.

        Constructing the environment initialises pygame and opens the display
        immediately, so a display must be available.
        """
        super().__init__()
        self.maze = np.array([
            [0,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1],
            [1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,1,1,0,1],
            [0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1],
            [0,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,0,1,1,1],
            [0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1],
            [0,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1],
            [0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1],
            [1,1,1,1,0,1,1,1,1,1,0,1,0,1,0,1,0,1,0,1],
            [0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,1],
            [0,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,0,1,0,1],
            [0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1],
            [0,1,0,1,1,1,1,1,1,1,1,1,0,1,0,1,1,1,0,1],
            [0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1],
            [1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1],
            [0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
        ])
        self.start = (0, 0)
        self.goal = (0, 4)
        self.checkpoints = [(14, 6), (4, 10), (2, 16), (10, 2)]
        self.state = self.start
        # Observation is the (row, col) position; the upper bound uses the
        # larger maze dimension for both components.
        self.observation_space = spaces.Box(
            low=0, high=max(self.maze.shape) - 1, shape=(2,), dtype=np.int32
        )
        self.action_space = spaces.Discrete(4)  # see the class docstring

        # Pygame setup
        self.screen_width = 800
        self.screen_height = 600
        self.screen_size = (self.screen_width, self.screen_height)
        # Largest square cell that lets the whole maze fit in the window.
        self.cell_size = min(
            self.screen_width // self.maze.shape[1],
            self.screen_height // self.maze.shape[0],
        )
        pygame.init()
        self.screen = pygame.display.set_mode(self.screen_size)
        pygame.display.set_caption('MazeX')

    def _observation(self):
        """Return the current position as an array matching ``observation_space``."""
        # Explicit int32: the platform-default integer dtype (int64 on most
        # systems) would not be accepted by the int32 Box declared above.
        return np.array(self.state, dtype=np.int32)

    def _is_open(self, row, col):
        """Return True when (row, col) lies inside the maze and is not a wall."""
        n_rows, n_cols = self.maze.shape
        return 0 <= row < n_rows and 0 <= col < n_cols and self.maze[row, col] == 0

    def reset(self, *, seed=None, options=None):
        """Move the agent back to ``start`` and return its observation.

        Args:
            seed: optional seed forwarded to ``gym.Env.reset`` to seed the
                environment's RNG. The maze itself is deterministic.
            options: accepted for Gymnasium signature compatibility; unused.

        Returns:
            The start position as an int32 array ``[row, col]``. Only the
            observation is returned (no info dict) so that existing
            ``state = env.reset()`` callers keep working.
        """
        super().reset(seed=seed)
        self.state = self.start
        return self._observation()

    def step(self, action, reward=0):
        """Apply one action and return the updated running reward.

        Args:
            action: one of the codes listed in the class docstring. Any other
                value leaves the agent in place but still costs a step.
            reward: running total to accumulate into. Defaults to 0, in which
                case the returned value is the reward of this step alone.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the
            passed-in total plus this step's reward, ``done`` is True only
            when this step moved the agent onto the goal, and ``info`` is an
            empty dict.
        """
        row, col = self.state

        # Compare with == rather than hashing the action, so any value that
        # compares equal to an action code is accepted.
        delta = next(
            (move for code, move in self._MOVES.items() if action == code), None
        )

        reward += -1  # Each step costs -1
        done = False

        if delta is not None:
            target = (row + delta[0], col + delta[1])
            if self._is_open(*target):
                self.state = target
                done = self.state == self.goal
                # NOTE(review): a checkpoint pays out on every visit, so an
                # agent can collect the same bonus repeatedly.
                if self.state in self.checkpoints:
                    reward += 30  # checkpoint reached
                if done:
                    reward += 100  # goal reached

        return self._observation(), reward, done, {}

    def _fill_cell(self, row, col, color):
        """Paint the maze cell at (row, col) with the given RGB colour."""
        size = self.cell_size
        pygame.draw.rect(
            self.screen, color, pygame.Rect(col * size, row * size, size, size)
        )

    def render(self, mode='human'):
        """Draw the maze, markers and agent, then pause for 0.1 s.

        ``mode`` is accepted but not inspected; drawing always goes to the
        pygame window created in ``__init__``.
        """
        self.screen.fill((255, 255, 255))  # White background

        # Maze: white paths, black walls.
        for row in range(self.maze.shape[0]):
            for col in range(self.maze.shape[1]):
                is_wall = self.maze[row, col] == 1
                self._fill_cell(row, col, (0, 0, 0) if is_wall else (255, 255, 255))

        # Later draws cover earlier ones, so the agent is painted last.
        self._fill_cell(self.start[0], self.start[1], (0, 255, 0))  # start: green
        self._fill_cell(self.goal[0], self.goal[1], (255, 0, 0))    # goal: red
        for cp_row, cp_col in self.checkpoints:
            self._fill_cell(cp_row, cp_col, (0, 0, 255))            # checkpoint: blue
        self._fill_cell(self.state[0], self.state[1], (255, 255, 0))  # agent: yellow

        pygame.display.flip()
        time.sleep(0.1)

    def close(self):
        """Shut down pygame, closing the window."""
        pygame.quit()

In [53]:
# Roll out one episode with a uniformly random policy, rendering every step.
# A learned policy (e.g. Q-learning) could replace the sampling line below to
# choose actions that maximise reward.
MAX_STEPS = 500  # a random walk may take very long to reach the goal; bound the run
SEED = 42        # makes the sampled action sequence repeatable

env = MazeXEnv()
env.action_space.seed(SEED)
state = env.reset()
done = False
reward = 0  # running total, threaded through env.step
steps = 0
try:
    while not done and steps < MAX_STEPS:
        # Leave the episode loop itself when the window is closed. Setting
        # `done` here would not work: env.step overwrites it right after.
        if any(event.type == pygame.QUIT for event in pygame.event.get()):
            break
        action = env.action_space.sample()
        state, reward, done, _ = env.step(action, reward)
        steps += 1
        env.render()
        print(f"Reward: {reward}")
finally:
    # Runs on normal exit, window close and KeyboardInterrupt alike.
    env.close()

Reward: -1
Reward: -2
Reward: -3
Reward: -4
Reward: -5
Reward: -6
Reward: -7
Reward: -8
Reward: -9
Reward: -10
Reward: -11
Reward: -12
Reward: -13
Reward: -14
Reward: -15
Reward: -16
Reward: -17
Reward: -18
Reward: -19
Reward: -20
Reward: -21
Reward: -22
Reward: -23
Reward: -24
Reward: -25
Reward: -26
Reward: -27
Reward: -28
Reward: -29
Reward: -30
Reward: -31
Reward: -32
Reward: -33
Reward: -34
Reward: -35
Reward: -36
Reward: -37
Reward: -38
Reward: -39
Reward: -40
Reward: -41
Reward: -42
Reward: -43
Reward: -44
Reward: -45
Reward: -46
Reward: -47
Reward: -48
Reward: -49
Reward: -50
Reward: -51
Reward: -52
Reward: -53
Reward: -54
Reward: -55
Reward: -56
Reward: -57
Reward: -58
Reward: -59
Reward: -60
Reward: -61
Reward: -62
Reward: -63
Reward: -64
Reward: -65
Reward: -66
Reward: -67
Reward: -68
Reward: -69
Reward: -70
Reward: -71
Reward: -72
Reward: -73
Reward: -74
Reward: -75
Reward: -76
Reward: -77
Reward: -78
Reward: -79
Reward: -80
Reward: -81
Reward: -82
Reward: -83
Reward: -84
R

KeyboardInterrupt: 

In [54]:
# Release the pygame window by hand. Needed when the rollout cell above is
# interrupted (see the KeyboardInterrupt in its output) before its own cleanup runs.
env.close()