In [23]:
import random

## FUNCTION FROM CHAT GPT
def create_maze(size, seed=None):
    """Generate a square maze with a randomized depth-first search (backtracker).

    The search starts at the centre cell and repeatedly jumps two cells in a
    random direction, opening the cell it lands on and the cell in between.

    Args:
        size: Side length of the maze interior. The returned grid has
            ``size + 2`` rows and columns because a one-cell wall border
            surrounds the interior.
        seed: Optional seed. The same ``size`` and ``seed`` always produce the
            same maze. When ``None``, the module-level ``random`` generator is
            used, so the result depends on its current state.

    Returns:
        list[list[int]]: The grid as a list of rows, indexed ``maze[y][x]``.
        ``2`` marks a wall and ``0`` an open cell.
    """
    # A private generator yields the same sequence as random.seed(seed) would,
    # but does not reset the global RNG state for the rest of the notebook.
    rng = random if seed is None else random.Random(seed)

    side = size + 2
    maze = [[2] * side for _ in range(side)]  # Initialize maze with all walls

    # Create an open space in the center
    center = size // 2
    start_x = start_y = center + 1
    maze[start_y][start_x] = 0

    def shuffled_directions():
        """Return an iterator over the four unit steps in random order."""
        order = [(1, 0), (-1, 0), (0, 1), (0, -1)]
        rng.shuffle(order)
        return iter(order)

    # Backtracking with an explicit stack instead of recursion, so large mazes
    # cannot hit Python's recursion limit. Each entry keeps the iterator of
    # directions not yet tried from that cell; directions are shuffled when a
    # cell is first entered, exactly as the recursive formulation does.
    stack = [(start_x, start_y, shuffled_directions())]
    while stack:
        x, y, pending = stack[-1]
        for dx, dy in pending:
            nx, ny = x + 2 * dx, y + 2 * dy
            if 0 < nx < size + 1 and 0 < ny < size + 1 and maze[ny][nx]:
                maze[y + dy][x + dx] = 0  # knock down the wall in between
                maze[ny][nx] = 0
                stack.append((nx, ny, shuffled_directions()))
                break
        else:
            # Every direction from this cell has been tried: backtrack
            stack.pop()

    # Surround the maze with walls
    for i in range(side):
        maze[i][0] = 2
        maze[i][-1] = 2
        maze[0][i] = 2
        maze[-1][i] = 2

    return maze

# Example usage: build a seeded maze, drop the markers onto it, print it row by row.
size = 10
seed = 123  # Change the seed to generate different mazes
maze = create_maze(size, seed)

# Marker values match the Maze class labels: 1 = PLAYER, 3 = KEY, 4 = GOAL.
new_pos = (6, 6)
maze[new_pos[0]][new_pos[1]] = 1
maze[4][6] = 3
maze[-4][-4] = 4

# Read back what sits on the player's cell
new_pos_type = maze[new_pos[0]][new_pos[1]]

for row in maze:
    print(row)
new_pos_type

[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
[2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]
[2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2]
[2, 2, 0, 0, 0, 2, 3, 0, 0, 0, 0, 2]
[2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2]
[2, 2, 0, 0, 0, 2, 1, 2, 0, 0, 0, 2]
[2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 0, 2]
[2, 2, 0, 2, 0, 2, 0, 2, 4, 2, 0, 2]
[2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2]
[2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2]
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]


1

In [13]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import matplotlib.pyplot as plt
from collections import deque

# Seed passed to create_maze() in Maze.__init__; it fixes the generated maze layout.
maze_seed = 123

class Maze(gym.Env):
    """
    Custom Gymnasium environment (for use with Stable Baselines 3): a key-and-goal maze.

    The layout is built once by ``create_maze`` and restored on every reset.
    The player moves one cell per step. Stepping onto the key cell collects the
    key, stepping onto the goal cell ends the episode, and walking into a wall
    ends the episode with a penalty.

    Coordinates are ``(row, col)`` everywhere, i.e. ``grid[row][col]``.

    Observation (a dict):
        "position": player ``(row, col)``, int32 array of shape (2,)
        "key":      1 once the key has been collected, else 0; int32 array of shape (1,)
        "grid":     all cell labels, uint8 array of shape (grid_size, grid_size)
    """
    # Gymnasium looks up "render_modes" (the old "render.modes" key is ignored).
    # NOTE(review): render() is not implemented in this class yet.
    metadata = {"render_modes": ["console", "rgb_array"]}
    # Direction constants
    n_actions = 4  # four possible moves each turn
    UP = 0
    DOWN = 1
    RIGHT = 2
    LEFT = 3
    # (row, col) offset applied by each action
    _MOVES = {UP: (-1, 0), DOWN: (1, 0), RIGHT: (0, 1), LEFT: (0, -1)}
    # Grid label constants
    EMPTY = 0
    PLAYER = 1
    WALL = 2
    KEY = 3
    GOAL = 4
    # Rewards
    REWARD_PER_STEP_TOWARDS_GOAL = 1  # reward for moving closer to the current target, penalty for moving away
    REWARD_FOR_KEY = 50
    REWARD_FOR_GOAL = 50
    # Should be lower than -REWARD_PER_STEP_TOWARDS_GOAL so hitting a wall on purpose never pays off.
    # NOTE(review): the original referenced REWARD_WALL_HIT without defining it; -20 is a placeholder to tune.
    REWARD_WALL_HIT = -20
    MAX_STEPS = 100  # episode is truncated after this many steps to avoid infinite loops

    def __init__(self, grid_size=12):
        """Build the maze and place the player, key and goal.

        Args:
            grid_size: Side length of the 2D grid, including the outer walls.
                NOTE(review): the player and key cells are hard-coded and look
                chosen for the default 12x12 maze — confirm before using
                another size.
        """
        super().__init__()

        self.stepnum = 0
        # Size of the 2D grid (including walls)
        self.grid_size = grid_size

        # Init the grid. An ndarray makes .copy() a real snapshot (list.copy()
        # would share the rows) and matches the dtype of the "grid" space.
        self.grid = np.array(create_maze(self.grid_size - 2, maze_seed), dtype=np.uint8)

        # Place player, goal and key; locations are (row, col) tuples
        self.player_location = (6, 6)
        self.grid[self.player_location] = self.PLAYER

        # Fourth cell from the bottom/right edge, stored as non-negative indices
        self.goal_location = (self.grid_size - 4, self.grid_size - 4)
        self.grid[self.goal_location] = self.GOAL

        self.key_location = (4, 6)
        self.grid[self.key_location] = self.KEY

        self.key_status = 0

        # Init distances to key and goal
        self._update_distances()

        # Store init values (tuples and ints are immutable, only the grid needs a copy)
        self.init_grid = self.grid.copy()
        self.init_player_location = self.player_location
        self.init_goal_location = self.goal_location
        self.init_key_location = self.key_location
        self.init_key_status = self.key_status

        # The action space
        self.action_space = spaces.Discrete(self.n_actions)
        # The observation space: "position" is the player's (row, col), "key" says
        # whether the key has been collected, "grid" contains the full grid info
        self.observation_space = spaces.Dict(
            spaces={
                "position": spaces.Box(low=0, high=(self.grid_size - 1), shape=(2,), dtype=np.int32),
                "key": spaces.Box(low=0, high=1, shape=(1,), dtype=np.int32),
                # GOAL is the largest label that can appear on the grid
                "grid": spaces.Box(low=0, high=self.GOAL, shape=(self.grid_size, self.grid_size), dtype=np.uint8),
            })

    @staticmethod
    def grid_distance(maze, start, end):
        """Shortest path length between two cells, found by breadth-first search.

        Args:
            maze: 2D grid indexed ``maze[row][col]`` (list of lists or ndarray).
            start: ``(row, col)`` of the first cell.
            end: ``(row, col)`` of the target cell. Negative indices count from
                the end, as in normal Python indexing.

        Returns:
            The number of up/down/left/right moves on the shortest path that
            avoids ``WALL`` cells, or ``float('inf')`` when ``end`` cannot be
            reached or either cell lies outside the grid.
        """
        n_rows = len(maze)
        n_cols = len(maze[0]) if n_rows else 0

        def normalize(pos):
            row, col = int(pos[0]), int(pos[1])
            if row < 0:
                row += n_rows
            if col < 0:
                col += n_cols
            return row, col

        def inside(row, col):
            return 0 <= row < n_rows and 0 <= col < n_cols

        if n_rows == 0 or n_cols == 0:
            return float('inf')
        start, end = normalize(start), normalize(end)
        if not (inside(*start) and inside(*end)):
            return float('inf')

        visited = {start}
        queue = deque([(start, 0)])  # (cell, distance from start)
        while queue:
            (row, col), distance = queue.popleft()
            if (row, col) == end:
                return distance
            for d_row, d_col in ((1, 0), (-1, 0), (0, 1), (0, -1)):
                nxt = (row + d_row, col + d_col)
                # Everything except a wall can be walked on; that includes the
                # key and goal cells, otherwise they could never be reached.
                if inside(*nxt) and nxt not in visited and maze[nxt[0]][nxt[1]] != Maze.WALL:
                    visited.add(nxt)
                    queue.append((nxt, distance + 1))

        return float('inf')  # end is not reachable

    def _update_distances(self):
        """Recompute the player's path distance to the key and to the goal."""
        self.player_dist_to_Key = self.grid_distance(self.grid, self.player_location, self.key_location)
        self.player_dist_to_Goal = self.grid_distance(self.grid, self.player_location, self.goal_location)

    def reset(self, seed=None, options=None):
        """Restore the initial maze state.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.stepnum = 0
        self.grid = self.init_grid.copy()
        self.player_location = self.init_player_location
        self.key_location = self.init_key_location
        self.goal_location = self.init_goal_location
        self.key_status = self.init_key_status
        self._update_distances()
        return self._get_obs(), {}

    def _get_obs(self):
        """Return the current observation in the format of ``self.observation_space``."""
        return {
            "position": np.array(self.player_location, dtype=np.int32),
            "key": np.array([self.key_status], dtype=np.int32),
            # Copy so callers that keep the observation are not affected by later steps
            "grid": self.grid.copy(),
        }

    def step(self, action):
        """Move the player one cell and score the move.

        Args:
            action: One of ``UP``, ``DOWN``, ``RIGHT``, ``LEFT``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is set when the goal is reached or a wall is hit;
            ``truncated`` is set when ``MAX_STEPS`` steps have been taken.

        Raises:
            ValueError: If ``action`` is not part of the action space.
        """
        move = self._MOVES.get(int(action))
        if move is None:
            raise ValueError("Action=%d is not part of the action space" % (action))

        # New player coordinate
        new_pos = (self.player_location[0] + move[0], self.player_location[1] + move[1])
        new_pos_type = self.grid[new_pos]

        terminated = False
        reward = 0  # by default the game goes on and no reward

        # Shaping target: the key until it is collected, the goal afterwards.
        # Remember the distance before moving so both values refer to the same target.
        # NOTE(review): this key-then-goal shaping is a design choice; the
        # original shaping code was a Snake leftover.
        seeking_key = self.key_status == 0
        prev_dist = self.player_dist_to_Key if seeking_key else self.player_dist_to_Goal

        if new_pos_type == self.WALL:
            # Stop if we hit a wall; the player stays put so the grid remains valid
            terminated = True
            reward += self.REWARD_WALL_HIT
        else:
            if new_pos_type == self.KEY:
                reward += self.REWARD_FOR_KEY
                self.key_status = 1
            elif new_pos_type == self.GOAL:
                # NOTE(review): as in the original logic, reaching the goal ends
                # the episode whether or not the key was collected.
                reward += self.REWARD_FOR_GOAL
                terminated = True
            self.grid[self.player_location] = self.EMPTY  # vacate the old cell
            self.player_location = new_pos
            self.grid[new_pos] = self.PLAYER  # this position is now occupied by the player

        # Update distances and reward getting closer / penalise getting further
        self._update_distances()
        new_dist = self.player_dist_to_Key if seeking_key else self.player_dist_to_Goal
        if new_dist < prev_dist:
            reward += self.REWARD_PER_STEP_TOWARDS_GOAL
        elif new_dist > prev_dist:
            reward -= self.REWARD_PER_STEP_TOWARDS_GOAL

        # Stop if we played too long
        self.stepnum += 1
        truncated = (not terminated) and self.stepnum >= self.MAX_STEPS

        return self._get_obs(), reward, terminated, truncated, {}
    


        
    

In [15]:
# TESTING BLOCK
# Scratch check: adding a (row, col) step to a position array works element-wise.
x = np.add(np.array((6, 6)), (1, 0)).astype(np.int32)
x

array([7, 6])