In [2]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [3]:
class GridPacmanEnv(gym.Env):
    """
    A grid-based Pac-Man environment that supports explicit map definitions and renders
    the grid using a combination of colored squares (for empty, wall, and pellet cells)
    and icon images (for Pac-Man and ghosts).

    The observation grid uses the following codes:
        0: Empty   -> rendered as a white square.
        1: Wall    -> rendered as a black square.
        2: Pellet  -> rendered as a white square with a small orange square inside.
        3: Pac-Man -> rendered as an icon.
        4: Ghost   -> rendered as an icon.

    The reward returned by `step` is the number of pellets consumed during that
    step (the change in "starting pellets minus remaining pellets" since the
    previous step).

    An episode ends when Pac-Man and a ghost occupy the same cell or when no
    pellets remain.

    Map configuration (passed via `map_config`) may contain:
        - "grid_size": (height, width)
        - "walls": list of (row, col) pairs for walls
        - "pellets": list of (row, col) pairs for pellets
        - "pacman": (row, col) pair for Pac-Man's start
        - "ghosts": list of (row, col) pairs for ghost start positions
    """
    # 'render.modes' is kept for existing readers of this attribute;
    # 'render_modes' is the key name used by the Gymnasium API.
    metadata = {'render.modes': ['graphical'], 'render_modes': ['graphical']}

    # Icon files used when the caller does not override them.
    DEFAULT_ICON_PATHS = {
        3: "/content/Pacman.png",  # Icon for Pac-Man.
        4: "/content/ghost.png",   # Icon for Ghost.
    }

    # (row, col) offsets tried for every ghost, in this order. The order matters
    # for reproducibility because random.choice picks from the filtered list.
    _GHOST_MOVES = ((0, 1), (1, 0), (0, -1), (-1, 0))

    def __init__(self, grid_size=(10, 10), num_ghosts=1, map_config=None, icon_paths=None):
        """
        Args:
            grid_size (tuple): (height, width) of the grid (if no map_config is provided).
            num_ghosts (int): Number of ghosts (if no map_config is provided).
            map_config (dict): Optional dictionary specifying the map layout.
            icon_paths (dict): Optional mapping from observation code (3 for
                Pac-Man, 4 for ghost) to an image file path. Entries given here
                override `DEFAULT_ICON_PATHS`; missing codes keep the default.

        Raises:
            FileNotFoundError: If an icon file cannot be found.
            ValueError: If default ghost placement is requested but the grid
                does not have enough free interior cells.
        """
        super().__init__()
        self.default_grid_size = grid_size
        self.num_ghosts = num_ghosts
        self.map_config = map_config

        # Define discrete actions: 0 = Up, 1 = Right, 2 = Down, 3 = Left.
        self.action_space = spaces.Discrete(4)
        # observation_space depends on the effective grid size (which may come
        # from map_config), so it is built in _init_game().

        # For dynamic Matplotlib display.
        plt.ion()  # Turn on interactive mode.
        self.fig = None
        self.ax = None

        # Load icons for Pac-Man (code 3) and Ghost (code 4).
        resolved_paths = dict(self.DEFAULT_ICON_PATHS)
        if icon_paths is not None:
            resolved_paths.update(icon_paths)
        self.icons = {}
        for code, path in resolved_paths.items():
            try:
                self.icons[code] = plt.imread(path)
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    f"Icon file for code {code} not found at path: {path}"
                ) from err

        self._init_game()

    def _in_bounds(self, row, col):
        """Return True if (row, col) lies inside the grid."""
        return 0 <= row < self.grid_size[0] and 0 <= col < self.grid_size[1]

    def _is_open(self, row, col):
        """Return True if (row, col) is inside the grid and not a wall."""
        return self._in_bounds(row, col) and not self.walls[row, col]

    def _collect_reward(self):
        """Return pellets eaten since the last call and update the running total."""
        total_eaten = self.starting_pellets - int(self.pellets.sum())
        reward = total_eaten - self.last_total_eaten
        self.last_total_eaten = total_eaten
        return reward

    def _init_game(self):
        """Initialize the game state using an explicit map configuration if provided."""
        config = self.map_config if self.map_config is not None else {}

        # --- Determine grid size ---
        self.grid_size = tuple(int(dim) for dim in config.get('grid_size', self.default_grid_size))
        height, width = self.grid_size
        self.observation_space = spaces.Box(low=0, high=4, shape=self.grid_size, dtype=np.int32)

        # --- Walls ---
        self.walls = np.zeros(self.grid_size, dtype=bool)
        if 'walls' in config:
            for coord in config['walls']:
                # Index with two scalars: indexing with a list such as [r, c]
                # would select whole rows r and c instead of a single cell.
                row, col = coord[0], coord[1]
                if self._in_bounds(row, col):
                    self.walls[row, col] = True
        else:
            # Default: Create boundary walls.
            self.walls[0, :] = True
            self.walls[-1, :] = True
            self.walls[:, 0] = True
            self.walls[:, -1] = True

        # --- Pellets ---
        if 'pellets' in config:
            self.pellets = np.zeros(self.grid_size, dtype=bool)
            for coord in config['pellets']:
                row, col = coord[0], coord[1]
                # Out-of-grid pellets and pellets on walls are ignored.
                if self._is_open(row, col):
                    self.pellets[row, col] = True
        else:
            # Default: Place a pellet in every non-wall cell.
            self.pellets = ~self.walls

        self.starting_pellets = int(self.pellets.sum())

        # --- Pac-Man ---
        if 'pacman' in config:
            self.pacman_pos = list(config['pacman'])
        else:
            self.pacman_pos = [1, 1]
        # Consume pellet at Pac-Man's starting position, if any.
        self.pellets[self.pacman_pos[0], self.pacman_pos[1]] = False

        # --- Ghosts ---
        if 'ghosts' in config:
            self.ghost_positions = [list(pos) for pos in config['ghosts']]
        else:
            # Random placement only considers interior cells (rows 1..height-2,
            # cols 1..width-2). Check up front that enough of them are free,
            # otherwise the rejection loop below would never terminate.
            free = ~self.walls
            free[self.pacman_pos[0], self.pacman_pos[1]] = False
            available = int(free[1:height - 1, 1:width - 1].sum())
            if available < self.num_ghosts:
                raise ValueError(
                    f"Cannot place {self.num_ghosts} ghost(s): only {available} "
                    f"free interior cell(s) in a {height}x{width} grid."
                )

            self.ghost_positions = []
            while len(self.ghost_positions) < self.num_ghosts:
                i = random.randint(1, height - 2)
                j = random.randint(1, width - 2)
                if [i, j] == self.pacman_pos or self.walls[i, j]:
                    continue
                if [i, j] in self.ghost_positions:
                    continue
                self.ghost_positions.append([i, j])

        self.done = False
        self.last_total_eaten = self.starting_pellets - int(self.pellets.sum())

    def get_observation(self):
        """
        Build and return the observation grid by layering walls, pellets, ghosts, and Pac-Man.

        Returns:
            np.ndarray: int32 array of shape `self.grid_size` holding the cell codes.
        """
        obs = np.zeros(self.grid_size, dtype=np.int32)
        # Walls (code 1)
        obs[self.walls] = 1
        # Pellets (code 2) in non-wall cells.
        obs[self.pellets & ~self.walls] = 2
        # Ghosts (code 4) override pellets.
        for pos in self.ghost_positions:
            obs[pos[0], pos[1]] = 4
        # Pac-Man (code 3) has the highest priority.
        obs[self.pacman_pos[0], self.pacman_pos[1]] = 3
        return obs

    def reset(self, seed=None, options=None):
        """
        Reset the environment and return the initial observation.

        Args:
            seed (int): Optional seed. When given it seeds the base class RNG as
                well as the global `random` and `np.random` generators (ghost
                placement and movement use `random`).
            options (dict): Unused.

        Returns:
            tuple: (observation, info) where info is an empty dict.
        """
        # Let the base class seed self.np_random.
        super().reset(seed=seed)
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        self._init_game()
        return self.get_observation(), {}

    def step(self, action):
        """
        Execute an action, update the game state, and return:
            observation, reward, done, truncated, info

        Pac-Man moves first; a move into a wall or off the grid leaves Pac-Man
        in place. If Pac-Man lands on a ghost the episode ends immediately.
        Otherwise Pac-Man eats the pellet in the new cell (if any) and every
        ghost moves to a random open neighbouring cell. Actions other than
        0-3 leave Pac-Man in place.
        """
        if self.done:
            return self.get_observation(), 0, True, False, {}

        # --- Move Pac-Man ---
        new_pos = list(self.pacman_pos)
        if action == 0:      # Up
            new_pos[0] -= 1
        elif action == 1:    # Right
            new_pos[1] += 1
        elif action == 2:    # Down
            new_pos[0] += 1
        elif action == 3:    # Left
            new_pos[1] -= 1

        # Prevent moving into walls or off the grid. The bounds check matters on
        # maps without border walls: a negative index would otherwise wrap around
        # to the opposite edge and a too-large one would raise IndexError.
        if not self._is_open(new_pos[0], new_pos[1]):
            new_pos = list(self.pacman_pos)

        self.pacman_pos = new_pos

        # Check collision with a ghost (before any pellet is consumed).
        if new_pos in self.ghost_positions:
            self.done = True
            return self.get_observation(), self._collect_reward(), True, False, {}

        # Consume pellet if present.
        self.pellets[new_pos[0], new_pos[1]] = False

        # --- Move Ghosts ---
        for idx, ghost_pos in enumerate(self.ghost_positions):
            candidates = [
                [ghost_pos[0] + d_row, ghost_pos[1] + d_col]
                for d_row, d_col in self._GHOST_MOVES
                if self._is_open(ghost_pos[0] + d_row, ghost_pos[1] + d_col)
            ]
            # A ghost with no open neighbour stays where it is.
            new_ghost_pos = random.choice(candidates) if candidates else list(ghost_pos)
            self.ghost_positions[idx] = new_ghost_pos
            if new_ghost_pos == self.pacman_pos:
                self.done = True

        # --- Check Terminal Condition ---
        if not self.pellets.any():
            self.done = True

        # --- Compute Reward ---
        reward = self._collect_reward()

        return self.get_observation(), reward, self.done, False, {}

    def render_graphical(self):
        """
        Dynamically update a single Matplotlib window with the current grid.
        The cells for empty, wall, and pellet are drawn as colored squares:
            - Empty (code 0): white
            - Wall  (code 1): black
            - Pellet(code 2): a white square with a small orange square at its center
        Pac-Man (code 3) and Ghosts (code 4) are rendered using their icon images.
        """
        obs = self.get_observation()
        height, width = self.grid_size

        # Create the figure/axis if not already present.
        if self.fig is None:
            self.fig, self.ax = plt.subplots(figsize=(width, height))
        else:
            self.ax.clear()

        # Draw each cell.
        for i in range(height):
            for j in range(width):
                code = obs[i, j]
                # Coordinates: cell (i, j) spans from (j, i) to (j+1, i+1)
                if code in (3, 4):
                    # Draw the icon image for Pac-Man or Ghost.
                    # extent is (left, right, bottom, top). The y-axis is inverted
                    # below, so the visual bottom of the cell is y = i + 1; passing
                    # it as `bottom` keeps the icon upright.
                    self.ax.imshow(self.icons[code], extent=(j, j + 1, i + 1, i), interpolation='none')
                elif code == 2:
                    # For pellets, draw a white square background...
                    self.ax.add_patch(patches.Rectangle((j, i), 1, 1, facecolor="white", edgecolor='gray'))
                    # ...then a small orange square centered in the cell.
                    # For a 1x1 cell, a 0.5x0.5 square centered is positioned at (j+0.25, i+0.25).
                    self.ax.add_patch(patches.Rectangle((j + 0.25, i + 0.25), 0.5, 0.5, facecolor="orange", edgecolor='none'))
                else:
                    # For empty (code 0) and wall (code 1) cells.
                    color = "black" if code == 1 else "white"
                    self.ax.add_patch(patches.Rectangle((j, i), 1, 1, facecolor=color, edgecolor='gray'))

        # imshow() adjusts the axes limits, so set them after drawing.
        self.ax.set_xlim(0, width)
        self.ax.set_ylim(height, 0)  # Invert y-axis so row 0 is at the top.
        self.ax.set_xticks([])
        self.ax.set_yticks([])
        self.ax.set_aspect('equal')
        self.ax.set_title('Pac-Man Grid')

        self.fig.canvas.draw_idle()
        plt.pause(0.0001)

    def close(self):
        """Close the Matplotlib figure, if one was created."""
        if self.fig is not None:
            plt.close(self.fig)
            self.fig = None
            self.ax = None

# --- Example Usage ---
if __name__ == "__main__":
    # Define an explicit 10x10 map: a closed border, a few internal walls,
    # a dozen pellets, Pac-Man in the top-left corner and two ghosts.
    example_map = {
        "grid_size": (10, 10),
        "walls": [
            # Border walls.
            *((0, j) for j in range(10)),
            *((9, j) for j in range(10)),
            *((i, 0) for i in range(10)),
            *((i, 9) for i in range(10)),
            # Internal walls.
            (3, 3), (3, 4), (3, 5),
            (4, 5),
            (5, 5),
        ],
        "pellets": [
            (1, 2), (1, 3), (1, 4),
            (2, 2), (2, 3), (2, 4),
            (4, 2), (4, 3),
            (5, 2), (5, 3),
            (6, 2), (6, 3),
        ],
        "pacman": (1, 1),
        "ghosts": [(7, 7), (2, 7)],
    }

    env = GridPacmanEnv(map_config=example_map)
    try:
        obs, _ = env.reset()
        env.render_graphical()

        # Play at most 20 random moves, stopping early when the episode ends.
        max_steps = 20
        for _ in range(max_steps):
            action = env.action_space.sample()  # Random action for demonstration.
            obs, reward, terminated, truncated, info = env.step(action)
            env.render_graphical()  # Dynamically update the same window.
            if terminated or truncated:
                break
    finally:
        # Always release the Matplotlib figure, even if a step or render fails.
        env.close()


FileNotFoundError: Icon file for code 3 not found at path: /content/Pacman.png