We tried working with the observation/state space of the OpenAI Gym MsPacman environment, but as far as we could tell, the state is provided only as the RGB values of each pixel. The NumPy array we receive holds an RGB value for every pixel rather than a grid with a value for each coordinate. We tried to find a way to convert the RGB values into grid values carrying pellet, ghost, and Pac-Man information, but we were unable to do so. Therefore, some of our code doesn’t run, and the code below is mostly based on what we imagine would be correct if we had a workable state space (we tried for days, to no avail).

In [None]:
import gym
import numpy as np

class MsPacmanAgent:
    """Alpha-beta minimax agent that plans on a grid abstraction of Ms. Pac-Man.

    The agent works on a *grid state*: a NumPy array indexed as
    ``state[x, y, channel]`` in which a value of 1 in channel 0 marks
    Pac-Man, in channel 1 a pellet and in channel 2 a ghost.

    NOTE(review): this encoding is hypothetical. Raw Gym observations are RGB
    frames and must be converted to this grid before being given to the agent.
    NOTE(review): walls are assumed to live in an optional fourth channel
    (index 3); states without that channel are treated as wall-free. Confirm
    once a real grid encoding exists.
    NOTE(review): ``MOVE_ACTIONS`` assumes the first grid axis grows downwards
    (image rows) and uses the ALE MsPacman action ids 1=UP, 2=RIGHT, 3=LEFT,
    4=DOWN -- confirm against the environment's action meanings.
    """

    PACMAN_CHANNEL = 0
    PELLET_CHANNEL = 1
    GHOST_CHANNEL = 2
    WALL_CHANNEL = 3

    NOOP_ACTION = 0
    # Grid step -> environment action id, kept in the exploration order
    # right, down, left, up.
    MOVE_ACTIONS = {(0, 1): 2, (1, 0): 4, (0, -1): 3, (-1, 0): 1}

    def __init__(self, env, ghost_danger_distance=2):
        """Store the environment handle and the evaluation settings.

        Args:
            env: The Gym environment. It is only stored; planning never steps
                it, because stepping the live game while searching would
                advance the real episode once per explored node.
            ghost_danger_distance: Manhattan distance below which a ghost
                makes a state count as lost in ``evaluate_state``.
        """
        self.env = env
        self.ghost_danger_distance = ghost_danger_distance

    def heuristic(self, start, goal):
        """Return the Manhattan distance between two ``(x, y)`` cells."""
        return abs(start[0] - goal[0]) + abs(start[1] - goal[1])

    def _positions(self, state, channel):
        """Return every ``(x, y)`` whose value in ``channel`` is 1, row-major."""
        return [(int(x), int(y))
                for x, y in np.argwhere(state[:, :, channel] == 1)]

    def _is_wall(self, state, cell):
        """Return True when ``cell`` is marked in the optional wall channel."""
        if state.shape[2] <= self.WALL_CHANNEL:
            return False
        return bool(state[cell[0], cell[1], self.WALL_CHANNEL] == 1)

    def _move_pacman(self, state, old_pos, new_pos):
        """Return a copy of ``state`` with Pac-Man moved, eating any pellet."""
        new_state = np.copy(state)
        new_state[old_pos[0], old_pos[1], self.PACMAN_CHANNEL] = 0
        new_state[new_pos[0], new_pos[1], self.PACMAN_CHANNEL] = 1
        new_state[new_pos[0], new_pos[1], self.PELLET_CHANNEL] = 0
        return new_state

    def get_neighbors(self, state, position=None):
        """Return the in-bounds, non-wall cells adjacent to ``position``.

        Args:
            state: Grid state array.
            position: ``(x, y)`` cell to expand; defaults to Pac-Man's cell.

        Returns:
            List of ``(x, y)`` tuples in the order right, down, left, up.
        """
        if position is None:
            position = self.get_pacman_position(state)
        # Bounds come from the state itself rather than a global grid size.
        rows, cols = state.shape[:2]
        neighbors = []
        for dx, dy in self.MOVE_ACTIONS:
            cell = (position[0] + dx, position[1] + dy)
            if not (0 <= cell[0] < rows and 0 <= cell[1] < cols):
                continue
            if not self._is_wall(state, cell):
                neighbors.append(cell)
        return neighbors

    def get_successors(self, state, maximizing_player):
        """Return the ``(next_state, action)`` pairs reachable in one ply.

        On Pac-Man's turn (``maximizing_player`` true) each successor moves
        Pac-Man one cell and carries the matching action id. On the ghosts'
        turn each successor moves exactly one ghost one cell and carries
        ``None``. All successors are simulated on copies of ``state``.
        """
        successors = []
        if maximizing_player:  # Pac-Man's turn
            pacman_pos = self.get_pacman_position(state)
            for neighbor in self.get_neighbors(state, pacman_pos):
                step = (neighbor[0] - pacman_pos[0],
                        neighbor[1] - pacman_pos[1])
                new_state = self._move_pacman(state, pacman_pos, neighbor)
                successors.append((new_state, self.MOVE_ACTIONS[step]))
        else:  # ghosts' turn
            ghost_positions = self.get_ghost_positions(state)
            for index, ghost_pos in enumerate(ghost_positions):
                for neighbor in self.get_neighbors(state, ghost_pos):
                    new_ghost_positions = list(ghost_positions)
                    new_ghost_positions[index] = neighbor
                    new_state = self.create_state(state, new_ghost_positions)
                    successors.append((new_state, None))
        return successors

    def get_ghost_positions(self, state):
        """Return the list of ``(x, y)`` cells marked in the ghost channel."""
        return self._positions(state, self.GHOST_CHANNEL)

    def get_pacman_position(self, state):
        """Return the first ``(x, y)`` cell marked in the Pac-Man channel.

        Raises:
            IndexError: If no cell is marked as Pac-Man.
        """
        positions = self._positions(state, self.PACMAN_CHANNEL)
        if not positions:
            raise IndexError("state contains no Pac-Man cell")
        return positions[0]

    def get_pellet_positions(self, state):
        """Return the list of ``(x, y)`` cells marked in the pellet channel."""
        return self._positions(state, self.PELLET_CHANNEL)

    def create_state(self, current_state, ghost_positions):
        """Return a copy of ``current_state`` with ghosts at ``ghost_positions``.

        The ghost channel is cleared first so a moved ghost does not also
        remain on the cell it left.
        """
        new_state = np.copy(current_state)
        new_state[:, :, self.GHOST_CHANNEL] = 0
        for x, y in ghost_positions:
            new_state[x, y, self.GHOST_CHANNEL] = 1
        return new_state

    def evaluate_state(self, state):
        """Score ``state`` from Pac-Man's point of view.

        Returns:
            ``inf`` when no pellets remain, ``-inf`` when a ghost is closer
            than ``ghost_danger_distance``, otherwise the negated Manhattan
            distance to the nearest pellet.
        """
        pacman_pos = self.get_pacman_position(state)
        pellet_positions = self.get_pellet_positions(state)
        if not pellet_positions:
            return float('inf')  # All pellets eaten

        ghost_positions = self.get_ghost_positions(state)
        # A board without ghosts has no danger term; min() of an empty
        # sequence would raise.
        if ghost_positions:
            distance_to_ghosts = min(self.heuristic(pacman_pos, ghost)
                                     for ghost in ghost_positions)
            if distance_to_ghosts < self.ghost_danger_distance:
                return -float('inf')

        distance_to_pellets = min(self.heuristic(pacman_pos, pellet)
                                  for pellet in pellet_positions)
        return -distance_to_pellets

    def is_terminal(self, state):
        """Return True when no pellets remain or a ghost shares Pac-Man's cell."""
        if not self.get_pellet_positions(state):
            return True
        pacman_pos = self.get_pacman_position(state)
        return any(self.heuristic(pacman_pos, ghost) < 1
                   for ghost in self.get_ghost_positions(state))

    def minimax(self, state, depth, alpha, beta, maximizing_player):
        """Return the alpha-beta minimax value of ``state``.

        Args:
            state: Grid state to evaluate.
            depth: Remaining plies to search.
            alpha: Best value the maximizer can already guarantee.
            beta: Best value the minimizer can already guarantee.
            maximizing_player: True on Pac-Man's turn, False on the ghosts'.
        """
        if depth == 0 or self.is_terminal(state):
            return self.evaluate_state(state)

        successors = self.get_successors(state, maximizing_player)
        if not successors:
            # The side to move has no legal move (e.g. no ghosts on the
            # board): it passes instead of reporting a spurious +/-inf.
            return self.minimax(state, depth - 1, alpha, beta,
                                not maximizing_player)

        if maximizing_player:
            max_eval = float('-inf')
            for successor, _ in successors:
                value = self.minimax(successor, depth - 1, alpha, beta, False)
                max_eval = max(max_eval, value)
                alpha = max(alpha, value)
                if beta <= alpha:
                    break
            return max_eval

        min_eval = float('inf')
        for successor, _ in successors:
            value = self.minimax(successor, depth - 1, alpha, beta, True)
            min_eval = min(min_eval, value)
            beta = min(beta, value)
            if beta <= alpha:
                break
        return min_eval

    def get_action(self, state, depth=3):
        """Return the action id whose simulated outcome has the best value.

        Args:
            state: Current grid state.
            depth: Plies searched below each candidate move.

        Returns:
            The action of the best successor; the first candidate when all
            are equally bad, or ``NOOP_ACTION`` when Pac-Man cannot move.
        """
        best_action = None
        best_value = float('-inf')

        for next_state, action in self.get_successors(state, True):
            value = self.minimax(next_state, depth,
                                 float('-inf'), float('inf'), False)
            # Accept the first candidate unconditionally so a position where
            # every move scores -inf still yields a valid action.
            if best_action is None or value > best_value:
                best_value = value
                best_action = action

        if best_action is None:
            return self.NOOP_ACTION
        return best_action

env = gym.make('MsPacman-v4', render_mode='rgb_array')
agent = MsPacmanAgent(env)

done = False
# Newer Gym releases return (observation, info) from reset(); older ones
# return the observation alone. Accept both.
reset_result = env.reset()
state = reset_result[0] if isinstance(reset_result, tuple) else reset_result

# NOTE(review): `state` here is the raw observation from the environment; the
# agent's position helpers index it as state[x, y, channel] == 1, so a
# conversion step is still needed before this loop can play a real game.
try:
    while not done:
        action = agent.get_action(state)
        step_result = env.step(action)
        if len(step_result) == 5:
            # New step API: the episode ends on termination OR truncation.
            state, reward, terminated, truncated, info = step_result
            done = terminated or truncated
        else:
            state, reward, done, info = step_result
        env.render()
finally:
    # Release the emulator even if the loop raises.
    env.close()
