In [11]:
pip install pygame


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
import math
from collections import defaultdict

import numpy as np
import pygame

# Start pygame, then open the fixed-size window the game is drawn into.
pygame.init()

WINDOW_WIDTH, WINDOW_HEIGHT = 600, 800  # window size in pixels
game_window = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("Pac-Man with AI Ghosts")


In [13]:
class PacMan(pygame.sprite.Sprite):
    """Player sprite that is moved with the arrow keys.

    The sprite image is loaded from ``pacman.png`` and its top-left corner
    starts at the middle of the window.
    """

    def __init__(self):
        super().__init__()
        self.image = pygame.image.load("pacman.png").convert_alpha()
        self.rect = self.image.get_rect()
        # Rect coordinates are integers, so use floor division instead of
        # assigning the float that `/` would produce.
        self.rect.x = WINDOW_WIDTH // 2
        self.rect.y = WINDOW_HEIGHT // 2
        self.speed = 5  # pixels moved per update() call

    def update(self):
        """Move one step in the direction of the pressed arrow key.

        Only one direction is applied per call; LEFT takes priority over
        RIGHT, then UP, then DOWN. The sprite is kept inside the window.
        """
        keys = pygame.key.get_pressed()
        if keys[pygame.K_LEFT]:
            self.rect.x -= self.speed
        elif keys[pygame.K_RIGHT]:
            self.rect.x += self.speed
        elif keys[pygame.K_UP]:
            self.rect.y -= self.speed
        elif keys[pygame.K_DOWN]:
            self.rect.y += self.speed

        # Without this the player can walk off screen indefinitely.
        self.rect.clamp_ip(pygame.Rect(0, 0, WINDOW_WIDTH, WINDOW_HEIGHT))


In [14]:
class Ghost(pygame.sprite.Sprite):
    """Ghost sprite that walks straight towards a target sprite.

    Args:
        color: Fill colour passed to ``Surface.fill`` for the 20x20 square
            that represents the ghost.
    """

    def __init__(self, color):
        super().__init__()
        self.color = color
        self.image = pygame.Surface([20, 20])
        self.image.fill(color)
        self.rect = self.image.get_rect()
        # Floor division keeps the Rect coordinates integral.
        self.rect.x = WINDOW_WIDTH // 2 - 10
        self.rect.y = WINDOW_HEIGHT // 2 - 10
        self.speed = 3  # maximum pixels moved per update() call

    def update(self, target):
        """Step towards ``target`` by at most ``self.speed`` pixels.

        Args:
            target: Object exposing a ``rect`` with ``x`` and ``y``
                attributes (for example the ``PacMan`` sprite).
        """
        dx = target.rect.x - self.rect.x
        dy = target.rect.y - self.rect.y
        distance = math.hypot(dx, dy)
        if distance == 0:
            # Already on the target; normalising would divide by zero.
            return

        # Never step further than the remaining distance, otherwise the
        # ghost overshoots and oscillates around the target.
        step = min(self.speed, distance)
        # Round instead of letting the Rect truncate the float towards zero,
        # which would drop small components and bias the movement.
        self.rect.x += round(dx / distance * step)
        self.rect.y += round(dy / distance * step)


In [15]:
class QGhost(Ghost):
    """Ghost that picks one of four axis-aligned moves with tabular Q-learning.

    Note: a later cell in this notebook defines another class that is also
    named ``QGhost``; once that cell runs, the name refers to that class.

    Args:
        color: Fill colour forwarded to ``Ghost``.
        learning_rate: Step size applied to the TD error.
        discount_factor: Weight of the bootstrapped next-state value.
        exploration_rate: Probability of choosing a random action.
    """

    def __init__(self, color, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.1):
        super().__init__(color)
        # Unseen states start with a zero value for each of the 4 actions.
        self.q_table = defaultdict(lambda: np.zeros(4))
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate

    def update(self, target):
        """Choose an action, apply it, then learn from the observed outcome.

        Args:
            target: Object exposing a ``rect`` with ``x`` and ``y`` attributes.
        """
        state = self.get_state(target)
        action = self.get_action(state)

        # Apply the move BEFORE observing reward and next state. Observing
        # first would make next_state identical to state and the reward
        # independent of the chosen action.
        dx, dy = self.get_direction(action)
        self.rect.x += dx * self.speed
        self.rect.y += dy * self.speed

        reward = self.get_reward(target)
        next_state = self.get_state(target)

        # Bootstrap from the greedy value of the next state. A freshly
        # sampled exploratory action would never actually be executed, so it
        # must not drive the update.
        q_value = self.q_table[state][action]
        next_q_value = np.max(self.q_table[next_state])
        td_error = reward + self.discount_factor * next_q_value - q_value
        self.q_table[state][action] += self.learning_rate * td_error

    def get_state(self, target):
        """Return the offset to ``target`` as a tuple, each axis divided by 10 and rounded."""
        dx = target.rect.x - self.rect.x
        dy = target.rect.y - self.rect.y
        state = round(dx / 10), round(dy / 10)
        return state

    def get_action(self, state):
        """Return an action index in 0..3 (random with probability ``exploration_rate``)."""
        if np.random.rand() < self.exploration_rate:
            action = np.random.randint(0, 4)
        else:
            action = np.argmax(self.q_table[state])
        return int(action)

    def get_reward(self, target):
        """Return the negated Euclidean distance to ``target``."""
        dx = target.rect.x - self.rect.x
        dy = target.rect.y - self.rect.y
        distance = math.hypot(dx, dy)
        reward = -distance
        return reward

    def get_direction(self, action):
        """Map an action index to a unit step: 0 -> +x, 1 -> -x, 2 -> +y, anything else -> -y."""
        if action == 0:
            return 1, 0
        elif action == 1:
            return -1, 0
        elif action == 2:
            return 0, 1
        else:
            return 0, -1


In [16]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam

class NNGhost(Ghost):
    """Ghost whose action values come from a small dense Keras network.

    Args:
        color: Fill colour forwarded to ``Ghost``.
        discount_factor: Weight of the bootstrapped next-state value in the
            training target.
    """

    def __init__(self, color, discount_factor=0.9):
        super().__init__(color)
        self.discount_factor = discount_factor
        self.model = self.create_model()

    def create_model(self):
        """Build and compile a 2-input, 4-output network (one output per action)."""
        model = Sequential()
        model.add(Dense(32, input_shape=(2,), activation='relu'))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(4, activation='linear'))
        # `learning_rate` is the supported keyword; `lr` is a deprecated
        # alias that newer Keras releases no longer accept.
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
        return model

    def update(self, target):
        """Choose the greedy action, apply it, then fit the network on the outcome.

        Args:
            target: Object exposing a ``rect`` with ``x`` and ``y`` attributes.
        """
        state = self.get_state(target)
        # One forward pass yields both the greedy action and the values that
        # serve as the regression target for the untouched actions.
        q_values = self.model.predict(np.array([state]), verbose=0)
        action = int(np.argmax(q_values[0]))

        # Apply the move BEFORE observing reward and next state; otherwise
        # next_state equals state and the reward ignores the action.
        dx, dy = self.get_direction(action)
        self.rect.x += dx * self.speed
        self.rect.y += dy * self.speed

        reward = self.get_reward(target)
        next_state = self.get_state(target)

        next_q_values = self.model.predict(np.array([next_state]), verbose=0)
        q_values[0][action] = reward + self.discount_factor * np.max(next_q_values)

        self.model.fit(np.array([state]), q_values, verbose=0)

    def get_state(self, target):
        """Return the offset to ``target`` as a length-2 array, each axis divided by 10."""
        dx = target.rect.x - self.rect.x
        dy = target.rect.y - self.rect.y
        state = np.array([dx, dy]) / 10
        return state

    def get_action(self, state):
        """Return the index of the highest predicted action value for ``state``."""
        # verbose=0 stops Keras from logging a progress bar on every call.
        q_values = self.model.predict(np.array([state]), verbose=0)
        return np.argmax(q_values[0])

    def get_reward(self, target):
        """Return the negated Euclidean distance to ``target``."""
        dx = target.rect.x - self.rect.x
        dy = target.rect.y - self.rect.y
        distance = math.hypot(dx, dy)
        reward = -distance
        return reward

    def get_direction(self, action):
        """Map an action index to a unit step: 0 -> +x, 1 -> -x, 2 -> +y, anything else -> -y."""
        if action == 0:
            return 1, 0
        elif action == 1:
            return -1, 0
        elif action == 2:
            return 0, 1
        else:
            return 0, -1


In [35]:
import random

class PacManGame:
    """Grid-world chase: a ghost tries to land on a randomly wandering Pac-Man.

    Positions are mutable ``[x, y]`` lists with both coordinates in
    ``0 .. grid_size - 1``. An episode ends when the ghost has taken
    ``max_steps`` moves or has landed on Pac-Man's cell.
    """

    # Unit step per named direction; any other name means "do not move".
    _STEPS = {'up': (0, -1), 'down': (0, 1), 'left': (-1, 0), 'right': (1, 0)}

    def __init__(self, grid_size=10):
        self.grid_size = grid_size
        self.reset()

    def reset(self):
        """Start a new episode with the two actors on distinct random cells."""
        cells = [(col, row) for col in range(self.grid_size) for row in range(self.grid_size)]

        ghost_cell = random.choice(cells)
        pac_cell = random.choice(cells)
        # Redraw until Pac-Man is not on the ghost's cell.
        while pac_cell == ghost_cell:
            pac_cell = random.choice(cells)

        # Stored as lists so the move methods can update them in place.
        self.ghost_pos = list(ghost_cell)
        self.pacman_pos = list(pac_cell)

        self.steps = 0
        self.max_steps = 50
        self.caught = False

    def over(self):
        """Return whether the step budget is used up or Pac-Man was caught."""
        if self.steps >= self.max_steps:
            return True
        return self.caught

    def ghost_caught_pacman(self):
        """Return the ``caught`` flag set by ``move_ghost``."""
        return self.caught

    def _shift(self, pos, direction):
        """Move ``pos`` in place by one cell in ``direction``, clamped to the grid."""
        step_x, step_y = self._STEPS.get(direction, (0, 0))
        last = self.grid_size - 1
        pos[0] = max(0, min(last, pos[0] + step_x))
        pos[1] = max(0, min(last, pos[1] + step_y))

    def move_ghost(self, action):
        """Apply the ghost's ``action``, count the step, and record a catch."""
        self._shift(self.ghost_pos, action)
        self.steps += 1

        if self.ghost_pos == self.pacman_pos:
            self.caught = True

    def move_pacman(self):
        """Move Pac-Man one cell in a uniformly random direction (or not at all)."""
        direction = random.choice(['up', 'down', 'left', 'right', 'stay'])
        self._shift(self.pacman_pos, direction)


In [36]:
class QGhost:
    """Tabular Q-learning agent for the ghost in the grid game.

    Note: this definition reuses the name of the pygame-based ``QGhost``
    class defined in an earlier cell and replaces it once this cell runs.

    Args:
        actions: Names of the actions the ghost can take.
        alpha: Learning rate applied to the TD error.
        gamma: Discount factor for the best next-state value.
        epsilon: Probability of choosing a random action.
    """

    def __init__(self, actions=('up', 'down', 'left', 'right'), alpha=0.1, gamma=0.9, epsilon=0.2):
        self.q_table = {}
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        # Copy into a fresh list: a mutable default argument would be shared
        # by every instance created without an explicit `actions`.
        self.actions = list(actions)

    def _q_row(self, state):
        """Return the action-value dict for ``state``, creating a zeroed one if missing."""
        if state not in self.q_table:
            self.q_table[state] = {a: 0 for a in self.actions}
        return self.q_table[state]

    def get_state(self, ghost_pos, pacman_pos):
        """Return the ``(dx, dy)`` offset from the ghost to Pac-Man."""
        dx = pacman_pos[0] - ghost_pos[0]
        dy = pacman_pos[1] - ghost_pos[1]
        return (dx, dy)

    def choose_action(self, state):
        """Return a random action with probability ``epsilon``, else the best known one."""
        row = self._q_row(state)

        if random.random() < self.epsilon:
            return random.choice(self.actions)
        else:
            return max(row, key=row.get)

    def learn(self, state, action, reward, next_state):
        """Apply one Q-learning update for the transition.

        Both ``state`` and ``next_state`` are initialised on demand, so this
        can be called for a state that was never passed to ``choose_action``.
        """
        row = self._q_row(state)
        next_row = self._q_row(next_state)
        predict = row[action]
        target = reward + self.gamma * max(next_row.values())
        row[action] += self.alpha * (target - predict)



In [37]:

# Seed the RNG so start positions, Pac-Man's moves and the ghost's
# exploration are identical on every Restart & Run All.
SEED = 42
random.seed(SEED)

game = PacManGame()
ghost = QGhost()

episodes = 1000

for episode in range(episodes):
    game.reset()
    while not game.over():
        # Pac-Man moves first, so the ghost decides from the updated offset.
        game.move_pacman()

        state = ghost.get_state(game.ghost_pos, game.pacman_pos)
        action = ghost.choose_action(state)
        game.move_ghost(action)
        next_state = ghost.get_state(game.ghost_pos, game.pacman_pos)
        # +10 for a catch; -1 for every other step so shorter chases score higher.
        reward = 10 if game.ghost_caught_pacman() else -1
        ghost.learn(state, action, reward, next_state)

In [38]:
# Evaluate the learned policy: always act greedily and do not update the table.
ghost.epsilon = 0
test_episodes = 100
successes = 0

for episode in range(test_episodes):
    game.reset()
    while not game.over():
        game.move_pacman()
        state = ghost.get_state(game.ghost_pos, game.pacman_pos)
        action = ghost.choose_action(state)
        game.move_ghost(action)
    # An episode counts as a success only if it ended with a catch.
    successes += 1 if game.ghost_caught_pacman() else 0

# Percentage of test episodes in which the ghost caught Pac-Man.
accuracy = (successes / test_episodes) * 100
print(f"Test Accuracy: {accuracy:.2f}%")


Test Accuracy: 96.00%
