In [1]:
import pygame
import random
import numpy as np

# Define as constantes do jogo
WIDTH = 800
HEIGHT = 600
GRID_SIZE = 20
GRID_WIDTH = WIDTH // GRID_SIZE
GRID_HEIGHT = HEIGHT // GRID_SIZE
FPS = 10

# Cores
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
GREEN = (0, 255, 0)
RED = (255, 0, 0)

pygame 2.5.0 (SDL 2.28.0, Python 3.7.6)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Inicializa o Pygame
pygame.init()
clock = pygame.time.Clock()
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Snake Q-Learning")


class QLearningSnake:
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.snake = [(width // 2, height // 2)]
        self.direction = 'up'
        self.apple = self.generate_apple()
        self.score = 0
        self.game_over = False

        # Q-Learning parameters
        self.alpha = 0.1  # Learning rate
        self.gamma = 0.9  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.min_epsilon = 0.01
        self.epsilon_decay = 0.99

        # Initialize Q-table with zeros
        self.q_table = np.zeros((width * height, width * height, 4))

    def generate_apple(self):
        while True:
            apple = (random.randint(0, self.width - 1),
                     random.randint(0, self.height - 1))
            if apple not in self.snake:
                return apple

    def change_direction(self, direction):
        if direction == 'up' and self.direction != 'down':
            self.direction = 'up'
        elif direction == 'down' and self.direction != 'up':
            self.direction = 'down'
        elif direction == 'left' and self.direction != 'right':
            self.direction = 'left'
        elif direction == 'right' and self.direction != 'left':
            self.direction = 'right'

    def move(self):
        head = self.snake[0]
        if self.direction == 'up':
            new_head = (head[0], head[1] - 1)
        elif self.direction == 'down':
            new_head = (head[0], head[1] + 1)
        elif self.direction == 'left':
            new_head = (head[0] - 1, head[1])
        elif self.direction == 'right':
            new_head = (head[0] + 1, head[1])

        found_apple = False

        if (
            new_head[0] < 0 or new_head[0] >= self.width or
            new_head[1] < 0 or new_head[1] >= self.height or
            new_head in self.snake
        ):
            self.game_over = True
            return True, found_apple

        self.snake.insert(0, new_head)
        if new_head == self.apple:
            self.score += 1
            self.apple = self.generate_apple()
            found_apple = True
        else:
            self.snake.pop()

        return False, found_apple

    def get_state_index(self):
        head = self.snake[0]
        return head[1] * self.width + head[0]

    def select_action(self, state_index):
        if random.random() < self.epsilon:
            return random.randint(0, 3)  # Choose random action
        else:
            return np.argmax(self.q_table[state_index])

    def update_q_table(self, state_index, action_index, reward, next_state_index):
        old_q_value = self.q_table[state_index][action_index]
        max_next_q_value = np.max(self.q_table[next_state_index])
        new_q_value = (1 - self.alpha) * old_q_value + \
            self.alpha * (reward + self.gamma * max_next_q_value)
        self.q_table[state_index][action_index] = new_q_value

    def train(self, num_episodes, game):
        for episode in range(num_episodes):
            self.reset()
            game.reset()

            while not self.game_over:
                state_index = self.get_state_index()
                action_index = self.select_action(state_index)

                self.change_direction(
                    ['up', 'down', 'left', 'right'][action_index])
                game.handle_events()

                game_over, found_apple = self.move()

                if found_apple:
                    reward = 10
                elif game_over:
                    reward = -10
                else:
                    reward = -1

                next_state_index = self.get_state_index()
                self.update_q_table(state_index, action_index,
                                    reward, next_state_index)

                game.update(self.snake, self.apple)
                game.render()

            self.epsilon = max(
                self.min_epsilon, self.epsilon * self.epsilon_decay)

            print(f"Episode: {episode+1}/{num_episodes}, Score: {self.score}")

    def play(self, game):
        self.reset()
        game.reset()

        while not self.game_over:
            state_index = self.get_state_index()
            action_index = np.argmax(self.q_table[state_index])

            self.change_direction(
                ['up', 'down', 'left', 'right'][action_index])
            game.handle_events()

            game_over, _ = self.move()

            game.update(self.snake, self.apple)
            game.render()

            if game_over:
                print(f"Game Over! Score: {self.score}")

    def reset(self):
        self.snake = [(self.width // 2, self.height // 2)]
        self.direction = 'up'
        self.apple = self.generate_apple()
        self.score = 0
        self.game_over = False


class Game:
    def __init__(self, snake_game):
        self.snake_game = snake_game

    def reset(self):
        self.snake_game.reset()

    def handle_events(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

    def update(self, snake, apple):
        pass

    def render(self):
        pass


class PygameSnakeGame(Game):
    def __init__(self, snake_game):
        super().__init__(snake_game)

    def update(self, snake, apple):
        self.handle_events()

    def render(self):
        screen.fill(BLACK)
        snake_game = self.snake_game
        for segment in snake_game.snake:
            pygame.draw.rect(
                screen, GREEN, (segment[0] * GRID_SIZE, segment[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
        pygame.draw.rect(
            screen, RED, (snake_game.apple[0] * GRID_SIZE, snake_game.apple[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
        pygame.display.update()
        clock.tick(FPS)





In [3]:
# Test the QLearningSnake class
snake_game = QLearningSnake(width=GRID_WIDTH, height=GRID_HEIGHT)
game = PygameSnakeGame(snake_game)

snake_game.train(num_episodes=1000, game=game)
snake_game.play(game)

pygame.quit()


Episode: 1/1000, Score: 0
Episode: 2/1000, Score: 0
Episode: 3/1000, Score: 0
Episode: 4/1000, Score: 0
Episode: 5/1000, Score: 0
Episode: 6/1000, Score: 0


IndexError: list index out of range