In [7]:
import pygame
import numpy as np
import random
import time

# Configuration constants

# Board dimensions, counted in cells.
GRID_WIDTH = 20
GRID_HEIGHT = 20

# Edge length of a single cell, in pixels.
GRID_SIZE = 20

# Window dimensions in pixels: (width, height).
WINDOW_SIZE = (GRID_SIZE * GRID_WIDTH, GRID_SIZE * GRID_HEIGHT)

# Frame-rate cap handed to the game clock.
FPS = 10

# RGB colour triples.
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLACK = (0, 0, 0)


# Teste 1

- epsilon = 1.0
- min_epsilon = 0.01
- epsilon_decay = 0.995
- alpha = 0.1
- gamma = 0.9

In [8]:
class QLearningSnake:
    """Snake game bundled with a tabular Q-learning agent.

    The Q-table has one row per board cell (indexed by the cell the snake's
    head occupies) and one column per entry of ``ACTIONS``.  Neither the apple
    position nor the current heading is part of the state.

    Attributes set in ``__init__``:
        epsilon, min_epsilon, epsilon_decay: exploration rate, its floor and
            the per-episode multiplicative decay.
        alpha: learning rate of the Q-update.
        gamma: discount factor of the Q-update.
    """

    # Action index -> direction name; this is the column order of the Q-table.
    ACTIONS = ('up', 'down', 'left', 'right')

    # Direction name -> (dx, dy) grid step; 'down' increases y.
    _DELTAS = {'up': (0, -1), 'down': (0, 1), 'left': (-1, 0), 'right': (1, 0)}

    # Direction name -> the heading from which turning to it would be a U-turn.
    _OPPOSITE = {'up': 'down', 'down': 'up', 'left': 'right', 'right': 'left'}

    def __init__(self, width, height):
        """Create a ``width`` x ``height`` board with an all-zero Q-table."""
        self.width = width
        self.height = height
        self.q_table = np.zeros((self.width * self.height, 4))
        self.epsilon = 1.0
        self.min_epsilon = 0.01
        self.epsilon_decay = 0.995
        self.alpha = 0.1
        self.gamma = 0.9
        self.reset()

    def generate_apple(self):
        """Return a uniformly random cell ``(x, y)`` not covered by the snake.

        Raises:
            ValueError: if the snake covers every cell of the board.  Without
                this guard the rejection-sampling loop below never ends.
        """
        if len(set(self.snake)) >= self.width * self.height:
            raise ValueError("no free cell left to place the apple")
        while True:
            apple = (random.randint(0, self.width - 1),
                     random.randint(0, self.height - 1))
            if apple not in self.snake:
                return apple

    def change_direction(self, direction):
        """Set the heading to ``direction`` unless that would be a U-turn.

        Unknown direction names and U-turns are silently ignored, so the
        snake keeps its current heading in those cases.
        """
        blocked_from = self._OPPOSITE.get(direction)
        if blocked_from is not None and self.direction != blocked_from:
            self.direction = direction

    def move(self):
        """Advance the snake one cell along its current heading.

        Returns:
            tuple ``(done, ate_apple)``:
            ``(True, False)`` when the new head would leave the board or land
            on a body segment (``game_over`` is set, the snake is unchanged);
            ``(False, True)`` when the apple was eaten (snake grows, score is
            incremented, a new apple is placed);
            ``(False, False)`` for a plain move.
        """
        x, y = self.snake[0]
        dx, dy = self._DELTAS.get(self.direction, (0, 0))
        x += dx
        y += dy

        out_of_bounds = x < 0 or x >= self.width or y < 0 or y >= self.height
        if out_of_bounds or (x, y) in self.snake[1:]:
            self.game_over = True
            return True, False

        self.snake.insert(0, (x, y))

        if self.snake[0] == self.apple:
            self.score += 1
            self.apple = self.generate_apple()
            return False, True

        # No apple eaten: drop the tail so the length stays the same.
        self.snake.pop()
        return False, False

    def get_state_index(self):
        """Return the Q-table row for the head cell (row-major: y * width + x)."""
        head = self.snake[0]
        return head[1] * self.width + head[0]

    def select_action(self, state_index):
        """Pick an action index epsilon-greedily for ``state_index``.

        Returns a plain ``int`` in ``range(4)`` in both branches.
        """
        if random.random() < self.epsilon:
            return random.randint(0, 3)  # explore: uniformly random action
        return int(np.argmax(self.q_table[state_index]))  # exploit

    def update_q_table(self, state_index, action_index, reward,
                       next_state_index, done=False):
        """Apply one Q-learning update to ``q_table[state_index][action_index]``.

        Args:
            state_index: row of the state the action was taken in.
            action_index: column of the action taken.
            reward: immediate reward received.
            next_state_index: row of the resulting state.
            done: True when the transition ended the game.  A terminal state
                has no future return, so the target is just ``reward``; the
                default (False) keeps the usual bootstrapped target.
        """
        old_value = self.q_table[state_index][action_index]
        next_max = 0.0 if done else np.max(self.q_table[next_state_index])
        target = reward + self.gamma * next_max
        self.q_table[state_index][action_index] = \
            (1 - self.alpha) * old_value + self.alpha * target

    def reset(self):
        """Start a new game: one-segment snake at the centre, new apple, random heading."""
        self.snake = [(self.width // 2, self.height // 2)]
        self.apple = self.generate_apple()
        self.direction = random.choice(['up', 'down', 'left', 'right'])
        self.game_over = False
        self.score = 0

    def train(self, num_episodes, max_rounds_without_apple=20):
        """Run ``num_episodes`` training games, updating the Q-table in place.

        Args:
            num_episodes: number of games to play.
            max_rounds_without_apple: an episode is cut short after this many
                consecutive moves without eating an apple.

        Rewards: -10 for a crash, +10 for an apple, 0 otherwise.  Epsilon is
        decayed once per episode and never drops below ``min_epsilon``.
        Prints one progress line per episode.
        """
        for episode in range(num_episodes):
            self.reset()
            done = False
            rounds_without_apple = 0

            while not done:
                state_index = self.get_state_index()
                action_index = self.select_action(state_index)
                # NOTE(review): a rejected U-turn still credits the chosen
                # action even though the snake kept its previous heading.
                self.change_direction(self.ACTIONS[action_index])

                crashed, ate_apple = self.move()

                if crashed:
                    reward = -10
                elif ate_apple:
                    reward = 10
                    rounds_without_apple = 0
                else:
                    reward = 0
                    rounds_without_apple += 1

                done = crashed or \
                    rounds_without_apple >= max_rounds_without_apple

                # Only a crash is terminal for the update; the stall cut-off
                # merely stops the episode, so it still bootstraps.
                self.update_q_table(state_index, action_index, reward,
                                    self.get_state_index(), done=crashed)

            if self.epsilon > self.min_epsilon:
                # Clamp so the last decay step cannot undershoot the floor.
                self.epsilon = max(self.min_epsilon,
                                   self.epsilon * self.epsilon_decay)

            print(
                f"Episode: {episode + 1}/{num_episodes}, Score: {self.score}")

    def render(self, screen):
        """Draw the board (snake in green, apple in red) on ``screen`` and flip."""
        screen.fill(BLACK)
        for segment in self.snake:
            pygame.draw.rect(
                screen, GREEN, (segment[0] * GRID_SIZE, segment[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
        pygame.draw.rect(
            screen, RED, (self.apple[0] * GRID_SIZE, self.apple[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
        pygame.display.flip()


class PygameSnakeGame:
    """Pygame window that lets a ``QLearningSnake`` play using its Q-table.

    Actions are always the arg-max of the Q-table row for the current state
    (no exploration).  Games restart automatically until the window is closed.
    """

    def __init__(self, q_learning_snake):
        """Wrap ``q_learning_snake``; its Q-table and game state are used as-is."""
        self.snake_game = q_learning_snake

    def draw_parameters(self, screen):
        """Blit the current score in white at the top-right corner of ``screen``."""
        font = pygame.font.Font(None, 24)
        text_margin = 10

        score_text = font.render(
            f"Score: {self.snake_game.score}", True, (255, 255, 255))
        screen.blit(
            score_text, (WINDOW_SIZE[0] - score_text.get_width() - text_margin, text_margin))

        # Add further on-screen parameters here if desired.

    def play(self, max_rounds_without_apple=20):
        """Open the window and replay games with the trained Q-table.

        Args:
            max_rounds_without_apple: a game is ended after this many
                consecutive moves without eating an apple, so a snake stuck
                in a loop does not run forever.

        Closing the window ends the current game immediately and returns.
        Pygame is shut down on the way out even if an exception is raised.
        """
        pygame.init()
        try:
            pygame.display.set_caption("Snake Q-Learning")
            screen = pygame.display.set_mode(WINDOW_SIZE)
            clock = pygame.time.Clock()

            running = True

            while running:
                self.snake_game.reset()  # fresh game for every round
                rounds_without_apple = 0

                while not self.snake_game.game_over:
                    # Stop at once when the user closes the window.
                    if any(event.type == pygame.QUIT
                           for event in pygame.event.get()):
                        self.snake_game.game_over = True
                        running = False
                        break

                    # Greedy action from the Q-table, then advance the snake.
                    state_index = self.snake_game.get_state_index()
                    action_index = np.argmax(
                        self.snake_game.q_table[state_index])
                    self.snake_game.change_direction(
                        ['up', 'down', 'left', 'right'][action_index])
                    _, ate_apple = self.snake_game.move()

                    # End the game when the snake wanders without eating.
                    if ate_apple:
                        rounds_without_apple = 0
                    else:
                        rounds_without_apple += 1
                    if rounds_without_apple >= max_rounds_without_apple:
                        self.snake_game.game_over = True

                    # Draw the frame.
                    screen.fill(BLACK)
                    self.draw_parameters(screen)
                    for segment in self.snake_game.snake:
                        pygame.draw.rect(
                            screen, GREEN, (segment[0] * GRID_SIZE, segment[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
                    pygame.draw.rect(
                        screen, RED, (self.snake_game.apple[0] * GRID_SIZE, self.snake_game.apple[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
                    pygame.display.flip()
                    clock.tick(FPS)

                if running:
                    # The game ended on its own: report, pause, then restart.
                    print(f"Game Over! Score: {self.snake_game.score}")
                    time.sleep(1)
        finally:
            pygame.quit()


In [9]:
# Train the agent without opening a window
snake_game = QLearningSnake(GRID_WIDTH, GRID_HEIGHT)
snake_game.train(100)

# Start Pygame and let the trained model play
game = PygameSnakeGame(q_learning_snake=snake_game)
game.play()

Episode: 1/100, Score: 1
Episode: 2/100, Score: 0
Episode: 3/100, Score: 0
Episode: 4/100, Score: 0
Episode: 5/100, Score: 0
Episode: 6/100, Score: 0
Episode: 7/100, Score: 0
Episode: 8/100, Score: 0
Episode: 9/100, Score: 0
Episode: 10/100, Score: 0
Episode: 11/100, Score: 0
Episode: 12/100, Score: 0
Episode: 13/100, Score: 0
Episode: 14/100, Score: 0
Episode: 15/100, Score: 0
Episode: 16/100, Score: 0
Episode: 17/100, Score: 0
Episode: 18/100, Score: 0
Episode: 19/100, Score: 0
Episode: 20/100, Score: 0
Episode: 21/100, Score: 0
Episode: 22/100, Score: 0
Episode: 23/100, Score: 0
Episode: 24/100, Score: 0
Episode: 25/100, Score: 0
Episode: 26/100, Score: 0
Episode: 27/100, Score: 0
Episode: 28/100, Score: 0
Episode: 29/100, Score: 0
Episode: 30/100, Score: 0
Episode: 31/100, Score: 0
Episode: 32/100, Score: 0
Episode: 33/100, Score: 0
Episode: 34/100, Score: 0
Episode: 35/100, Score: 0
Episode: 36/100, Score: 0
Episode: 37/100, Score: 0
Episode: 38/100, Score: 0
Episode: 39/100, Scor