# 1. Przygotowanie środowiska programistycznego
Zaimportowanie niezbędnych bibliotek i narzędzi do pracy z danymi oraz modelami uczenia maszynowego.


In [None]:
import pygame
import sys
import random
from collections import defaultdict
import numpy as np
from numpy import argmax
from dataclasses import dataclass
import enum

# Window size in pixels, passed to pygame.display.set_mode.
SCREEN_WIDTH = 640
SCREEN_HEIGHT = 640
# Frame-rate cap handed to the game clock on every loop iteration.
FPS = 6
# Colour triples used when drawing the snake cell, the background and the food cell.
SNAKE = (0, 255, 0)
EMPTY = (0, 0, 0)
FOOD = (255, 0, 0)
# Board size in cells: (columns, rows).
CELL_GRID = (20, 20)
# Size of a single cell in pixels: (width, height). True division, so both values are floats.
CELL_SIZE = (SCREEN_WIDTH / CELL_GRID[0], SCREEN_HEIGHT / CELL_GRID[1])


# 2. Metody i typy pomocniczne


In [None]:


class Action(enum.Enum):
    """Steering command relative to the snake's current heading.

    Members are numbered 1, 2, 3 in declaration order.
    """

    LEFT = enum.auto()
    RIGHT = enum.auto()
    STRAIGHT = enum.auto()
def tuple_to_action(t) -> Action:
    """Translate a one-hot action tuple into the matching ``Action`` member.

    The position of the first ``1`` in *t* selects the member in declaration
    order (position 0 is the first member, and so on).

    Raises:
        ValueError: if *t* contains no ``1`` (propagated from ``t.index``).
    """
    hot_position = t.index(1)
    return [*Action][hot_position]
@dataclass
class Vector:
    """Two-component vector with equality and addition support."""

    x: int
    y: int

    def __eq__(self, other):
        """Return True only when *other* is a Vector with the same x and y."""
        if not isinstance(other, Vector):
            return False
        return (self.x == other.x) and (self.y == other.y)

    def __add__(self, other):
        """Return the component-wise sum as a new Vector.

        Non-Vector operands yield ``NotImplemented`` so Python can try the
        reflected operation or raise ``TypeError``.
        """
        if not isinstance(other, Vector):
            return NotImplemented
        summed_x = self.x + other.x
        summed_y = self.y + other.y
        return Vector(summed_x, summed_y)

# 3. Logika gry Snake z AI

In [7]:
class Game:
    """Single-cell Snake environment drawn with pygame and steered by relative actions.

    Positions and headings are ``Vector`` values measured in grid cells. The
    heading uses screen orientation: ``Vector(1, 0)`` moves right and
    ``Vector(0, 1)`` moves down.
    """

    def __init__(self):
        """Open the pygame window and place the snake in the middle of the grid."""
        pygame.init()
        self.screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
        pygame.display.set_caption("Snake AI")
        self.clock = pygame.time.Clock()
        self.running = True
        self.food = Vector(0, 0)
        self.direction = Vector(1, 0)
        self.snake = Vector(CELL_GRID[0] // 2, CELL_GRID[1] // 2)
        # The snake must be placed first: rand_food() avoids the snake's cell.
        self.rand_food()

    def handle_keydown(self, key):
        """Stop the main loop when the ``q`` key is pressed."""
        if key == pygame.K_q:
            self.running = False

    def handle_action(self, action: Action):
        """Rotate the heading according to *action*.

        LEFT and RIGHT are quarter turns as seen from the snake, using the
        same screen orientation as ``get_state`` (y grows downward). For
        example, while moving right, LEFT points the snake up. STRAIGHT keeps
        the heading unchanged.
        """
        dx, dy = self.direction.x, self.direction.y
        if action == Action.LEFT:
            self.direction = Vector(dy, -dx)
        elif action == Action.RIGHT:
            self.direction = Vector(-dy, dx)
        # Action.STRAIGHT: nothing to change.

    def handle_events(self):
        """Drain the pygame event queue, reacting to window close and key presses."""
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.running = False
            elif event.type == pygame.KEYDOWN:
                self.handle_keydown(event.key)

    def rand_food(self):
        """Move the food to a random grid cell that is not the snake's cell."""
        while True:
            candidate = Vector(
                random.randrange(CELL_GRID[0]),
                random.randrange(CELL_GRID[1]),
            )
            if candidate != self.snake:
                self.food = candidate
                return

    def get_distance(self) -> int:
        """Return the Manhattan distance between the snake and the food."""
        return abs(self.snake.x - self.food.x) + abs(self.snake.y - self.food.y)

    def step(self, action: Action) -> tuple[tuple, float, bool]:
        """Apply *action*, advance the snake by one cell and score the move.

        Rewards:
            * ``-0.1`` by default, ``0.1`` when the move reduced the distance
              to the food,
            * ``10`` when the snake reaches the food (new food is placed),
            * ``-10`` when the snake leaves the grid; the episode ends and the
              game is reset.

        Returns:
            ``(state, reward, done)`` where ``state`` comes from
            ``get_state()``. When ``done`` is True the state describes the
            freshly reset game.
        """
        old_distance = self.get_distance()
        self.handle_events()
        self.handle_action(action)
        self.snake = self.snake + self.direction
        done = False
        reward = -0.1
        new_distance = self.get_distance()

        if new_distance < old_distance:
            reward = 0.1
        if self.snake == self.food:
            self.rand_food()
            reward = 10
        # Valid cells are 0 .. CELL_GRID - 1, so reaching CELL_GRID itself is
        # already outside the board.
        if (
            self.snake.x >= CELL_GRID[0]
            or self.snake.x < 0
            or self.snake.y >= CELL_GRID[1]
            or self.snake.y < 0
        ):
            reward = -10
            done = True
            self.reset()
        return (self.get_state(), reward, done)

    def get_state(self) -> tuple[bool, bool, bool]:
        """Describe where the food lies relative to the snake's heading.

        Returns:
            ``(food_front, food_left, food_right)``; each flag is True when
            the food is strictly on that side of the snake as seen along the
            current heading.
        """
        food_front = False
        food_left = False
        food_right = False

        if self.direction == Vector(1, 0):  # Moving RIGHT
            food_front = self.snake.x < self.food.x
            food_left = self.snake.y > self.food.y
            food_right = self.snake.y < self.food.y
        elif self.direction == Vector(-1, 0):  # Moving LEFT
            food_front = self.snake.x > self.food.x
            food_left = self.snake.y < self.food.y
            food_right = self.snake.y > self.food.y
        elif self.direction == Vector(0, 1):  # Moving DOWN
            food_front = self.snake.y < self.food.y
            food_left = self.snake.x < self.food.x
            food_right = self.snake.x > self.food.x
        elif self.direction == Vector(0, -1):  # Moving UP
            food_front = self.snake.y > self.food.y
            food_left = self.snake.x > self.food.x
            food_right = self.snake.x < self.food.x

        return (food_front, food_left, food_right)

    def reset(self):
        """Put the snake back in the middle of the grid and place new food.

        The heading is left unchanged.
        """
        # Re-centre the snake first so rand_food() avoids its new cell rather
        # than the stale (possibly off-board) one.
        self.snake = Vector(CELL_GRID[0] // 2, CELL_GRID[1] // 2)
        self.rand_food()

    def draw(self):
        """Render the background, the snake cell and the food cell, then flip the display."""
        self.screen.fill(EMPTY)

        # Draw snake
        snake_rect = pygame.Rect(
            self.snake.x * CELL_SIZE[0],
            self.snake.y * CELL_SIZE[1],
            CELL_SIZE[0],
            CELL_SIZE[1],
        )
        pygame.draw.rect(self.screen, SNAKE, snake_rect)

        # Draw food
        food_rect = pygame.Rect(
            self.food.x * CELL_SIZE[0],
            self.food.y * CELL_SIZE[1],
            CELL_SIZE[0],
            CELL_SIZE[1],
        )
        pygame.draw.rect(self.screen, FOOD, food_rect)

        pygame.display.flip()

    def run(self):
        """Run the demo loop with a fixed RIGHT action until the game is stopped.

        Shuts pygame down and exits the interpreter via ``sys.exit()`` once
        ``self.running`` becomes False.
        """
        while self.running:
            self.handle_events()
            self.step(action=Action.RIGHT)
            self.draw()
            self.clock.tick(FPS)

        pygame.quit()
        sys.exit()



# 4. Tworzenie agenta


In [None]:
def randomAction() -> tuple[int, int, int]:
    """Return a one-hot tuple of length 3 with the ``1`` at a uniformly random position."""
    chosen = random.randint(0, 2)
    first, second, third = (int(slot == chosen) for slot in range(3))
    return (first, second, third)


# Number of discrete actions; sets the length of each Q-table row (one value per action).
n_actions = 3


class AgentQ:
    """Tabular epsilon-greedy Q-learning agent that plays a ``Game``.

    ``Q_Table`` maps a state tuple to an array with one value per action;
    unseen states start as all zeros.
    """

    def __init__(self, game: Game) -> None:
        """Create an agent bound to *game* with an empty Q-table."""
        self.Q_Table = defaultdict(lambda: np.zeros(n_actions))
        self.alpha = 1  # learning rate used by calculate_q
        self.gamma = 1  # discount applied to the best future value
        self.epsilon = 1  # probability of picking a random action
        self.game = game

    def get_action(self, state: tuple[bool, bool, bool]) -> tuple[int, ...]:
        """Pick an action for *state* and return it as a one-hot tuple.

        With probability ``epsilon`` the action is random; otherwise it is
        the action with the highest Q-value for *state*. Both branches return
        the same one-hot format so the result can always be passed to
        ``tuple_to_action``.
        """
        if random.random() < self.epsilon:
            return randomAction()
        best_index = int(argmax(self.Q_Table[state]))
        return tuple(int(slot == best_index) for slot in range(n_actions))

    def make_step(self):
        """Play one move in the game and update the Q-table from the outcome."""
        old_state = self.game.get_state()
        action = self.get_action(old_state)
        state, reward, done = self.game.step(tuple_to_action(action))

        action_index = action.index(1)
        q_row = self.Q_Table[old_state]
        # A finished episode has no future return; `state` then already
        # describes the reset game and must not feed into the target.
        max_future_q = 0.0 if done else float(np.max(self.Q_Table[state]))
        q_row[action_index] = self.calculate_q(
            q_row[action_index], max_future_q, reward
        )

    def calculate_q(self, current_q, max_future_q, reward):
        """Return the updated Q-value.

        Computes ``current_q + alpha * (reward + gamma * max_future_q - current_q)``.
        """
        return current_q + self.alpha * (reward + self.gamma * max_future_q - current_q)

SystemExit: 