# 1. Przygotowanie środowiska programistycznego
Zaimportowanie niezbędnych bibliotek i narzędzi do pracy z danymi oraz modelami uczenia maszynowego.


In [1]:
import pygame
import sys
import random
from collections import defaultdict,deque
import numpy as np
from numpy import argmax
from dataclasses import dataclass
import enum
import matplotlib.pyplot as plt
import statistics
from IPython.display import clear_output
import pickle
# Window size in pixels and the frame-rate cap passed to the pygame clock.
SCREEN_WIDTH = SCREEN_HEIGHT = 640
FPS = 10

# Colour triples handed to pygame when filling the background and drawing cells.
EMPTY = (0, 0, 0)
SNAKE = (0, 255, 0)
FOOD = (255, 0, 0)

# Board dimensions in cells (columns, rows) and the resulting cell size in
# pixels. True division is used, so CELL_SIZE holds floats.
CELL_GRID = (20, 20)
CELL_SIZE = (
    SCREEN_WIDTH / CELL_GRID[0],
    SCREEN_HEIGHT / CELL_GRID[1],
)


pygame 2.6.1 (SDL 2.32.62, Python 3.14.2)
Hello from the pygame community. https://www.pygame.org/contribute.html


# 2. Metody i typy pomocniczne


In [2]:


class Action(enum.Enum):
    """Relative steering command for the snake.

    Members are numbered 1..3 in definition order (``enum.auto`` starts at 1),
    so iterating the enum yields LEFT, RIGHT, STRAIGHT.
    """

    LEFT = enum.auto()
    RIGHT = enum.auto()
    STRAIGHT = enum.auto()
def tuple_to_action(t) -> Action:
    """Return the ``Action`` member at position ``t`` in definition order.

    Despite the name, ``t`` is used as a plain sequence index
    (0 -> LEFT, 1 -> RIGHT, 2 -> STRAIGHT); an out-of-range index raises
    ``IndexError``.
    """
    members = list(Action)
    return members[t]
def save_agent(agent, filename="q_table.pkl"):
    """Pickle a plain-``dict`` copy of ``agent.Q_Table`` into ``filename``.

    The table is converted with ``dict(...)`` before serialising, so only the
    key/value pairs are stored, not the default factory of the original mapping.
    The target file is opened in binary write mode and overwritten.
    """
    with open(filename, "wb") as handle:
        payload = pickle.dumps(dict(agent.Q_Table))
        handle.write(payload)
def load_agent(agent, filename="q_table.pkl", n_actions=3):
    """Restore ``agent.Q_Table`` from a pickle file written by ``save_agent``.

    Args:
        agent: Object whose ``Q_Table`` attribute is replaced.
        filename: Path of the pickle file to read.
        n_actions: Length of the zero vector created for states missing from
            the loaded table. Defaults to 3, the value previously hard-coded.

    Raises:
        OSError: If ``filename`` cannot be opened.

    The loaded mapping is wrapped in a ``defaultdict`` so that unseen states
    start with all-zero action values.
    """
    # SECURITY: unpickling can execute arbitrary code. Only load Q-table files
    # that this notebook produced itself, never files from untrusted sources.
    with open(filename, "rb") as f:
        data = pickle.load(f)
    agent.Q_Table = defaultdict(lambda: np.zeros(n_actions), data)
@dataclass
class Vector:
    """Two-component vector used for grid positions and headings.

    Equality is component-wise and only ever true against another ``Vector``.
    Addition is component-wise and defers (``NotImplemented``) for any other
    operand type.
    """

    x: int
    y: int

    def __eq__(self, other):
        """Return True only when ``other`` is a Vector with equal components."""
        if not isinstance(other, Vector):
            return False
        return self.x == other.x and self.y == other.y

    def __add__(self, other):
        """Return the component-wise sum as a new Vector."""
        if not isinstance(other, Vector):
            return NotImplemented
        return Vector(self.x + other.x, self.y + other.y)
    
    
    
def live_plot(episode_rewards, episode_points=None):
    """Redraw the training curves in place of the previous cell output.

    Args:
        episode_rewards: Sequence of reward values, plotted against 1..N.
        episode_points: Optional sequence of point values of the same length;
            when given it is drawn in a second panel to the right.

    The figure is closed after it has been shown, so calling this repeatedly
    from a training loop does not accumulate open figures.
    """
    clear_output(wait=True)  # replace the previous chart instead of stacking outputs
    episodes = np.arange(1, len(episode_rewards) + 1)

    fig = plt.figure(figsize=(12, 5))

    # Rewards panel (left half of the figure).
    reward_ax = fig.add_subplot(1, 2, 1)
    reward_ax.plot(episodes, episode_rewards, label="Total reward")
    reward_ax.set_xlabel("Episode")
    reward_ax.set_ylabel("Reward")
    reward_ax.set_title("Reward per Episode")
    reward_ax.grid(True)
    reward_ax.legend()

    # Points panel (right half), only when point data was supplied.
    if episode_points is not None:
        points_ax = fig.add_subplot(1, 2, 2)
        points_ax.plot(episodes, episode_points, label="Points", color="orange")
        points_ax.set_xlabel("Episode")
        points_ax.set_ylabel("Points")
        points_ax.set_title("Points per Episode")
        points_ax.grid(True)
        points_ax.legend()

    fig.tight_layout()
    plt.show()
    # Release the figure explicitly; a no-op if the backend already closed it.
    plt.close(fig)

# 3. Logika gry Snake z AI

In [None]:
class Game:
    """Snake game on a ``CELL_GRID`` board, rendered with pygame.

    Positions are grid cells in screen orientation: ``x`` grows to the right
    and ``y`` grows downwards. ``snake[0]`` is the head. The snake is steered
    with relative ``Action`` turns rather than absolute directions.
    """

    def __init__(self):
        """Open the pygame window and put the game into its starting state."""
        pygame.init()
        self.screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
        pygame.display.set_caption("Snake AI")
        self.clock = pygame.time.Clock()
        self.running = True  # cleared by the window-close event or the Q key
        self.food = Vector(0, 0)
        self.direction = Vector(1, 0)
        self.snake: deque[Vector] = deque()
        self.points = 0
        self.reset()

    def handle_keydown(self, key):
        """Stop the game when the Q key is pressed; other keys are ignored."""
        if key == pygame.K_q:
            self.running = False

    def handle_action(self, action: Action):
        """Rotate the heading by a relative turn.

        Because ``y`` grows downwards on screen, a left turn maps
        ``(dx, dy)`` to ``(dy, -dx)`` and a right turn to ``(-dy, dx)``.
        This matches the left/right convention used by ``get_state``.
        """
        dx, dy = self.direction.x, self.direction.y
        if action == Action.LEFT:
            self.direction = Vector(dy, -dx)
        elif action == Action.RIGHT:
            self.direction = Vector(-dy, dx)
        # Action.STRAIGHT keeps the current heading.

    def handle_events(self):
        """Drain the pygame event queue, reacting to quit and key presses."""
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.running = False
            elif event.type == pygame.KEYDOWN:
                self.handle_keydown(event.key)

    def rand_food(self):
        """Move the food to a random cell that is not occupied by the snake."""
        if len(self.snake) >= CELL_GRID[0] * CELL_GRID[1]:
            # Board completely covered: there is no free cell to pick.
            return
        while True:
            candidate = Vector(
                random.randrange(CELL_GRID[0]),
                random.randrange(CELL_GRID[1]),
            )
            # Membership test compares the candidate with every snake cell.
            if candidate not in self.snake:
                self.food = candidate
                return

    def get_distance(self) -> int:
        """Return the Manhattan distance from the head to the food."""
        return abs(self.snake[0].x - self.food.x) + abs(self.snake[0].y - self.food.y)

    def _outside_board(self, cell) -> bool:
        """Return True when ``cell`` lies outside the ``CELL_GRID`` bounds."""
        inside = 0 <= cell.x < CELL_GRID[0] and 0 <= cell.y < CELL_GRID[1]
        return not inside

    def step(self, action : Action) -> tuple[tuple, float, bool]:
        """Advance the game by one move.

        Args:
            action: Relative turn to apply before moving.

        Returns:
            ``(state, reward, done)`` where ``state`` comes from ``get_state``.
            Rewards: -10 for hitting the body or a wall, 10 for eating food,
            otherwise -0.05 per step plus 0.5 when the head got closer to the
            food or minus 0.25 when it got further away.
        """
        old_distance = self.get_distance()
        self.handle_events()
        self.handle_action(action)
        new_head = self.snake[0] + self.direction
        # Checked against the body as it was before the head is added.
        hit_self = new_head in self.snake
        self.snake.appendleft(new_head)

        if hit_self:
            # Debug trace printed on self-collision (runtime output kept unchanged).
            print("Debil sie zjadł")
            return (self.get_state(), -10, True)

        reward = -0.05  # small cost for every step
        new_distance = self.get_distance()
        if new_distance < old_distance:
            reward += 0.5  # moved closer to the food
        elif new_distance > old_distance:
            reward -= 0.25  # moved away from the food

        done = False
        if new_head == self.food:
            # Growing: the tail is kept, so the snake gets one cell longer.
            self.rand_food()
            self.points += 1
            reward = 10
        else:
            self.snake.pop()

        if self._outside_board(new_head):
            reward = -10
            done = True
        self.draw()
        self.clock.tick(FPS)
        return (self.get_state(), reward, done)

    def step_with_keyboard(self) -> tuple[tuple, float, bool]:
        """Advance one move using keyboard input instead of an agent.

        Left/A turns left and Right/D turns right; Up/Down/W/S and no input
        go straight. If several keys arrive in one batch the last one wins.
        Q or closing the window clears ``running``.
        """
        action = Action.STRAIGHT

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.running = False
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_q:
                    self.running = False
                elif event.key in (pygame.K_LEFT, pygame.K_a):
                    action = Action.LEFT
                elif event.key in (pygame.K_RIGHT, pygame.K_d):
                    action = Action.RIGHT
                elif event.key in (pygame.K_UP, pygame.K_DOWN, pygame.K_w, pygame.K_s):
                    action = Action.STRAIGHT

        return self.step(action)

    def get_state(self) -> tuple:
        """Return ``(food_front, food_left, food_right, distance_level)``.

        The three booleans say whether the food lies ahead of, to the left of
        or to the right of the head relative to the current heading.
        ``distance_level`` is the Manhattan distance to the food in buckets of
        five cells, capped at 3.
        """
        food_front = False
        food_left = False
        food_right = False

        if self.direction == Vector(1, 0):  # heading right on screen
            food_front = self.snake[0].x < self.food.x
            food_left = self.snake[0].y > self.food.y
            food_right = self.snake[0].y < self.food.y
        elif self.direction == Vector(-1, 0):  # heading left on screen
            food_front = self.snake[0].x > self.food.x
            food_left = self.snake[0].y < self.food.y
            food_right = self.snake[0].y > self.food.y
        elif self.direction == Vector(0, 1):  # heading down on screen
            food_front = self.snake[0].y < self.food.y
            food_left = self.snake[0].x < self.food.x
            food_right = self.snake[0].x > self.food.x
        elif self.direction == Vector(0, -1):  # heading up on screen
            food_front = self.snake[0].y > self.food.y
            food_left = self.snake[0].x > self.food.x
            food_right = self.snake[0].x < self.food.x

        distance = self.get_distance()
        distance_level = min(distance // 5, 3)  # buckets 0..3

        return (food_front, food_left, food_right, distance_level)

    def reset(self):
        """Restore the starting state: one-cell snake in the centre heading right.

        The snake is rebuilt before the food is placed so the food is checked
        against the new snake rather than the previous one. ``running`` is
        left untouched.
        """
        self.direction = Vector(1, 0)
        self.snake = deque([Vector(CELL_GRID[0] // 2, CELL_GRID[1] // 2)])
        self.points = 0
        self.rand_food()

    def _fill_cell(self, cell, color):
        """Paint one grid cell with ``color``."""
        rect = pygame.Rect(
            cell.x * CELL_SIZE[0],
            cell.y * CELL_SIZE[1],
            CELL_SIZE[0],
            CELL_SIZE[1],
        )
        pygame.draw.rect(self.screen, color, rect)

    def draw(self):
        """Render the background, the snake and the food, then flip the display."""
        self.screen.fill(EMPTY)
        for cell in self.snake:
            self._fill_cell(cell, SNAKE)
        self._fill_cell(self.food, FOOD)
        pygame.display.flip()


# 4. Tworzenie agenta


In [4]:
def randomAction() -> tuple[int, int, int]:
    """Return a one-hot triple with the 1 placed at a uniformly random slot."""
    one_hot = (
        (1, 0, 0),
        (0, 1, 0),
        (0, 0, 1),
    )
    return one_hot[random.randint(0, 2)]


# Number of discrete actions; sizes each row of the Q-table.
n_actions = 3


class AgentQ:
    """Tabular Q-learning agent with epsilon-greedy action selection.

    ``Q_Table`` maps a state tuple to an array of ``n_actions`` action values;
    unseen states start at zero.
    """

    def __init__(self, game: Game) -> None:
        self.game = game
        self.epsilon: float = 1  # exploration probability; starts fully random
        self.alpha = 0.1  # learning rate
        self.gamma = 0.95  # discount factor
        self.Q_Table = defaultdict(lambda: np.zeros(n_actions))

    def get_action(self, state):
        """Pick an action index: random with probability epsilon, else greedy."""
        explore = random.random() < self.epsilon
        if explore:
            return random.randint(0, n_actions - 1)
        return argmax(self.Q_Table[state])

    def make_step(self):
        """Play one game step and apply the Q-learning update for it.

        Returns:
            The ``(new_state, reward, done)`` triple reported by the game.
        """
        state_before = tuple(self.game.get_state())
        action = self.get_action(state_before)
        state_after, reward, done = self.game.step(tuple_to_action(action))

        q_row = self.Q_Table[state_before]
        # A terminal transition has no future value to bootstrap from.
        best_next = 0 if done else np.max(self.Q_Table[state_after])
        q_row[action] = self.calculate_q(q_row[action], best_next, reward)
        return state_after, reward, done

    def calculate_q(self, current_q, max_future_q, reward):
        """Return the updated Q-value after moving towards the TD target."""
        td_target = reward + self.gamma * max_future_q
        return current_q + self.alpha * (td_target - current_q)
    
    

# 5. Pętla szkoląca

In [5]:
def train_agent(agent : AgentQ, num_episodes=10000, epsilon_decay=0.9995, min_epsilon=0.01,
                max_steps=500, report_every=50, save_every=100):
    """Train ``agent`` on its game for up to ``num_episodes`` episodes.

    Args:
        agent: Agent to train; its ``game`` is reset at the start of each episode.
        num_episodes: Maximum number of episodes to play.
        epsilon_decay: Factor applied to ``agent.epsilon`` after every episode.
        min_epsilon: Lower bound for ``agent.epsilon``.
        max_steps: Step cap per episode, so an episode cannot run forever.
        report_every: Plot and print running averages every this many
            episodes; a falsy value disables reporting.
        save_every: Save the Q-table to ``q_table_<episode>.pkl`` every this
            many episodes; a falsy value disables saving.

    Training stops early as soon as ``agent.game.running`` becomes false
    (window closed or Q pressed); the interrupted episode is not recorded.
    """
    episode_rewards = []
    episode_points = []
    avg_rewards = []
    avg_points = []
    for episode in range(num_episodes):
        agent.game.reset()
        if not agent.game.running:
            break
        done = False
        total_reward = 0
        steps = 0
        while not done and steps < max_steps and agent.game.running:
            _, reward, done = agent.make_step()
            total_reward += reward
            steps += 1
        if not agent.game.running:
            break
        episode_rewards.append(total_reward)
        episode_points.append(agent.game.points)
        agent.epsilon = max(min_epsilon, agent.epsilon * epsilon_decay)
        if report_every and episode % report_every == 0:
            # Average over the episodes gathered since the previous report.
            avg_reward = statistics.mean(episode_rewards)
            avg_point = statistics.mean(episode_points)
            avg_rewards.append(avg_reward)
            avg_points.append(avg_point)
            live_plot(avg_rewards, avg_points)
            # Print the average before the window is cleared; averaging the
            # emptied list would yield nan.
            print(f"Episode {episode}/{num_episodes}, Epsilon: {agent.epsilon:.4f}, Avg Reward: {avg_reward:.3f}")
            episode_rewards = []
            episode_points = []
        if save_every and episode % save_every == 0:
            save_agent(agent, f"q_table_{episode}.pkl")
    plt.ioff()
    plt.show()
try:
    game = Game()
    # agent = AgentQ(game)

    # Manual play: keep stepping until the window is closed or Q is pressed,
    # and start a fresh round whenever the snake dies.
    while game.running:
        _, _, done = game.step_with_keyboard()
        if done:
            game.reset()

    # train_agent(agent)
finally:
    # Always release the pygame window, even after an error or interrupt.
    pygame.quit()


Debil sie zjadł
Debil sie zjadł


KeyboardInterrupt: 