In [1]:
!pip install numpy pygame



In [2]:
import pygame
import random
import numpy as np

# Game constants
# Window size in pixels and the side length of one square grid cell.
SCREEN_WIDTH, SCREEN_HEIGHT = 600, 600
GRID_SIZE = 30

# Board dimensions expressed in grid cells.
GRID_WIDTH, GRID_HEIGHT = SCREEN_WIDTH // GRID_SIZE, SCREEN_HEIGHT // GRID_SIZE

# RGB colour tuples used for drawing.
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)

# Define the Snake class
class Snake:
    """Snake game state for manual play: body cells, heading, score and food.

    Positions are ``(x, y)`` grid-cell tuples; ``body[0]`` is the head.
    """

    def __init__(self):
        # Start as a single segment in the middle of the board, heading (0, 1).
        start_cell = (GRID_WIDTH // 2, GRID_HEIGHT // 2)
        self.body = [start_cell]
        self.direction = (0, 1)
        self.score = 0
        self.food = self.spawn_food()

    def spawn_food(self):
        """Return a random grid cell that is not occupied by the body.

        Cells are drawn uniformly and re-drawn until one is free.
        """
        candidate = None
        while candidate is None or candidate in self.body:
            col = random.randint(0, GRID_WIDTH - 1)
            row = random.randint(0, GRID_HEIGHT - 1)
            candidate = (col, row)
        return candidate

    def move(self):
        """Advance one cell along ``self.direction``.

        Landing on the food grows the snake by one, bumps the score and
        respawns the food; otherwise the tail cell is dropped.
        """
        step_x, step_y = self.direction
        old_x, old_y = self.body[0]
        next_head = (old_x + step_x, old_y + step_y)

        self.body.insert(0, next_head)

        if next_head != self.food:
            # Plain move: length stays the same.
            self.body.pop()
            return

        self.score += 1
        self.food = self.spawn_food()

    def check_collision(self):
        """Return True if the head is outside the grid or overlaps the body."""
        head = self.body[0]
        col, row = head[0], head[1]
        inside_grid = 0 <= col < GRID_WIDTH and 0 <= row < GRID_HEIGHT
        if not inside_grid:
            return True
        # Any other segment sharing the head's cell means self-collision.
        return head in self.body[1:]

# This part is just for testing the game logic.
# The Reinforcement Learning agent will replace the manual control.
if __name__ == '__main__':
    pygame.init()
    try:
        screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
        clock = pygame.time.Clock()

        snake = Snake()

        # Arrow key -> unit direction vector (screen y grows downward).
        key_directions = {
            pygame.K_UP: (0, -1),
            pygame.K_DOWN: (0, 1),
            pygame.K_LEFT: (-1, 0),
            pygame.K_RIGHT: (1, 0),
        }

        running = True
        while running:
            # Direction the snake actually travelled on the previous tick.
            # Reversal is checked against this rather than the pending
            # direction, so two quick key presses within one tick (e.g. LEFT
            # then UP while moving down) cannot turn the snake back onto itself.
            travelled = snake.direction
            forbidden = (-travelled[0], -travelled[1])

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                elif event.type == pygame.KEYDOWN and event.key in key_directions:
                    wanted = key_directions[event.key]
                    if wanted != forbidden:
                        snake.direction = wanted

            snake.move()
            if snake.check_collision():
                running = False

            screen.fill(BLACK)
            # Draw the snake
            for x, y in snake.body:
                pygame.draw.rect(screen, GREEN, (x * GRID_SIZE, y * GRID_SIZE, GRID_SIZE, GRID_SIZE))
            # Draw the food
            pygame.draw.rect(screen, RED, (snake.food[0] * GRID_SIZE, snake.food[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))

            pygame.display.flip()
            clock.tick(10)
    finally:
        # Always shut pygame down, even if the loop raised.
        pygame.quit()

pygame 2.6.1 (SDL 2.28.4, Python 3.12.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
import numpy as np
import random
from collections import deque
import pygame

# Q-Learning hyperparameters
# EPSILON balances exploration against exploitation.
LEARNING_RATE, DISCOUNT, EPSILON = 1e-3, 0.99, 0.1

# Define the Agent class
class DQNAgent:
    """Skeleton agent: bounded replay memory plus a placeholder policy.

    No learning is implemented here. ``act`` returns a random action on both
    branches and ``replay`` is a no-op; both are stand-ins for a Q-value model.
    """

    def __init__(self, state_size, action_size):
        """Store the problem dimensions and the module-level hyperparameters.

        Args:
            state_size: Length of the state vector handed to ``act``.
            action_size: Number of discrete actions; ``act`` returns values
                in ``range(action_size)``.
        """
        self.state_size = state_size
        self.action_size = action_size
        # Bounded buffer: once 2000 transitions are stored the oldest is dropped.
        self.memory = deque(maxlen=2000)
        self.gamma = DISCOUNT
        self.epsilon = EPSILON
        self.learning_rate = LEARNING_RATE

    def remember(self, state, action, reward, next_state, done):
        """Append one ``(state, action, reward, next_state, done)`` transition."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Return an action index in ``range(self.action_size)``.

        With probability ``self.epsilon`` this is the exploration branch.
        The other branch is a temporary stand-in for a model prediction and
        is also random; ``state`` is not consulted yet.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        # Placeholder for the greedy choice. It must respect action_size just
        # like the exploration branch, rather than assuming exactly four
        # actions (0:UP, 1:DOWN, 2:LEFT, 3:RIGHT).
        return random.randrange(self.action_size)

    def replay(self, batch_size):
        """Placeholder for training on a sampled batch; currently does nothing."""
        # We'll skip the complex replay logic for this simple example
        pass

# This part is just a skeleton for the full implementation.
# A full RL project would involve a deep neural network inside the agent to predict Q-values.
# For this project, we'll focus on the logic of the game itself and how the agent interacts with it.

In [4]:
import pygame
import random
import numpy as np

# Game constants
SCREEN_WIDTH = SCREEN_HEIGHT = 600  # window size in pixels
GRID_SIZE = 30                      # side length of one grid cell in pixels
# Number of cells across and down the board.
GRID_WIDTH, GRID_HEIGHT = (side // GRID_SIZE for side in (SCREEN_WIDTH, SCREEN_HEIGHT))
# RGB colours
WHITE, BLACK = (255, 255, 255), (0, 0, 0)
GREEN, RED = (0, 255, 0), (255, 0, 0)

# Define the Snake class (extends the earlier version: move() now takes an action and returns (reward, done), and get_state() is new)
class Snake:
    """Snake game state driven by discrete agent actions.

    Positions are ``(x, y)`` grid-cell tuples with ``y`` growing downward
    (action 0, "up", moves by ``(0, -1)``); ``body[0]`` is the head.
    """

    def __init__(self):
        # Start as a single segment in the middle of the board, heading down.
        self.body = [(GRID_WIDTH // 2, GRID_HEIGHT // 2)]
        self.direction = (0, 1)
        self.score = 0
        self.food = self.spawn_food()

    def spawn_food(self):
        """Return a random grid cell that is not occupied by the body.

        Cells are re-drawn until a free one is found.
        """
        while True:
            food_pos = (random.randint(0, GRID_WIDTH - 1), random.randint(0, GRID_HEIGHT - 1))
            if food_pos not in self.body:
                return food_pos

    def move(self, action):
        """Apply ``action`` and advance the snake by one cell.

        Args:
            action: 0 = up, 1 = down, 2 = left, 3 = right. Any other value
                keeps the current direction.

        Returns:
            ``(reward, done)``: reward is 10 when food is eaten, -10 on a
            collision (with ``done`` True), otherwise 0.
        """
        # Convert action (0,1,2,3) to a new direction
        # 0: Up, 1: Down, 2: Left, 3: Right
        if action == 0:
            self.direction = (0, -1)
        elif action == 1:
            self.direction = (0, 1)
        elif action == 2:
            self.direction = (-1, 0)
        elif action == 3:
            self.direction = (1, 0)

        head_x, head_y = self.body[0]
        dx, dy = self.direction
        new_head = (head_x + dx, head_y + dy)

        self.body.insert(0, new_head)

        reward = 0
        done = False

        if new_head == self.food:
            self.score += 1
            reward = 10
            self.food = self.spawn_food()
        else:
            self.body.pop()

        if self.check_collision():
            reward = -10
            done = True

        return reward, done

    def check_collision(self):
        """Return True if the head is outside the grid or overlaps the body."""
        head = self.body[0]
        if (head[0] < 0 or head[0] >= GRID_WIDTH or
            head[1] < 0 or head[1] >= GRID_HEIGHT):
            return True
        if head in self.body[1:]:
            return True
        return False

    def _is_danger(self, cell):
        """Return True if stepping the head onto ``cell`` would end the game."""
        x, y = cell
        if x < 0 or x >= GRID_WIDTH or y < 0 or y >= GRID_HEIGHT:
            return True
        # The tail cell is excluded: a step onto a body cell never eats food
        # (food is spawned off the body), so the tail is popped before the
        # collision check in move() and that cell is free by then.
        return cell in self.body[:-1]

    def get_state(self):
        """Return the 7-element integer feature vector for the agent.

        Layout: ``[danger straight, danger right, danger left,
        food left, food right, food up, food down]``. The danger flags are
        relative to the current heading and are 1 when the adjacent cell in
        that direction is a wall or a body segment.
        """
        head = self.body[0]
        head_x, head_y = head
        dx, dy = self.direction

        # With y growing downward, a right turn maps (dx, dy) -> (-dy, dx)
        # and a left turn maps (dx, dy) -> (dy, -dx).
        cell_straight = (head_x + dx, head_y + dy)
        cell_right = (head_x - dy, head_y + dx)
        cell_left = (head_x + dy, head_y - dx)

        state = [
            # Danger straight
            self._is_danger(cell_straight),
            # Danger right
            self._is_danger(cell_right),
            # Danger left
            self._is_danger(cell_left),
            # Food location
            self.food[0] < head[0],  # Food left
            self.food[0] > head[0],  # Food right
            self.food[1] < head[1],  # Food up
            self.food[1] > head[1]   # Food down
        ]
        return np.array(state, dtype=int)

# Define the Agent class (simplified from the earlier cell: no replay memory; adds epsilon decay settings)
class DQNAgent:
    """Placeholder agent that picks actions at random.

    The epsilon fields are kept so the training loop can decay exploration,
    but both branches of ``act`` currently return a random action.
    """

    def __init__(self, state_size, action_size):
        self.state_size, self.action_size = state_size, action_size
        # Exploration schedule: begin fully exploratory.
        self.epsilon = 1.0
        self.epsilon_decay = 0.999
        self.epsilon_min = 0.01

    def act(self, state):
        """Return a random action index in ``range(self.action_size)``."""
        roll = np.random.rand()
        exploiting = not (roll <= self.epsilon)
        if exploiting:
            # A real DQN would return the network's best action here; this
            # demonstration falls back to a random choice instead.
            return random.randrange(self.action_size)
        # Exploration branch.
        return random.randrange(self.action_size)

# Main training loop
def train_agent(episodes=500, fps=10):
    """Play ``episodes`` games with a ``DQNAgent`` and render every step.

    No learning happens yet: the transitions are produced but never stored or
    replayed. Closing the window stops the run early.

    Args:
        episodes: Number of games to play.
        fps: Frame-rate cap passed to ``clock.tick`` for each rendered step.
    """
    pygame.init()
    try:
        screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
        clock = pygame.time.Clock()

        state_size = 7
        action_size = 4
        agent = DQNAgent(state_size, action_size)

        for e in range(episodes):
            snake = Snake()
            state = snake.get_state()
            done = False
            while not done:
                # Process the event queue every frame; otherwise the window
                # stops responding and cannot be closed.
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        return

                action = agent.act(state)
                reward, done = snake.move(action)
                next_state = snake.get_state()

                # The agent would "remember" this and "replay" for training
                # We'll skip the complex learning part for this simplified example

                # Advance the observation so the next act() call sees the
                # current board rather than the episode's first state.
                state = next_state

                # Draw the game state for visual feedback
                screen.fill(BLACK)
                for x, y in snake.body:
                    pygame.draw.rect(screen, GREEN, (x * GRID_SIZE, y * GRID_SIZE, GRID_SIZE, GRID_SIZE))
                pygame.draw.rect(screen, RED, (snake.food[0] * GRID_SIZE, snake.food[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))

                pygame.display.flip()
                clock.tick(fps)

            # Print episode results
            print(f"Episode: {e+1}/{episodes}, Score: {snake.score}")

            # Decay epsilon to reduce exploration over time
            if agent.epsilon > agent.epsilon_min:
                agent.epsilon *= agent.epsilon_decay
    finally:
        # Shut pygame down on normal completion, early exit, or error.
        pygame.quit()

# Start the training run when executed as the main module.
if __name__ == "__main__":
    train_agent()

Episode: 1/500, Score: 0
Episode: 2/500, Score: 0
Episode: 3/500, Score: 0
Episode: 4/500, Score: 0
Episode: 5/500, Score: 1
Episode: 6/500, Score: 0
Episode: 7/500, Score: 1
Episode: 8/500, Score: 0
Episode: 9/500, Score: 0
Episode: 10/500, Score: 0
Episode: 11/500, Score: 0
Episode: 12/500, Score: 0
Episode: 13/500, Score: 0
Episode: 14/500, Score: 0
Episode: 15/500, Score: 0
Episode: 16/500, Score: 0
Episode: 17/500, Score: 0
Episode: 18/500, Score: 0
Episode: 19/500, Score: 0
Episode: 20/500, Score: 0
Episode: 21/500, Score: 0
Episode: 22/500, Score: 0
Episode: 23/500, Score: 0
Episode: 24/500, Score: 0
Episode: 25/500, Score: 0
Episode: 26/500, Score: 0
Episode: 27/500, Score: 0
Episode: 28/500, Score: 0
Episode: 29/500, Score: 0
Episode: 30/500, Score: 0
Episode: 31/500, Score: 1
Episode: 32/500, Score: 0
Episode: 33/500, Score: 0
Episode: 34/500, Score: 0
Episode: 35/500, Score: 0
Episode: 36/500, Score: 0
Episode: 37/500, Score: 0
Episode: 38/500, Score: 0
Episode: 39/500, Scor