In [1]:
!pip install numpy gym tensorflow



In [2]:
import gym
from gym import spaces
import numpy as np

# This class defines our custom environment
class GridWorldEnv(gym.Env):
    """Square grid world in which an agent walks towards a randomly placed target.

    Positions are ``(row, col)`` integer pairs in ``[0, size - 1]``.

    Actions (``spaces.Discrete(4)``):
        0: Up (row - 1), 1: Down (row + 1), 2: Left (col - 1), 3: Right (col + 1).
        A move that would leave the grid is clipped, so the agent stays on the edge.
        Any other action value leaves the position unchanged.

    Observation:
        The agent position only, as an ``int32`` array of shape ``(2,)``.
        NOTE(review): the target is re-drawn on every ``reset()`` but is not part
        of the observation, so a policy cannot see where it is - confirm this is
        intended before training on this environment.

    Reward:
        ``10`` when the agent reaches the target (the episode ends), otherwise
        ``-0.1`` per step.

    This class follows the classic Gym API used in this file: ``reset()`` returns
    the observation and ``step()`` returns ``(obs, reward, done, info)``.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, size=10):
        """Create a ``size`` x ``size`` grid and draw the first episode.

        Raises:
            ValueError: if ``size`` is smaller than 2, because the agent and the
                target must occupy different cells.
        """
        super(GridWorldEnv, self).__init__()
        if size < 2:
            # With a single cell reset() could never place the target elsewhere.
            raise ValueError("size must be at least 2")
        self.size = size
        self.action_space = spaces.Discrete(4) # 0:Up, 1:Down, 2:Left, 3:Right
        self.observation_space = spaces.Box(low=0, high=size-1, shape=(2,), dtype=np.int32)
        self.agent_pos = None
        self.target_pos = None
        self.reset()

    def _random_cell(self):
        """Draw a uniformly random ``(row, col)`` cell as an ``int32`` array."""
        # Cast after sampling so positions match the dtype declared in observation_space.
        return np.random.randint(0, self.size, size=2).astype(np.int32)

    def reset(self):
        """Start a new episode with random, distinct agent and target cells.

        Returns:
            The initial observation (see ``_get_obs``).
        """
        self.agent_pos = self._random_cell()
        self.target_pos = self._random_cell()
        # Make sure target and agent are not in the same spot at the start
        while np.array_equal(self.agent_pos, self.target_pos):
            self.target_pos = self._random_cell()
        return self._get_obs()

    def _get_obs(self):
        """Return a copy of the agent position.

        A copy is returned so that arrays already handed to the caller (for
        example stored in a replay buffer) never change when the environment
        advances, and so that the caller cannot modify the internal state.
        """
        return self.agent_pos.copy()

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, done, info)``."""
        # Work on a fresh array: modifying self.agent_pos in place would also
        # change every observation that still references the same buffer.
        new_pos = self.agent_pos.copy()
        if action == 0: # Up
            new_pos[0] -= 1
        elif action == 1: # Down
            new_pos[0] += 1
        elif action == 2: # Left
            new_pos[1] -= 1
        elif action == 3: # Right
            new_pos[1] += 1

        # Clip position to stay within the grid (in place, which keeps the dtype)
        np.clip(new_pos, 0, self.size - 1, out=new_pos)
        self.agent_pos = new_pos

        if np.array_equal(self.agent_pos, self.target_pos):
            reward = 10
            done = True
        else:
            # Penalize for each step taken to encourage efficiency
            reward = -0.1
            done = False

        obs = self._get_obs()
        info = {} # Additional information, currently always empty

        return obs, reward, done, info

    def render(self, mode='human'):
        """Print a text grid: ``T`` marks the target and ``A`` the agent.

        The agent is drawn last, so it hides the target when both share a cell.
        ``mode`` is accepted for API compatibility and is not used.
        """
        grid = np.full((self.size, self.size), ' . ')
        grid[self.target_pos[0], self.target_pos[1]] = ' T '
        grid[self.agent_pos[0], self.agent_pos[1]] = ' A '
        print('\n' + np.array2string(grid))

# This part is just to test our environment
if __name__ == '__main__':
    # Build a small 5x5 world and show the starting layout.
    env = GridWorldEnv(size=5)
    obs = env.reset()
    env.render()

    print("Testing the environment with random actions...")

    # Drive the environment with uniformly sampled actions until the episode
    # reports that it is done; the grid is rendered after every step.
    done = False
    while True:
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        env.render()
        print(f"Action: {action}, Reward: {reward}, Done: {done}")
        if done:
            break

    print("Test finished.")


[[' . ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']
 [' A ' ' . ' ' T ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']]
Testing the environment with random actions...

[[' . ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']
 [' A ' ' . ' ' T ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']]
Action: 2, Reward: -0.1, Done: False

[[' . ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']
 [' A ' ' . ' ' T ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']]
Action: 2, Reward: -0.1, Done: False

[[' . ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']
 [' A ' ' . ' ' T ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']]
Action: 2, Reward: -0.1, Done: False

[[' . ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']
 [' A ' ' . ' ' . ' ' . ' ' . ']
 [' . ' ' . ' ' T ' ' . ' ' . ']
 [' . ' ' . ' ' . ' ' . ' ' . ']]
Action:

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  return datetime.utcnow().replace(tzinfo=utc)


In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from collections import deque
import numpy as np
import random
import time

# Define the DQN Agent
class DQNAgent:
    """Epsilon-greedy Q-learning agent backed by a small Keras network.

    Transitions are stored in a bounded replay memory (``deque(maxlen=2000)``)
    and the network is trained on random minibatches drawn from it.

    States may be passed either as 1-D arrays of length ``state_size`` or as
    already-batched arrays of shape ``(1, state_size)``; both are accepted by
    ``act`` and ``replay``.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # Discount rate
        self.epsilon = 1.0   # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build the Q-network: two hidden ReLU layers of 24 units, linear output.

        The output layer has one unit per action and the model is compiled with
        MSE loss and the Adam optimizer.
        """
        model = Sequential()
        # Declare the input explicitly instead of passing `input_dim` to the
        # first Dense layer, which newer Keras versions warn about.
        model.add(tf.keras.Input(shape=(self.state_size,)))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest predicted Q-value.

        Returns:
            int: the action index, always a plain Python ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # atleast_2d adds the batch axis for 1-D states and leaves (1, n) inputs alone.
        act_values = self.model.predict(np.atleast_2d(state), verbose=0)
        return int(np.argmax(act_values[0]))  # Action with the highest Q-value

    def replay(self, batch_size):
        """Train on one random minibatch, then decay ``epsilon``.

        Nothing happens (no training, no epsilon decay) while the memory holds
        fewer than ``batch_size`` transitions or when ``batch_size`` is below 1,
        so this can be called safely after every step.

        The whole minibatch is evaluated with two ``predict`` calls and trained
        with a single ``fit`` call, i.e. one minibatch update per ``replay``
        call rather than one network call per stored transition.
        """
        if batch_size < 1 or len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states = np.vstack([np.atleast_2d(t[0]) for t in minibatch])
        actions = np.array([t[1] for t in minibatch], dtype=int)
        rewards = np.array([t[2] for t in minibatch], dtype=float)
        next_states = np.vstack([np.atleast_2d(t[3]) for t in minibatch])
        dones = np.array([t[4] for t in minibatch], dtype=bool)

        # Start from the current predictions so that only the taken action's
        # Q-value contributes to the loss.
        targets = np.array(self.model.predict(states, verbose=0))
        next_q = self.model.predict(next_states, verbose=0)
        # Terminal transitions use the raw reward; others add the discounted
        # best next-state value.
        updated = np.where(dones, rewards, rewards + self.gamma * np.max(next_q, axis=1))
        targets[np.arange(batch_size), actions] = updated

        self.model.fit(states, targets, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

  return datetime.utcnow().replace(tzinfo=utc)


In [4]:
import pygame
import random
import numpy as np

# Define game constants
SCREEN_WIDTH = 600
SCREEN_HEIGHT = 600
GRID_SIZE = 30
GRID_WIDTH = SCREEN_WIDTH // GRID_SIZE
GRID_HEIGHT = SCREEN_HEIGHT // GRID_SIZE
WHITE = (255, 255, 255)
GREEN = (0, 255, 0)
RED = (255, 0, 0)
BLACK = (0, 0, 0)

# Define the Snake class (same as before)
class Snake:
    """Game state and rules for a single snake on a rectangular grid.

    Cells are ``(x, y)`` tuples with ``x`` growing to the right and ``y``
    growing downwards, so "Up" is ``(0, -1)``. ``body[0]`` is the head.

    Args:
        grid_width: number of columns; defaults to the module-level ``GRID_WIDTH``.
        grid_height: number of rows; defaults to the module-level ``GRID_HEIGHT``.
    """

    def __init__(self, grid_width=None, grid_height=None):
        # Only fall back to the module constants when no explicit size is given.
        self.grid_width = GRID_WIDTH if grid_width is None else grid_width
        self.grid_height = GRID_HEIGHT if grid_height is None else grid_height
        self.body = [(self.grid_width // 2, self.grid_height // 2)]
        self.direction = (0, 1)
        self.score = 0
        self.food = self.spawn_food()

    def spawn_food(self):
        """Return a random cell that is not occupied by the snake.

        Uses rejection sampling, so it only terminates while at least one cell
        of the grid is free.
        """
        while True:
            food_pos = (random.randint(0, self.grid_width - 1), random.randint(0, self.grid_height - 1))
            if food_pos not in self.body:
                return food_pos

    def move(self, action):
        """Advance the snake by one cell.

        Args:
            action: 0: Up, 1: Down, 2: Left, 3: Right. Any other value keeps
                the current direction. The new direction is applied as-is;
                there is no guard against reversing onto the snake's own body.

        Returns:
            ``(reward, done)``: ``10`` when food is eaten, ``-10`` with
            ``done=True`` on a wall or body collision, otherwise ``0``.
        """
        if action == 0:
            self.direction = (0, -1)
        elif action == 1:
            self.direction = (0, 1)
        elif action == 2:
            self.direction = (-1, 0)
        elif action == 3:
            self.direction = (1, 0)

        head_x, head_y = self.body[0]
        dx, dy = self.direction
        new_head = (head_x + dx, head_y + dy)

        self.body.insert(0, new_head)

        reward = 0
        done = False

        if new_head == self.food:
            # Eating: keep the tail so the snake grows by one cell.
            self.score += 1
            reward = 10
            self.food = self.spawn_food()
        else:
            self.body.pop()

        if self.check_collision():
            reward = -10
            done = True

        return reward, done

    def _out_of_bounds(self, cell):
        """Return True when ``cell`` lies outside the grid."""
        x, y = cell
        return not (0 <= x < self.grid_width and 0 <= y < self.grid_height)

    def check_collision(self):
        """Return True when the head is outside the grid or on the body."""
        head = self.body[0]
        return self._out_of_bounds(head) or head in self.body[1:]

    def _is_danger(self, cell):
        """Return True when moving the head onto ``cell`` would end the game.

        The last body segment is excluded: a non-eating move frees the tail
        cell before the collision check, and food never spawns on the body.
        This mirrors the check that ``move`` performs after the step.
        """
        return self._out_of_bounds(cell) or cell in self.body[:-1]

    def get_state(self):
        """Return the 7-element feature vector describing the current state.

        Layout (each entry is 0 or 1):
            [danger straight, danger right, danger left,
             food left, food right, food up, food down]

        The danger flags are relative to the current heading; the food flags
        compare the food position with the head position.
        """
        head_x, head_y = self.body[0]
        dx, dy = self.direction
        # With y growing downwards, a right-hand turn maps (dx, dy) to
        # (-dy, dx) and a left-hand turn maps it to (dy, -dx).
        straight = (head_x + dx, head_y + dy)
        right = (head_x - dy, head_y + dx)
        left = (head_x + dy, head_y - dx)

        state = [
            self._is_danger(straight),
            self._is_danger(right),
            self._is_danger(left),
            # Food location
            self.food[0] < head_x,  # Food left
            self.food[0] > head_x,  # Food right
            self.food[1] < head_y,  # Food up
            self.food[1] > head_y   # Food down
        ]
        return np.array(state, dtype=int)

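# Define a simplified placeholder agent. NOTE: this is NOT the same as the Keras-based
# DQNAgent from the earlier cell - it has no network or replay memory, and because it
# reuses the name `DQNAgent` it replaces that class once this cell has run.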
class DQNAgent:
    """Placeholder agent: keeps an epsilon schedule but has no learned policy.

    Both the exploration branch and the "greedy" branch of ``act`` return a
    uniformly random action, so ``epsilon`` currently has no effect on the
    actions chosen. The class only stands in for a real DQN agent.
    """

    def __init__(self, state_size, action_size):
        self.state_size, self.action_size = state_size, action_size
        # Exploration schedule: starts fully exploratory; the decay itself is
        # applied by the caller, not by this class.
        self.epsilon = 1.0
        self.epsilon_decay = 0.999
        self.epsilon_min = 0.01

    def act(self, state):
        """Return a random action index in ``[0, action_size)``.

        ``state`` is accepted for interface compatibility and is not used.
        """
        exploring = np.random.rand() <= self.epsilon
        if exploring:
            chosen = random.randrange(self.action_size)
        else:
            # A real DQN would query its network here; this stand-in has no
            # model, so it also samples uniformly at random.
            chosen = random.randrange(self.action_size)
        return chosen

# Main training loop
def train_agent(episodes=500, fps=10):
    """Play ``episodes`` games of Snake with the agent and draw every step.

    Args:
        episodes: number of games to play.
        fps: upper bound on rendered steps per second (passed to ``clock.tick``).

    The pygame window is always shut down on exit, including when an error is
    raised or the user closes the window (which stops the run early).
    """
    pygame.init()
    try:
        screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
        clock = pygame.time.Clock()

        state_size = 7
        action_size = 4
        agent = DQNAgent(state_size, action_size)

        for e in range(episodes):
            snake = Snake()
            state = snake.get_state()
            done = False
            while not done:
                # Drain the event queue every frame; without this the window
                # stops responding and cannot be closed.
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        return

                action = agent.act(state)
                reward, done = snake.move(action)
                next_state = snake.get_state()

                # The agent would "remember" this transition and "replay" it
                # for training; the learning part is skipped in this
                # simplified example.

                # Advance the state so the next decision sees the current board.
                state = next_state

                # Draw the game state for visual feedback
                screen.fill(BLACK)
                for x, y in snake.body:
                    pygame.draw.rect(screen, GREEN, (x * GRID_SIZE, y * GRID_SIZE, GRID_SIZE, GRID_SIZE))
                pygame.draw.rect(screen, RED, (snake.food[0] * GRID_SIZE, snake.food[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))

                pygame.display.flip()
                clock.tick(fps)

            # Print episode results
            print(f"Episode: {e+1}/{episodes}, Score: {snake.score}")

            # Decay epsilon to reduce exploration over time
            if agent.epsilon > agent.epsilon_min:
                agent.epsilon *= agent.epsilon_decay
    finally:
        # Release the display even if the loop above raised or returned early.
        pygame.quit()

# Start the training/visualisation loop when this code runs as the main module.
if __name__ == '__main__':
    train_agent()

  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
  return datetime.utcnow().replace(tzinfo=utc)


Episode: 1/500, Score: 0
Episode: 2/500, Score: 0
Episode: 3/500, Score: 1
Episode: 4/500, Score: 0
Episode: 5/500, Score: 0
Episode: 6/500, Score: 0
Episode: 7/500, Score: 0
Episode: 8/500, Score: 0
Episode: 9/500, Score: 0
Episode: 10/500, Score: 1
Episode: 11/500, Score: 0
Episode: 12/500, Score: 0
Episode: 13/500, Score: 0
Episode: 14/500, Score: 0
Episode: 15/500, Score: 0
Episode: 16/500, Score: 0
Episode: 17/500, Score: 1
Episode: 18/500, Score: 0
Episode: 19/500, Score: 0
Episode: 20/500, Score: 0
Episode: 21/500, Score: 0
Episode: 22/500, Score: 0
Episode: 23/500, Score: 0
Episode: 24/500, Score: 2
Episode: 25/500, Score: 0
Episode: 26/500, Score: 0
Episode: 27/500, Score: 0
Episode: 28/500, Score: 1
Episode: 29/500, Score: 0
Episode: 30/500, Score: 1
Episode: 31/500, Score: 1
Episode: 32/500, Score: 0
Episode: 33/500, Score: 1
Episode: 34/500, Score: 0
Episode: 35/500, Score: 0
Episode: 36/500, Score: 0
Episode: 37/500, Score: 1
Episode: 38/500, Score: 0
Episode: 39/500, Scor