In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

  and should_run_async(code)


# Developing the Snake Game

In [3]:
import pygame
import random
from enum import Enum
from collections import namedtuple

# Initialise pygame once at import time, before SnakeGame creates its display and clock.
pygame.init()

# Heading of the snake; the integer values are kept explicit so they stay stable.
Direction = Enum(
    'Direction',
    {'RIGHT': 1, 'LEFT': 2, 'UP': 3, 'DOWN': 4},
)
    
# Immutable (x, y) pixel coordinate used for snake segments and the food
Point = namedtuple('Point', ['x', 'y'])

# RGB colour triples used when drawing the board
WHITE = (255, 255, 255)
RED = (200, 0, 0)
BLUE1 = (0, 0, 255)
BLUE2 = (0, 100, 255)
BLACK = (0, 0, 0)

# Edge length of one grid cell in pixels, and the tick rate handed to the clock
BLOCK_SIZE = 20
SPEED = 20

class SnakeGame:
    """Snake game drawn with pygame on a grid of ``BLOCK_SIZE``-pixel cells.

    Attributes:
        w, h: Window width and height in pixels.
        display: Surface returned by ``pygame.display.set_mode``.
        clock: ``pygame.time.Clock`` used to tick the game at ``SPEED``.
        direction: ``Direction`` the snake moves in on the next step.
        head: ``Point`` of the snake's head (top-left pixel of its cell).
        snake: List of ``Point`` segments, head first.
        score: Number of food items eaten so far.
        food: ``Point`` of the current food item.
    """

    def __init__(self, w=640, h=480):
        """Open the window and set up a three-segment snake heading right.

        Args:
            w: Window width in pixels.
            h: Window height in pixels.
        """
        self.w = w
        self.h = h
        # init display
        self.display = pygame.display.set_mode((self.w, self.h))
        pygame.display.set_caption('Snake')
        self.clock = pygame.time.Clock()

        # init game state
        self.direction = Direction.RIGHT

        # Snap the start cell to the grid using integer arithmetic. Plain
        # ``w/2`` yields floats and, for sizes whose half is not a multiple of
        # BLOCK_SIZE, a head that can never coincide with a food cell.
        start_x = (self.w // 2 // BLOCK_SIZE) * BLOCK_SIZE
        start_y = (self.h // 2 // BLOCK_SIZE) * BLOCK_SIZE
        self.head = Point(start_x, start_y)
        self.snake = [self.head,
                      Point(self.head.x - BLOCK_SIZE, self.head.y),
                      Point(self.head.x - (2 * BLOCK_SIZE), self.head.y)]

        self.score = 0
        self.food = None
        self._place_food()

    def _place_food(self):
        """Put the food on a uniformly chosen grid cell not covered by the snake.

        Raises:
            RuntimeError: If the snake covers every cell of the board.
        """
        cols = self.w // BLOCK_SIZE
        rows = self.h // BLOCK_SIZE
        occupied = set(self.snake)
        # Choosing among the free cells (instead of retrying recursively on a
        # clash) keeps the call depth constant however long the snake gets.
        free_cells = [
            Point(col * BLOCK_SIZE, row * BLOCK_SIZE)
            for row in range(rows)
            for col in range(cols)
            if Point(col * BLOCK_SIZE, row * BLOCK_SIZE) not in occupied
        ]
        if not free_cells:
            raise RuntimeError('No free cell left to place food')
        self.food = random.choice(free_cells)

    def play_step(self):
        """Advance the game by one frame.

        Handles pending pygame events, moves the snake one cell, checks for a
        collision, handles food, redraws and ticks the clock.

        Returns:
            tuple: ``(game_over, score)``. On a collision the method returns
            early, before the UI is redrawn.

        Raises:
            SystemExit: If the window was closed.
        """
        # 1. collect user input
        key_to_direction = {
            pygame.K_LEFT: Direction.LEFT,
            pygame.K_RIGHT: Direction.RIGHT,
            pygame.K_UP: Direction.UP,
            pygame.K_DOWN: Direction.DOWN,
        }
        opposite = {
            Direction.LEFT: Direction.RIGHT,
            Direction.RIGHT: Direction.LEFT,
            Direction.UP: Direction.DOWN,
            Direction.DOWN: Direction.UP,
        }
        # A key press that reverses the heading would drive the head straight
        # into the neck. The heading at the start of the frame is used as the
        # reference so two quick presses cannot add up to a U-turn either.
        blocked = opposite.get(self.direction)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                # ``quit()`` is a helper injected by the ``site`` module and is
                # not guaranteed to exist; raising SystemExit is the portable form.
                raise SystemExit
            if event.type == pygame.KEYDOWN:
                requested = key_to_direction.get(event.key)
                if requested is not None and requested is not blocked:
                    self.direction = requested

        # 2. move
        self._move(self.direction)  # update the head
        self.snake.insert(0, self.head)

        # 3. check if game over
        game_over = False
        if self._is_collision():
            game_over = True
            return game_over, self.score

        # 4. place new food or just move
        if self.head == self.food:
            self.score += 1
            self._place_food()
        else:
            # No food eaten: drop the tail so the length stays the same.
            self.snake.pop()

        # 5. update ui and clock
        self._update_ui()
        self.clock.tick(SPEED)
        # 6. return game over and score
        return game_over, self.score

    def _is_collision(self):
        """Return True if the head is outside the window or on a body segment."""
        # hits boundary
        if (self.head.x > self.w - BLOCK_SIZE or self.head.x < 0
                or self.head.y > self.h - BLOCK_SIZE or self.head.y < 0):
            return True
        # hits itself (index 0 is the head, so it is skipped)
        if self.head in self.snake[1:]:
            return True

        return False

    def _update_ui(self):
        """Redraw the background, every snake segment and the food, then flip."""
        self.display.fill(BLACK)

        for pt in self.snake:
            pygame.draw.rect(self.display, BLUE1, pygame.Rect(pt.x, pt.y, BLOCK_SIZE, BLOCK_SIZE))
            # Smaller, lighter square drawn inside each segment.
            pygame.draw.rect(self.display, BLUE2, pygame.Rect(pt.x + 4, pt.y + 4, 12, 12))

        pygame.draw.rect(self.display, RED, pygame.Rect(self.food.x, self.food.y, BLOCK_SIZE, BLOCK_SIZE))

        pygame.display.flip()

    def _move(self, direction):
        """Set ``self.head`` one cell further in ``direction``.

        Only the head is updated; the caller inserts it into ``self.snake``.
        A value that is not a ``Direction`` member leaves the head in place.
        """
        dx, dy = {
            Direction.RIGHT: (BLOCK_SIZE, 0),
            Direction.LEFT: (-BLOCK_SIZE, 0),
            Direction.DOWN: (0, BLOCK_SIZE),
            Direction.UP: (0, -BLOCK_SIZE),
        }.get(direction, (0, 0))

        self.head = Point(self.head.x + dx, self.head.y + dy)
            

if __name__ == '__main__':
    game = SnakeGame()

    # Keep stepping the game until a step reports that it has ended
    finished = False
    while not finished:
        finished, score = game.play_step()

    print('Final Score', score)

    # Shut pygame down once the final score has been reported
    pygame.quit()

  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(

Final Score 0


# Wrapping the Snake Game as a Gymnasium Environment

In [4]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces

class SnakeGameEnv(gym.Env):
    """Gymnasium wrapper around the pygame ``SnakeGame``.

    Actions (``Discrete(4)``): 0 = UP, 1 = RIGHT, 2 = DOWN, 3 = LEFT.

    Observations: float32 array of shape ``(rows, cols, 3)`` with values in
    {0, 1}, where ``rows = h // BLOCK_SIZE`` and ``cols = w // BLOCK_SIZE``:
        channel 0 - every snake segment (the head included),
        channel 1 - the snake head,
        channel 2 - the food.
    """

    def __init__(self, render_mode=None):
        """Create the underlying game and declare the action/observation spaces.

        Args:
            render_mode: Stored on the instance; the game always draws itself
                through pygame, so the value is not interpreted here.
        """
        super().__init__()
        self.game = SnakeGame()
        self.action_space = spaces.Discrete(4)  # UP, RIGHT, DOWN, LEFT

        rows, cols = self._grid_shape()
        self.observation_space = spaces.Box(
            low=0, high=1,
            shape=(rows, cols, 3),
            dtype=np.float32
        )
        self.render_mode = render_mode

    def _grid_shape(self):
        """Return ``(rows, cols)`` of the board measured in grid cells."""
        return self.game.h // BLOCK_SIZE, self.game.w // BLOCK_SIZE

    def reset(self, seed=None, options=None):
        """Start a new game and return ``(observation, info)``.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            options: Accepted for compatibility with the Gymnasium ``reset``
                signature; not used.
        """
        super().reset(seed=seed)
        # Reset the game
        self.game = SnakeGame()
        return self._get_obs(), {}

    def _get_obs(self):
        """Build the three-channel grid observation from the current game state."""
        rows, cols = self._grid_shape()
        obs = np.zeros((rows, cols, 3), dtype=np.float32)

        def mark(point, channel):
            col, row = int(point.x // BLOCK_SIZE), int(point.y // BLOCK_SIZE)
            # After a wall collision the head lies outside the board. Without
            # this check the right/bottom walls raise IndexError and the
            # left/top walls wrap around to the opposite edge via index -1.
            if 0 <= row < rows and 0 <= col < cols:
                obs[row, col, channel] = 1

        # Mark snake body
        for point in self.game.snake:
            mark(point, 0)

        # Mark snake head
        mark(self.game.head, 1)

        # Mark food
        mark(self.game.food, 2)

        return obs

    def step(self, action):
        """Apply ``action``, advance the game one frame and compute the reward.

        Args:
            action: Integer-like value in ``{0, 1, 2, 3}`` (Python int, NumPy
                integer, or 0-d NumPy array).

        Returns:
            tuple: ``(obs, reward, terminated, truncated, info)``.
            ``terminated`` is the game-over flag, ``truncated`` is always
            ``False`` and ``info`` is ``{"score": score}``.
            Reward: -10 on game over, +10 when food was eaten on this step,
            otherwise ``-0.1 + 1 / (manhattan_pixel_distance_to_food + 1)``.
        """
        # Convert action (0,1,2,3) to Direction enum
        direction_map = {
            0: Direction.UP,
            1: Direction.RIGHT,
            2: Direction.DOWN,
            3: Direction.LEFT
        }
        # A 0-d NumPy array is unhashable and cannot be used as a dict key,
        # so normalise the action to a plain int first.
        self.game.direction = direction_map[int(action)]

        # Remember the score so that an increase caused by this step can be
        # detected; comparing the returned score with ``self.game.score``
        # afterwards compares a value with itself.
        score_before = self.game.score

        # Take step in game
        game_over, score = self.game.play_step()

        # Calculate reward
        if game_over:
            reward = -10
        elif score > score_before:  # Score increased (food eaten)
            reward = 10
        else:
            reward = -0.1  # Small negative reward for each step

            # Shaping term: larger the closer the head is to the food
            head = self.game.head
            food = self.game.food
            distance_to_food = abs(head.x - food.x) + abs(head.y - food.y)
            reward += 1 / (distance_to_food + 1)  # Reward inversely proportional to distance

        return self._get_obs(), reward, game_over, False, {"score": score}

    def render(self):
        """No-op: the game redraws its pygame window on every step."""
        pass

    def close(self):
        """Close the pygame window."""
        # ``self.game.display`` is the Surface returned by ``set_mode`` and has
        # no ``quit`` method; the window is shut down via the display module.
        import pygame

        pygame.display.quit()

  and should_run_async(code)


In [2]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces
import random
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
import os
import torch 

class SnakeEnv(gym.Env):
    """Headless Snake environment on a square grid following the Gymnasium API.

    Actions (``Discrete(4)``): 0 = up, 1 = right, 2 = down, 3 = left, where
    "up"/"down" change the first grid index and "left"/"right" the second.

    Observations: float32 array of shape ``(grid_size, grid_size, 3)`` with
    values in {0, 1}: channel 0 marks every snake cell, channel 1 the head,
    channel 2 the food.
    """

    def __init__(self, grid_size=10):
        """Declare the spaces and bring the environment into its start state.

        Args:
            grid_size: Number of cells along each side of the board.
        """
        super(SnakeEnv, self).__init__()
        self.grid_size = grid_size
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(low=0, high=1,
                                            shape=(self.grid_size, self.grid_size, 3),
                                            dtype=np.float32)
        self.reset()

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed ``self.np_random``,
                which drives the food placement.
            options: Accepted for compatibility with the Gymnasium ``reset``
                signature; not used.
        """
        super().reset(seed=seed)
        self.snake_pos = [(self.grid_size // 2, self.grid_size // 2)]
        self.snake_direction = 1
        self.food_pos = self._place_food()
        self.steps = 0
        self.max_steps = 100
        return self._get_obs(), {}

    def _place_food(self):
        """Return a random cell that is not occupied by the snake."""
        while True:
            # Draw from the environment's own generator so that
            # ``reset(seed=...)`` makes episodes reproducible; the global
            # ``random`` module is not affected by that seed.
            food = (int(self.np_random.integers(0, self.grid_size)),
                    int(self.np_random.integers(0, self.grid_size)))
            if food not in self.snake_pos:
                return food

    def _get_obs(self):
        """Build the three-channel grid observation from the current state."""
        obs = np.zeros((self.grid_size, self.grid_size, 3), dtype=np.float32)
        for x, y in self.snake_pos:
            obs[x, y, 0] = 1
        head_x, head_y = self.snake_pos[0]
        obs[head_x, head_y, 1] = 1
        obs[self.food_pos[0], self.food_pos[1], 2] = 1
        return obs

    def step(self, action):
        """Move the snake one cell and return the Gymnasium step tuple.

        Args:
            action: 0 = up, 1 = right, 2 = down; any other value means left.

        Returns:
            tuple: ``(obs, reward, terminated, truncated, info)``.
            ``terminated`` is True when the snake hits a wall or itself
            (reward -10). ``truncated`` is True when the episode is cut off
            after ``max_steps`` steps without a collision. Eating food gives
            +10; any other step gives ``-0.1 + 0.1``.
        """
        self.steps += 1
        head_x, head_y = self.snake_pos[0]
        if action == 0:  # up
            new_head = (head_x - 1, head_y)
        elif action == 1:  # right
            new_head = (head_x, head_y + 1)
        elif action == 2:  # down
            new_head = (head_x + 1, head_y)
        else:  # left
            new_head = (head_x, head_y - 1)

        terminated = False
        reward = -0.1
        if (new_head[0] < 0 or new_head[0] >= self.grid_size or
                new_head[1] < 0 or new_head[1] >= self.grid_size or
                new_head in self.snake_pos):
            terminated = True
            reward = -10  # Larger negative reward for collisions
        else:
            # Move snake
            self.snake_pos.insert(0, new_head)

            # Check if food is eaten
            if new_head == self.food_pos:
                reward = 10  # Larger reward for eating food
                self.food_pos = self._place_food()
            else:
                self.snake_pos.pop()
                reward += 0.1  # Cancels the per-step penalty for a surviving step

        # Hitting the step limit is a time-limit cut-off, not a terminal state
        # of the game, so it is reported through the ``truncated`` flag.
        truncated = (not terminated) and self.steps >= self.max_steps

        return self._get_obs(), reward, terminated, truncated, {}

# Training function
def train_and_evaluate(model_class, env, total_timesteps, model_name):
    """Fit a fresh agent on ``env``, save it, and score it over ten episodes.

    Args:
        model_class: Algorithm class to instantiate (called with the
            ``"MlpPolicy"`` policy name, the environment and ``verbose=1``).
        env: Environment used for both training and evaluation.
        total_timesteps: Training budget passed to ``learn``.
        model_name: Name handed to ``save`` for the trained agent.

    Returns:
        tuple: ``(model, mean_reward, std_reward)`` where the two statistics
        come from ``evaluate_policy`` with ``n_eval_episodes=10``.
    """
    agent = model_class("MlpPolicy", env, verbose=1)
    agent.learn(total_timesteps=total_timesteps)

    # Persist the trained agent before evaluating it
    agent.save(model_name)

    reward_mean, reward_std = evaluate_policy(agent, env, n_eval_episodes=10)
    return agent, reward_mean, reward_std

def main():
    """Train DQN and PPO on the pygame Snake environment and print their scores.

    Both agents are trained for 2000 timesteps on the same monitored
    ``SnakeGameEnv`` and saved as ``dqn_snake`` and ``ppo_snake``.
    ``SnakeGameEnv`` must already be defined in the session (it lives in an
    earlier notebook cell).
    """
    # Create and wrap the environment
    env = Monitor(SnakeGameEnv())

    # Algorithms to compare, in training order: display name -> class
    algorithms = (("DQN", DQN), ("PPO", PPO))

    results = {}
    for name, model_class in algorithms:
        print(f"Training {name}...")
        _, mean_reward, std_reward = train_and_evaluate(
            model_class, env, total_timesteps=2000, model_name=f"{name.lower()}_snake"
        )
        results[name] = (mean_reward, std_reward)

    print("\nResults:")
    for name, (mean_reward, std_reward) in results.items():
        print(f"{name} - Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")

if __name__ == "__main__":
    main()

  and should_run_async(code)


NameError: name 'SnakeGameEnv' is not defined

In [1]:
from stable_baselines3 import DQN, PPO
import time

def play_model(model_path, model_type=DQN):
    """Load a saved agent and let it play one episode of ``SnakeGameEnv``.

    Args:
        model_path: Path handed to ``model_type.load``.
        model_type: Algorithm class providing ``load`` (defaults to ``DQN``).

    Returns:
        Sum of the rewards collected during the episode.
    """
    # Load the trained model
    model = model_type.load(model_path)

    # Create environment
    env = SnakeGameEnv()
    obs, _ = env.reset()

    done = False
    total_reward = 0
    info = {}

    while not done:
        # Get model's action
        action, _ = model.predict(obs, deterministic=True)

        # ``predict`` hands back a NumPy array; the environment looks the
        # action up in a dict, which needs a hashable plain int.
        obs, reward, terminated, truncated, info = env.step(int(action))
        # An episode is over when it either terminates or is truncated.
        done = terminated or truncated
        total_reward += reward

        # Add small delay to make visualization easier to follow
        time.sleep(0.1)

    print(f"Game Over! Final Score: {info['score']}")
    return total_reward

if __name__ == "__main__":
    # Replay each saved agent in turn: (banner, saved model path, algorithm class)
    for banner, saved_path, algorithm in (
        ("\nPlaying DQN model...", "dqn_snake", DQN),
        ("\nPlaying PPO model...", "ppo_snake", PPO),
    ):
        print(banner)
        play_model(saved_path, algorithm)

  from jax import xla_computation as _xla_computation



Playing DQN model...


FileNotFoundError: [Errno 2] No such file or directory: 'dqn_snake.zip'