In [7]:
## Delt board og spil op i forskellige classes så man kan restarte uden at reloade board

import random
import pygame
import numpy as np
import cv2

class SnakeBoard:
    def __init__(self, width=15, height=15):
        self.width = width
        self.height = height
        self.SCALE = 10
        # pygame
        pygame.init()
        self.screen = pygame.display.set_mode((width * self.SCALE, height * self.SCALE))

    def quit(self):
        pygame.quit()   
        print("Quitting game...")


class SnakeGame:
    def __init__(self, snake_board, width=15, height=15):
        self.snake_board = snake_board
        self.screen = snake_board.screen
        self.width = snake_board.width
        self.height = snake_board.height
        startpos = (width // 2, height // 2)
        self.SCALE = snake_board.SCALE
        self.apple = (random.randint(0, height - 1), random.randint(0, width - 1))

        self.tail = [startpos]
        self.directions = {"up": (-1, 0), "down": (1, 0), "right": (0, 1), "left": (0, -1)}
        self.currentdir = None

        # pygame
        self.clock = pygame.time.Clock()
        self.running = True
        self.tick_counter = 0


    def capture_screen(self):
        # Get the dimensions of the game area
        game_rect = pygame.Rect(0, 0, self.width * self.SCALE, self.height * self.SCALE)

        # Capture the screen data
        screen_data = pygame.surfarray.array3d(self.screen.subsurface(game_rect))

        # Convert the screen data to a Pygame surface
        screen_surface = pygame.surfarray.make_surface(screen_data)

        # Resize the captured screen to 15x15 pixels
        resized_screen_surface = pygame.transform.scale(screen_surface, (15, 15))

        # Convert the resized surface to a NumPy array
        resized_screen_data = pygame.surfarray.array3d(resized_screen_surface)

        # Convert RGB to grayscale
        grayscale_data = cv2.cvtColor(resized_screen_data, cv2.COLOR_RGB2GRAY)

        # Flatten the array
        flattened_screen_data = grayscale_data.flatten()
        reshaped_data = np.reshape(grayscale_data, (15, 15))
        # Save the array to a text file
        np.savetxt('screen_data.txt', reshaped_data, fmt='%.2f', delimiter=',')

        # Return the processed screen data
        #return flattened_screen_data

    def ask_to_continue(self):
        font = pygame.font.Font(None, 20)
        text = font.render("Do you want to continue? (Y/N)", True, (255, 255, 255))
        self.screen.blit(text, (50, 50))
        pygame.display.flip()

        while True:
            for event in pygame.event.get():
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_y:
                        return True
                    elif event.key == pygame.K_n:
                        return False
    
    def end(self):
        print(f"You died!\nScore: {len(self.tail)}")
        self.running = False

    def run(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.running = False

        keys = pygame.key.get_pressed()

        self.screen.fill((255, 0, 0))

        if keys[pygame.K_p]:
            self.end()
        
        # draw snake
        head = self.tail[-1]
        for point in self.tail[:-1]:  # snake tail
            pygame.draw.rect(self.screen, (20, 97, 18),
                            (point[1] * self.SCALE, point[0] * self.SCALE, self.SCALE, self.SCALE))
            
        pygame.draw.rect(self.screen, (42, 227, 39),
                        (head[1] * self.SCALE, head[0] * self.SCALE, self.SCALE, self.SCALE))  # snake head

        # draw apple
        pygame.draw.rect(self.screen, (255, 255, 255),
                        (self.apple[1] * self.SCALE, self.apple[0] * self.SCALE, self.SCALE, self.SCALE))

        pygame.display.flip()
        self.clock.tick(60)

        self.tick_counter += 1
        
    def action(self, actions = None, keys = None, human = False):
        if human:
            actions = [keys[pygame.K_UP], keys[pygame.K_DOWN], keys[pygame.K_RIGHT], keys[pygame.K_LEFT]]
        else:
            #actions = [round(a) for a in actions]
            actions = np.round(actions).astype(int)
        
        if self.currentdir in ["right", "left"]:
            if actions[0]:
                action = "up"
            elif actions[1]:
                action = "down"
            else:
                action = self.currentdir
        elif self.currentdir in ["up", "down"]:
            if actions[2]:
                action = "right"
            elif actions[3]:
                action = "left"
            else:
                action = self.currentdir
        else:
            if actions[0]:
                action = "up"
            elif actions[1]:
                action = "down"
            elif actions[2]:
                action = "right"
            elif actions[3]:
                action = "left"
            else:
                action = self.currentdir
        self.currentdir = action

        if action:
            new = (self.tail[-1][0] + self.directions[action][0], self.tail[-1][1] + self.directions[action][1])

            if new in self.tail:
                self.end()
            elif not (0 <= new[0] <= self.height) or not (0 <= new[1] <= self.width):
                self.end()
            
            self.tail.append(new)

            if new != self.apple:
                self.tail.pop(0)
            else: # eat apple
                newapple = (random.randint(0, self.height - 1), random.randint(0, self.width - 1))
                while newapple in self.tail: # prevent apples on the snake
                    newapple = (random.randint(0, self.height - 1), random.randint(0, self.width - 1))
                self.apple = newapple
    




In [5]:
board = SnakeBoard(15, 15)

def play_snake(board):

    while board:
        snek = SnakeGame(board)
        while snek.running:
            keys = pygame.key.get_pressed()
            snek.run()
            snek.capture_screen() # saves screen to file screen_data.txt
            if snek.tick_counter % 5 == 0:
                snek.action(keys=keys, human=True)

        
        if snek.ask_to_continue() == False:
            board.quit()
            break


play_snake(board)

KeyboardInterrupt: 

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

# Define the DQN model
model = tf.keras.Sequential([
    layers.Flatten(input_shape=(15, 15)),  # Flatten the 15x15 grid
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(4, activation='softmax')  # Output layer with softmax activation
])

# Compile the model (you can use different optimizers and loss functions)
model.compile(optimizer='adam', loss='mse')



In [None]:
# Q-learning parameters
epsilon = 0.2 # Exploration-exploitation trade-off
discount_factor = 0.9  # Discount factor for future rewards
learning_rate = 0.001  # Learning rate for the neural network



# Simulate pressing of key
def simulate_key_press(key):
    key_event = pygame.event.Event(pygame.KEYDOWN, key=key)
    pygame.event.post(key_event)



# Training loop
num_episodes = 10000
board = SnakeBoard(15, 15)
for episode in range(num_episodes):
    # Initialize the Snake game environment
    
    snek = SnakeGame(board)

    # Reset environment and get initial state
    state = np.loadtxt('screen_data.txt', delimiter=',', dtype=str)  # Load the captured screen data
    state = state.reshape((1, 15, 15))
    state = state.astype(float)  # Convert the loaded strings to float

    # Initialize episode variables
    total_reward = 0
    curr_reward = 0

    while snek.running:
        snek.run()
        snek.capture_screen()
   
        next_state = np.loadtxt('screen_data.txt', delimiter=',', dtype=str)  # Load the captured screen data
        next_state = next_state.reshape((1, 15, 15))
        next_state = next_state.astype(float)  # Convert the loaded strings to float
        

        if snek.tick_counter % 1 == 0:

            # Choose an action using epsilon-greedy strategy
            if np.random.rand() < epsilon:
                q_values = [0, 0, 0, 1] 
                random.shuffle(q_values) # Exploration: Random action
                print(f"random {q_values}")
            else:
                q_values = model.predict(next_state)
                q_values = q_values[0]
                #action = np.argmax(q_values)  # Action with the highest Q-value index
                print(f"model: {q_values}")
            snek.action(actions=q_values, human=False)

        # Calculate reward based on the number of instances of the value 65 (tail pixels)
        prev_reward = curr_reward
        curr_reward = np.sum(next_state == 65)
        reward = (curr_reward - prev_reward) * 20

        if snek.running == False:
            print("punish")
            reward -= 10
        total_reward += reward
        # Update the Q-value using the Q-learning update rule
        target = reward + discount_factor * np.max(model.predict(next_state))
        model.fit(state, np.array([[target]]), epochs=1, verbose=0)

        done = snek.running  # Set 'done' based on game state
        # Move to the next state
        state = next_state

        # Accumulate total reward
       

    # Print total reward for the episode
    print(f"Episode {episode + 1}/{num_episodes}, Total Reward: {total_reward}")

    # Ask the user if they want to continue playing
    simulate_key_press(pygame.K_y)
    if snek.ask_to_continue() == False:
        board.quit()
        break


model: [1.0000000e+00 1.3164390e-20 1.0192822e-33 1.3510819e-14]
model: [1.000000e+00 4.814847e-31 0.000000e+00 0.000000e+00]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
You died!
Score: 1
punish
Episode 1/10000, Total Reward: -10
model: [1. 0. 0. 0.]
random [0, 0, 1, 0]
random [0, 0, 1, 0]
random [1, 0, 0, 0]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
You died!
Score: 1
punish
Episode 2/10000, Total Reward: -10
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
You died!
Score: 1
punish
Episode 3/10000, Total Reward: -10
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
random [0, 0, 0, 1]
random [0, 0, 1, 0]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
model: [1. 0. 0. 0.]
mod

KeyboardInterrupt: 

In [None]:
q_values[0]