# GPU setup

In [1]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


# GDrive setup

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Magic setup stuff

In [None]:
W = 10  # "Logiczna" szerokość
H = 10  # "Logiczna" wysokość
SCALE = 25  # Skala ekran-logika

In [None]:
import random

class Pos:
    def __init__(self, x=0, y=0):
        self.x = x
        self.y = y

    def __add__(self, o):
        return Pos(self.x + o.x, self.y + o.y)

    def __eq__(self, other):
        return self.x == other.x and self.y == other.y

    def random_pos(self):
        self.x = random.randint(0, W-1)
        self.y = random.randint(0, H-1)
    
    def euq_dist(self, other):
        return abs(self.x - other.x) + abs(self.y - other.y)

UP = Pos(0, -1)
DOWN = Pos(0, 1)
RIGHT = Pos(1, 0)
LEFT = Pos(-1, 0)

M = [UP, DOWN, RIGHT, LEFT]

In [None]:
import numpy as np

class Game:
    def __init__(self): 
        self.width = W
        self.height = H
        self.apple = Pos()
        self.apple.random_pos()
        self.snake = [Pos(int(W / 2), int(H / 2)), 
                      Pos(int(W / 2), int(H / 2)+1), 
                      Pos(int(W / 2), int(H / 2)+2)]
        self.apple_counter = 0
        self.direction = UP
        self.score = 0
        self.step_counter = 0

    def next_step(self): # Returns (reward, everything alright?, score)
        reward = 0
        self.step_counter += 1
        self.apple_counter += 1

        self.snake.insert(0, self.snake[0] + self.direction)
        if self.snake[0].x < 0 or self.snake[0].y < 0 or self.snake[
                0].x >= W or self.snake[0].y >= H or self.snake[0] in self.snake[1:] or self.apple_counter > 25:
            reward = -10
            return (reward, False, self.score)
        if self.snake[0] == self.apple:
            # Wąż się nie kurczy!
            # Wylosuj nową pozycję dla jabłka
            self.score += 1
            reward += 10
            self.apple.random_pos()
            self.apple_counter = 0
            # Póki wylosowana wartość jest "w" wężu, losuj znowu
            while self.apple in self.snake:
                self.apple.random_pos()
        else:
            self.snake.pop()
        return (reward, True, self.score)

    def get_field(self, pos):
        # 1 - Apple
        # 2 - Snake
        # 3 - Out of bounds
        if self.apple == pos:
            return 1
        elif pos in self.snake:
            return 2
        elif pos.x < 0 or pos.x >= W or pos.y < 0 or pos.y >= H:
            return 3
        return 0

    def get_qstate(self):
        # Create an empty state array
        state = np.empty(0)
        clockwise = [UP, RIGHT, DOWN, LEFT]
        cnt = 0
        if self.direction == UP:
            cnt = 0
        elif self.direction == RIGHT:
            cnt = 1
        elif self.direction == DOWN:
            cnt = 2
        elif self.direction == LEFT:
            cnt = 3
        
        fwd = clockwise[cnt%4]
        right = clockwise[(cnt+1)%4]
        left = clockwise[(cnt+3)%4]

        '''if fwd == UP:
            state = np.append(state, [
                True, False, False, False,
                
            ])'''
        state = np.append(state, [
            fwd == UP,
            fwd == DOWN,
            fwd == RIGHT,
            fwd == LEFT
        ])

        state = np.append(state, [
            self.snake[0].x > self.apple.x,
            self.snake[0].x < self.apple.x,
            self.snake[0].x == self.apple.x
        ])

        state = np.append(state, [
            self.snake[0].y > self.apple.y,
            self.snake[0].y < self.apple.y,
            self.snake[0].y == self.apple.y
        ])

        state = np.append(state, [
            self.apple == Pos(self.snake[0].x, self.snake[0].y-1),
            Pos(self.snake[0].x, self.snake[0].y-1) in self.snake,
            self.snake[0].x < 0 or self.snake[0].y-1 < 0 or self.snake[0].x >= W or self.snake[0].y-1 >= H
        ])

        state = np.append(state, [
            self.apple == Pos(self.snake[0].x, self.snake[0].y+1),
            Pos(self.snake[0].x, self.snake[0].y+1) in self.snake,
            self.snake[0].x < 0 or self.snake[0].y+1 < 0 or self.snake[0].x >= W or self.snake[0].y+1 >= H
        ])
        
        state = np.append(state, [
            self.apple == Pos(self.snake[0].x+1, self.snake[0].y),
            Pos(self.snake[0].x+1, self.snake[0].y) in self.snake,
            self.snake[0].x+1 < 0 or self.snake[0].y < 0 or self.snake[0].x+1 >= W or self.snake[0].y >= H
        ])

        state = np.append(state, [
            self.apple == Pos(self.snake[0].x-1, self.snake[0].y),
            Pos(self.snake[0].x-1, self.snake[0].y) in self.snake,
            self.snake[0].x-1 < 0 or self.snake[0].y < 0 or self.snake[0].x-1 >= W or self.snake[0].y >= H
        ])
        return state.reshape((1, 22))

In [None]:
from collections import deque
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras import Input
from keras.layers import Dense
from keras.optimizers import Adam
from keras.utils import to_categorical
import numpy as np
import random

MAX_MEMORY = 100000

class Agent:
    def __init__(self):
        self.model = Sequential()
        self.model.add(Dense(64, activation='relu', input_dim=22))
        self.model.add(Dense(48, activation='relu'))
        #self.model.add(Dense(24, activation='relu'))
        #self.model.add(Dense(12, activation='relu'))
        #self.model.add(Dense(3, activation='softmax'))

        #self.model.add(Dense(256, activation="relu", input_dim=22))
        self.model.add(Dense(3, activation="softmax"))

        self.model.compile(
            loss="mse",
            optimizer=Adam()
        )

        self.memory = deque(maxlen=MAX_MEMORY)
        self.gamma = 0.9
        self.game_count = 0
    
    def get_action(self, state):
        pred = self.model.predict(state, verbose = "0")
        move = to_categorical(np.argmax(pred[0]), num_classes=3)
        clockwise_dir = [UP, RIGHT, DOWN, LEFT]
        cnt = 0
        
        epsilon = np.exp(-0.1*self.game_count)
        if random.randint(0, 100) < epsilon:
            cnt += random.randint(-1, 1)
        else:
            if move[0] == 1.0:
                cnt -= 1
            elif move[2] == 1.0:
                cnt += 1
        
        if state[0][1] == 1:
            cnt += 2
        elif state[0][2] == 1:
            cnt += 1
        elif state[0][3] == 1:
            cnt += 3

        return (clockwise_dir[cnt%4], pred)

    def train_short_memory(self, state, action, reward, new_state, alright):
        if not alright:
            target = reward
        else:
            target = reward + self.gamma * np.amax(self.model.predict(new_state, verbose = "0")[0])
        (_, target_f) = self.get_action(state)
        target_f[0][np.argmax(action)] = target
        print(action)
        self.model.fit(state, target_f, epochs = 1)
    
    def replay_mem(self, replay_batch_size):
        print("Replaying memory of game no", self.game_count)
        self.game_count += 1
        replay_batch_size = min(replay_batch_size, len(self.memory))
        batch = random.sample(self.memory, replay_batch_size)
        for (state, action, reward, new_state, alright) in batch:
            self.train_short_memory(state, action, reward, new_state, alright)

    def load_model(self, model_path):
        self.model.load_weights(model_path)

In [None]:
from dataclasses import dataclass
import random
import time
import numpy as np

def train(model_path = "", save_path = ""):
    running = True

    game = Game()
    agent = Agent()
    if model_path != "":
        agent.load_model(model_path)
    print("Starting game no", agent.game_count)

    while running:
        current_state = game.get_qstate()
        (action, pred) = agent.get_action(current_state)

        game.direction = action
        (reward, game_alright, score) = game.next_step()

        new_state = game.get_qstate()
        
        # train short memory
        agent.train_short_memory(current_state, pred, reward, new_state, game_alright)
        # append memory
        agent.memory.append((current_state, pred, reward, new_state, game_alright))

        if not game_alright or game.step_counter > 80*(game.score+3):
            game = Game(0)
            # train long memory
            agent.replay_mem(1000)
            print("Starting game no", agent.game_count)
            if agent.game_count % 10 == 0:
                agent.model.save(save_path + "/" + str(agent.game_count))

Starting game no 0
[[0.592664  0.0263946 0.3809414]]
[[0.46385357 0.43683216 0.09931432]]
[[0.5557402  0.3981326  0.04612725]]
[[0.90924597 0.06547626 0.02527775]]
[[0.59078616 0.02533042 0.38388348]]
[[0.47016448 0.43056864 0.0992669 ]]
[[0.56911385 0.38450873 0.04637747]]
[[0.91076446 0.06388462 0.02535087]]
[[0.5837654  0.02428911 0.39194557]]
[[0.4767577  0.42367065 0.0995717 ]]
[[0.5895018  0.36425993 0.04623822]]
[[0.9116431  0.06283504 0.02552183]]
[[0.5736463  0.02328539 0.40306824]]
[[0.48373064 0.4162189  0.10005048]]
[[0.61312675 0.34107518 0.045798  ]]
[[0.91207063 0.06216888 0.02576049]]
[[0.5619311  0.02233387 0.4157351 ]]
[[0.49121442 0.4082158  0.10056977]]
[[0.6373172  0.31753543 0.04514739]]
[[0.9121586  0.06179392 0.02604753]]
[[0.5497523  0.0214478  0.42879993]]
[[0.4992998  0.3996681  0.10103201]]
[[0.6603633  0.29526228 0.04437444]]
[[0.91197515 0.06165482 0.02636997]]
[[0.5379685  0.02063777 0.44139373]]
[[0.5080091 0.3906215 0.1013694]]
Replaying memory of game 

In [None]:
train("")