In [144]:
import numpy as np
import random
from copy import deepcopy

In [145]:
class SnakeNode():
    def __init__(self, parent, x, y):
        self.parent = parent
        self.x = x
        self.y = y
    def SetX(self, newx):
        self.x = newx
    def SetY(self, newy):
        self.y = newy
    def SetParent(self, newparent):
        self.parent = newparent
    def Pos(self):
        return (self.x, self.y)
    def Index(self):
        return (self.y, self.x)

In [146]:
class Snake():

    def __init__(self, HeadX, HeadY):
        self.head = SnakeNode(None, HeadX, HeadY)
        self.tail = self.head

    def MoveBodyForward(self):
        currentnode = self.tail
        while (currentnode.parent != None):
            parentpos = currentnode.parent.Pos()
            currentnode.SetX(parentpos[0])
            currentnode.SetY(parentpos[1])
            currentnode = currentnode.parent

    def Move(self, direction):
        (oldtailx, oldtaily) = self.tail.Pos()
        self.MoveBodyForward()
        headpos = self.head.Pos()
        if direction == 0:
            self.head.SetY(headpos[1] - 1)
        elif direction == 1:
            self.head.SetX(headpos[0] + 1)
        elif direction == 2:
            self.head.SetY(headpos[1] + 1)
        elif direction == 3:
            self.head.SetX(headpos[0] - 1)
        
        return (oldtailx, oldtaily, *self.head.Pos())
    
    def NewHead(self, headx, heady):
        newhead = SnakeNode(None, headx, heady)
        self.head.SetParent(newhead)
        self.head = newhead

    def Head(self):
        return self.head
    def Tail(self):
        return self.tail

In [147]:
class Game():
    x = 1
    def __init__(self, weights):
        self.foodval = 10
        self.headval = 5
        self.bodyval = 2
        self.features = np.zeros(5, dtype= int)
        #self.theta = np.ones(np.shape(self.features), dtype= int)
        self.theta = weights
        self.statevalue = 0
        self.obstacleval = 1
        self.length = 1
        self.board = np.zeros((20,20), dtype=int)
        self.SpawnObstacle()
        self.SpawnSnake()
        self.SpawnFood()




    def SpawnObstacle(self):
        obstaclespawns = []
        obstaclespawns.append(np.random.randint(1, 19, size=(2)))
        obstaclespawns.append(np.random.randint(1, 19, size=(2)))
        obstaclespawns.append(np.random.randint(1, 19, size=(2)))
        obstaclespawns.append(np.random.randint(1, 19, size=(2)))
        for i in obstaclespawns:
            b0 = i
            b1 = (i[0] + 1, i[1])
            b2 = (i[0], i[1] + 1)
            b3 = (i[0] + 1, i[1] + 1)
            self.board[b0[0], b0[1]] = self.obstacleval
            self.board[b1[0], b1[1]] = self.obstacleval
            self.board[b2[0], b2[1]] = self.obstacleval
            self.board[b3[0], b3[1]] = self.obstacleval



    def SpawnSnake(self):
        snakespawns = []
        for index, cell in np.ndenumerate(self.board):
            if cell == 0:
                snakespawns.append(index)
        self.snakespawn = random.choice(snakespawns)
        startx = self.snakespawn[0]
        starty = self.snakespawn[1]
        #self.board[startx, starty] = self.headval
        self.snake = Snake(startx, starty)
        return startx, starty


    def SpawnFood(self):
        foodspawns = []
        for index, cell in np.ndenumerate(self.board):
            if cell == 0:
                foodspawns.append(index)
        self.foodspawn = random.choice(foodspawns)
        self.board[self.foodspawn] = self.foodval

    def HeadPos(self, direction, headx, heady):
        if(0<headx<20) and (0<heady<20):
            if direction == 0:
                heady -= 1
            elif direction == 1:
                headx += 1
            elif direction == 2:
                heady += 1
            elif direction == 3:
                headx -= 1
        return (headx, heady)
    
    def CollisionCheck(self, direction, headx, heady):
        
        headx, heady = self.HeadPos(direction, headx, heady)

        if (headx == 0) or (headx == 20) or (heady == 0) or (heady == 20):
            return True
        elif self.board[heady, headx] == self.bodyval:
            return True
        elif self.board[heady, headx] == self.obstacleval:
            return True
        return False
   
    def Extractfeatures(self, headx, heady):
        
        for i in range(4):
            self.features[i] = self.CollisionCheck(i, headx, heady)
        #dist = self.FoodDirection(heady, headx)
        #self.features[4:] = 1 / (dist + 1)
        self.features[4:] = deepcopy(self.FoodDirection(heady, headx))
        return self.features
    
    def FoodDirection(self, heady, headx):
        foodvector = np.array(self.foodspawn) - np.array((heady, headx))
        foodfeatures = np.abs(foodvector)
        mandist = foodfeatures[0] + foodfeatures[1]
        return mandist

    def Move(self, direction):
        (headx, heady) = self.snake.Head().Pos()
        gameisover = False
        if not self.CollisionCheck(direction, headx, heady):
            reward = 0
            (headx, heady) = self.snake.Head().Pos()
            self.board[heady, headx] = self.bodyval


            potx, poty = self.HeadPos(direction, headx, heady)

            if self.board[poty, potx] == self.foodval:
                self.snake.NewHead(potx, poty)
                self.board[poty, potx] = self.headval
                self.SpawnFood()
                self.length += 1 
                reward = 10000
            else:
                (oldtailx, oldtaily, newheadx, newheady) = self.snake.Move(direction)
                self.board[oldtaily, oldtailx] = 0
                self.board[newheady, newheadx] = self.headval
        else:
            reward = -1000
            gameisover = True
        
        self.features = self.Extractfeatures(headx, heady)
        self.statevalue = np.dot(self.theta.T, self.features)
        return self.statevalue, self.features, reward, gameisover, self.length


    def BestMove(self):
        (headx, heady) = self.snake.Head().Pos()
        Wfeatures = deepcopy(self.Extractfeatures(headx, heady-1))
        Afeatures = deepcopy(self.Extractfeatures(headx-1, heady))
        Sfeatures = deepcopy(self.Extractfeatures(headx, heady+1))
        Dfeatures = deepcopy(self.Extractfeatures(headx+1, heady))
        Wscore = np.dot(self.theta.T, Wfeatures)
        Ascore = np.dot(self.theta.T, Afeatures)
        Sscore = np.dot(self.theta.T, Sfeatures)
        Dscore = np.dot(self.theta.T, Dfeatures)
        scores = []
        scores.append(Wscore)
        scores.append(Dscore)
        scores.append(Sscore)
        scores.append(Ascore)
        bestmove = np.argmax(scores)
        return bestmove


    def display(self):
        for i in range(22):
            print('-', end='')

        for i in range(20):
            print('\n|', end='')
            for j in range(20):
                if self.board[i, j] == 0:
                    print(' ', end='')
                elif self.board[i, j] == self.headval:
                    print('S', end='')
                elif self.board[i, j] == self.bodyval:
                    print('#', end='')
                elif self.board[i, j] == self.foodval:
                    print('F', end='')
                elif self.board[i, j] == self.obstacleval:
                    print('X', end='')
            print('|', end='')
        print()
        for i in range(22):
            print('-', end='')
        print()

In [148]:
def Epoch(weights):
    gamelog = []
    rewardlog = []
    valuelog = []
    game = Game(weights)

    while True:
        game.display()
        direction = game.BestMove()
        #direction = input("Input Direction (w,a,s,d or q to quit): ")
        if direction == 0:
            statevalue, features, reward, gameOver, score = game.Move(0)
        elif direction == 3:
            statevalue, features, reward, gameOver, score = game.Move(3)
        elif direction == 2:
            statevalue, features, reward, gameOver, score = game.Move(2)
        elif direction == 1:
            statevalue, features, reward, gameOver, score = game.Move(1)


        tempval = deepcopy(statevalue)
        tempfeature = deepcopy(features)
        tempreward = deepcopy(reward)
        rewardlog.append(tempreward)
        gamelog.append(tempfeature)
        valuelog.append(tempval)
        print(f"features:{features}  statevalue:{statevalue}  reward:{reward}, length: {score}")

        if gameOver:
            print(f"Game Over, Snake Length:{score}")
            break
    
    return gamelog, rewardlog, score

In [149]:
def UpdateWeigths(weights, gamelog, rewardlog, learning_rate):
    lastfeature = deepcopy(gamelog[-1])
    realreward = deepcopy(rewardlog[-1])
    rewardhat = np.dot(weights.T, lastfeature)
    weights = weights + learning_rate*(realreward - 0)*lastfeature
    return weights

In [150]:
weights = np.array([0, 0, 0, 0, 0])
learning_rate = 0.0001
iterations = 30
longest_snake = 0
for i in range(iterations):
    gamelog, rewardlog , score= Epoch(weights)
    weights = UpdateWeigths(weights, gamelog, rewardlog, learning_rate)
    if score > longest_snake:
        longest_snake = score

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
|        XXXX        |
|        XXXX        |
|                    |
|               XX   |
|F              XX   |
|                    |
|         S          |
|                    |
|                    |
|                    |
|                    |
|                    |
|                    |
|              XX    |
|              XX    |
|                    |
----------------------
features:[ 0  0  0  0 11]  statevalue:-216.70000000000005  reward:0, length: 1
----------------------
|                    |
|                    |
|                    |
|                    |
|        XXXX        |
|        XXXX        |
|                    |
|               XX   |
|F              XX   |
|         S          |
|                    |
|                    |
|                    |
|                    |
|                    |
|                    |
|                    |
|              XX    |
|              XX    |
|    

In [152]:
print(f"Training concluded after {iterations} iterations")
print(f"Longest snake length achieved: {longest_snake}")
print(f"Learned Weigths: {weights}")

Training concluded after 30 iterations
Longest snake length achieved: 5
Learned Weigths: [ -2.1  -1.3  -1.5  -1.5 -33.7]
