In [58]:
from game import Game
from entity import Entity, PlayerEntity, AIGuide, AILearningGuide
import numpy as np
import ipywidgets as widgets
import matplotlib.pyplot as plt
#from tsp_solver.greedy import solve_tsp
from math import ceil
import time
import pickle
import pandas as pd

In [54]:
# Module-level description of the currently loaded level.  loadLevel() rebinds
# all four names; SimGame reads h and w as its board height and width.
gameEntities, playerEntities = [], []
h = w = 9

def distInObjects(posA,posB):
    """Distance between two (row, col) positions, counted in 3-cell hops.

    The offset is split into a diagonal leg (the smaller of the two axis
    offsets) and a straight leg (what remains on the longer axis).  Each leg
    is divided by three and rounded up, and the two counts are added.
    """
    # presumably 3 is how far apart consecutive spawned objects may be - TODO confirm
    shorter, longer = sorted((abs(posA[0]-posB[0]), abs(posA[1]-posB[1])))
    return ceil((longer - shorter)/3) + ceil(shorter/3)

def loadLevel(level):
    """Make ``level`` the active level by rebinding the module globals.

    ``level[0]`` becomes ``playerEntities`` and ``level[1]`` becomes
    ``gameEntities``.  The board size is not stored in the level; it is
    derived from the number of game entities: 4 -> 9x9, 10 -> 20x25,
    anything else -> 40x60 (height x width).
    """
    global playerEntities, gameEntities, h, w
    playerEntities = level[0]
    gameEntities = level[1]
    boardByEntityCount = {4: (9, 9), 10: (20, 25)}
    h, w = boardByEntityCount.get(len(gameEntities), (40, 60))
        
def save_obj(obj, name):
    """Pickle ``obj`` to ``obj/<name>.pkl`` (relative to the working directory).

    The target directory is created when it does not exist yet, so the first
    save on a fresh checkout no longer fails with FileNotFoundError.

    Args:
        obj: any picklable object.
        name: file stem, without the ``.pkl`` extension.
    """
    import os  # local import: the notebook's import cell does not provide os

    path = 'obj/' + name + '.pkl'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    """Return the object unpickled from ``obj/<name>.pkl``.

    Unpickling can execute arbitrary code, so only load files that were
    written by this project.
    """
    picklePath = 'obj/' + name + '.pkl'
    with open(picklePath, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

In [55]:
def minPath():
    """Solve a TSP over the first player's start cell and every game entity.

    Node 0 is the first entry of ``playerEntities``; the remaining nodes are
    the entries of ``gameEntities`` in order.  Pairwise costs come from
    distInObjects().

    Returns:
        (path, length): the node order returned by solve_tsp and its total
        cost as computed by lenPath().
    """
    # NOTE(review): solve_tsp is not imported anywhere visible (the import in
    # the first cell is commented out), and a recorded run of this notebook
    # failed with "solve_tsp() got an unexpected keyword argument 'ini'".
    # Confirm which solver implementation is expected to be in scope.
    start = playerEntities[0]
    positions = [(start['row'], start['col'])]
    positions.extend((e['row'], e['col']) for e in gameEntities)

    # Lower-triangular cost matrix: row i lists the distance from node i to
    # every node j < i, so row 0 is empty.
    distances = []
    for i, posI in enumerate(positions):
        distances.append([distInObjects(posI, positions[j]) for j in range(i)])

    path = solve_tsp(distances, ini=0)
    return path,lenPath(distances,path)

def lenPath(distances,path):
    """Total cost of visiting ``path`` in order.

    ``distances`` is indexed as ``distances[larger][smaller]`` (a lower
    triangular matrix), so each consecutive pair of nodes is ordered before
    the lookup.  A path with fewer than two nodes costs 0.
    """
    total = 0
    for first, second in zip(path, path[1:]):
        lo, hi = sorted((first, second))
        total += distances[hi][lo]
    return total

In [56]:
# Final state['score'] of every headless episode run by LearnManager.  The
# evaluation cells rebind this name to a fresh list before each batch and
# average it afterwards.
ticks = []

class LearnManager():
    """Runs SimGame episodes and threads the guide's Q table through them.

    With ``disp`` true a single displayed SimGame is created and kept in
    ``self.sGame``; the constructor runs no further episodes.  Otherwise
    ``iterations`` episodes are created one after another: each one starts
    from the Q table left by the previous one, and its final
    ``state['score']`` is appended to the module-level ``ticks`` list.
    (Presumably every SimGame(...) call plays its episode to completion
    inside Game.__init__ - confirm in game.py.)

    Args:
        Q0: initial Q table handed to the guide.  ``None`` (the default)
            means a fresh empty dict for this manager; a literal ``{}``
            default would be shared by every manager created without Q0.
        iterations: number of headless episodes to run.
        eps: exploration setting.  0 disables exploration; a value outside
            [0, 1] selects the decaying schedule 1/2, 1/3, 1/4, ... over the
            episodes; any other value is passed to the game unchanged (the
            default ``True`` compares equal to 1 and is passed through).
        sleepTime: forwarded to SimGame as ``updateTime``.
        disp: whether the game is created with a display.
        idle: forwarded to SimGame as ``idle``.
    """
    def __init__(self, Q0 = None, iterations = 1000, eps = True, sleepTime = 0.005, disp = False, idle = (10,0)):
        self.showDisp = disp
        self.sleepTime = sleepTime
        self.startIterations = iterations
        self.iterations = iterations
        # `is None` rather than `or {}` so that an empty dict supplied by the
        # caller is still the one that gets filled in.
        self.Q = {} if Q0 is None else Q0
        self.eps = eps

        if self.showDisp:
            self.sGame = self._newGame(idle)
        else:
            while self.iterations > 0:
                self.sGame = self._newGame(idle)
                self.Q = self.sGame.guide.Q
                self.iterations -= 1
                ticks.append(self.sGame.state['score'])
                # Drop the finished game before building the next one.
                del self.sGame

    def _epsilon(self):
        """Exploration rate for the episode that is about to start."""
        if self.eps == 0:
            return 0
        if self.eps < 0 or self.eps > 1:
            # iterations counts down from startIterations, so this yields
            # 1/2 for the first episode, 1/3 for the second, and so on.
            return 1./(self.startIterations+2-self.iterations)
        return self.eps

    def _newGame(self, idle):
        """Build one SimGame from the loaded level and the current Q table."""
        return SimGame(entities=gameEntities, updateTime = self.sleepTime, pEntities=playerEntities,aiguide=True,
                       decisionLog = False, guideSleepTime = 13, Q0 = self.Q, eps = self._epsilon(), lm = self,
                       disp = self.showDisp, idle = idle)

In [57]:
# Width and height, in pixels, of one board cell in the HTML view.
CELL_SIZE = 24
# (row, col) unit offsets for the eight headings; a player's `rotation` is an
# index into this list and consecutive entries are 45 degrees apart.
rots = [(0,1),(1,1),(1,0),(1,-1),(0,-1),(-1,-1),(-1,0),(-1,1)]

class SimGame(Game):
    """Grid simulation of player entities, optionally steered by a learning guide.

    Each tick every player picks an action (look around, walk forward, walk
    towards a position, pick up) which is executed here.  When a guide is
    attached, the guide alternates between sleeping and spawning pickable
    objects and is trained on the offset between the first player and the
    nearest goal.  The board can be rendered as an HTML table.
    """
    def __init__(self, updateTime = 0.1, entities=None, pEntities=None,
                 aiguide = True, decisionLog = False, guideSleepTime = 10,
                 Q0 = None, eps = 0.01, lm = None, disp = True, idle = (10,0)):
        """Build the board state, then hand control to Game.__init__.

        Args:
            updateTime: forwarded to Game.__init__.
            entities: iterable of entity descriptions wrapped in Entity.
            pEntities: iterable of player descriptions (dicts with at least
                'row' and 'col') wrapped in PlayerEntity.
            aiguide: attach an AILearningGuide when truthy.
            decisionLog: print the player action reports every tick.
            guideSleepTime: sleep length given to the guide.
            Q0: Q table handed to the guide; ``None`` means a fresh dict.
            eps: exploration rate handed to the guide.
            lm: object stored as ``self.lm`` (the LearnManager that owns
                this game, when there is one).
            disp: forwarded to Game.__init__ as ``showDisplay``.
            idle: pair paired with the players' LOOK_AROUND and WALK_FORWARD
                idle choices (presumably relative weights - confirm in
                entity.py).
        """
        self.hasGuide = False
        if aiguide:
            self.hasGuide = aiguide
            # A literal {} default would be one dict shared by every SimGame
            # created without Q0, so one guide's table would leak into the next.
            if Q0 is None:
                Q0 = {}
            self.guide = AILearningGuide(eps = eps, gamma = .5, lr = .33, sleepTime = guideSleepTime, Qtable = Q0)
            self.guideActionReport = [['SLEEP',[5]],'PROGRESS']
            self.goals = []
            self.oldState = 'start'
            self.reward = 0
            self.lastAction = (0,0)
            self.lm = lm
        self.log = decisionLog
        self.board = None
        self.gameEntities = []
        self.playerEntities = []
        self.playerActionReports = []

        # Board size comes from the module-level h / w set by loadLevel().
        self.boardHeight = h
        self.boardWidth = w
        self.boardVisits = np.zeros((self.boardHeight,self.boardWidth))
        self.state = {'ticks': 0, 'score': 0}

        if entities is not None:
            for e in entities:
                self.gameEntities.append(Entity(e))
            if aiguide:
                for e in self.gameEntities:
                    if e.entityType == 'GOAL':
                        self.goals.append(e)

        if pEntities is not None:
            for p in pEntities:
                self.playerEntities.append(PlayerEntity(p, idleChoices = [(['LOOK_AROUND', [1]],idle[0]),(['WALK_FORWARD', [1]],idle[1])]))
                self.boardVisits[p['row'],p['col']] += 1
                self.playerActionReports.append([['IDLE',[]],'OK'])

        Game.__init__(self, showDisplay=disp, updateTime = updateTime)

    def entitiesInCell(self,row,col):
        """Return game entities, then players, that occupy cell (row, col).

        An entity occupies the cell when it is positioned exactly there or
        when the cell lies inside the size x size square that starts at its
        position.
        """
        def occupies(e):
            if e.position[0]==row and e.position[1]==col:
                return True
            return e.position[0]<=row and e.position[0]+e.size>row and e.position[1]<=col and e.position[1]+e.size>col

        entities = [e for e in self.gameEntities if occupies(e)]
        entities.extend(p for p in self.playerEntities if occupies(p))
        return entities

    def cellsSeenByPlayer(self, player):
        """List the (row, col) cells inside ``player``'s field of view.

        The player's own cell always comes first.  For a diagonal heading
        (odd rotation) the view is the cells on the diagonal up to
        ``vision`` steps plus the cells between the diagonal and both axes.
        For a straight heading (even rotation) it is a cone that widens by
        one cell on each side per step.  Cells are not clipped to the board.
        """
        cells = [(player.position[0],player.position[1])]
        if player.rotation % 2 != 0:
            for i in range(1,player.vision+1):
                cells.append((player.position[0] + i * rots[player.rotation][0],
                              player.position[1] + i * rots[player.rotation][1]))
                for j in range(1,i):
                    cells.append((player.position[0] + i * rots[player.rotation][0],
                                  player.position[1] + j * rots[player.rotation][1]))
                    cells.append((player.position[0] + j * rots[player.rotation][0],
                                  player.position[1] + i * rots[player.rotation][1]))
        else:
            # Heading perpendicular to the view direction, used to widen the cone.
            auxRot = (player.rotation + 2) % 8
            for i in range(0,player.vision):
                for j in range(-i,i+1):
                    cells.append((player.position[0] + (i+1) * rots[player.rotation][0] + j * rots[auxRot][0],
                                  player.position[1] + (i+1) * rots[player.rotation][1] + j * rots[auxRot][1]))
        return cells

    def entitiesSeenByPlayer(self, player):
        """Return every entity found in the cells ``player`` can see.

        An entity covering several seen cells appears once per such cell.
        """
        entitiesSeen = []
        for c in self.cellsSeenByPlayer(player):
            entitiesSeen.extend(self.entitiesInCell(c[0],c[1]))
        return entitiesSeen

    def _cellsSeenByPlayers(self):
        """Concatenate the cells seen by every player (duplicates kept)."""
        cellsSeen = []
        for p in self.playerEntities:
            cellsSeen += self.cellsSeenByPlayer(p)
        return cellsSeen

    def getCell(self,row,col,cellsSeen=None):
        """Return the HTML ``<td>`` for one board cell.

        Colour priority: first entity in the cell (red for a pickable object,
        pink for a goal, orange for anything else), then green for a cell
        some player can see, otherwise a green shade that gets brighter the
        more often the cell was visited.

        Args:
            row, col: cell coordinates.
            cellsSeen: optional precomputed collection of seen cells; it is
                computed from the players when omitted.
        """
        if cellsSeen is None:
            cellsSeen = self._cellsSeenByPlayers()

        entities = self.entitiesInCell(row,col)
        entity = entities[0] if len(entities) > 0 else None
        if entity is not None:
            if entity.entityType == 'PICKABLE_OBJECT':
                cellBackground = '#ff0000'
            elif entity.entityType == 'GOAL':
                cellBackground = '#ff69b4'
            else: cellBackground = '#ff9900'
        elif (row,col) in cellsSeen:
            cellBackground = '#00ff00'
        else:
            # With zero visits the channel is already 63 (0x3f), so the hex
            # string always has two digits after the "0x" prefix.
            g = hex(int(255*(1-1/(1+(1+self.boardVisits[row,col])*0.33))))
            color = "#33" + str(g)[2:4] + "33"
            cellBackground = color
        return '<td id="%04d" style="width:%dpx;height:%dpx;background:%s;"> </td>' % (row*100+col,CELL_SIZE,CELL_SIZE,cellBackground)

    def buildBoardView(self):
        """Render the whole board as an HTML table string."""
        # Computed once per render instead of once per cell.
        cellsSeen = self._cellsSeenByPlayers()
        parts = ['<table style="background:#000;border-collapse:separate;border-spacing:0px">']
        for row in range(self.boardHeight):
            parts.append('<tr>')
            for col in range(self.boardWidth):
                parts.append(self.getCell(row,col,cellsSeen))
            parts.append('</tr>')
        parts.append('</table>')
        return ''.join(parts)

    def setBoardView(self):
        """Create, store and return the HTML widget showing the board."""
        self.boardView = widgets.HTML(value=self.buildBoardView(),disabled=False)
        return self.boardView

    def setStateView(self):
        """Create the ticks/score labels, store them and return their layout."""
        ticks = widgets.Label(value="0000")
        score = widgets.Label(value="%04d" % self.state['score'])
        self.stateView = {'ticks': ticks, 'score': score}
        return widgets.HBox([widgets.VBox([widgets.Label(value="Ticks"),
                                           widgets.Label(value="Score")]),
                                    widgets.VBox([widgets.Label(value=": "),
                                                  widgets.Label(value=": ")]),
                                    widgets.VBox([ticks,score])])

    def setActionButtons(self):
        """Return the action-button widget; this game has no buttons."""
        buttons = '<span>'
        return widgets.HTML(value=buttons,disabled=False)

    def gameStep(self):
        """Advance the simulation by one tick and refresh the display.

        Always returns False.
        """
        self.executionManager()
        self.state['ticks'] += 1
        if self.display:
            self.stateView['ticks'].value = "%04d" % self.state['ticks']
            self.stateView['score'].value = "%04d" % self.state['score']
            self.boardView.value = self.buildBoardView()
        return False

    def executionManager(self):
        """Run one tick: every player acts, then the guide (if any) acts.

        Nothing happens when ``gameStatus`` is 2 (presumably the stopped
        state - confirm in game.py).
        """
        if self.gameStatus != 2:
            for i in range(len(self.playerEntities)):
                self._runPlayerAction(i)
            if self.log: print(self.playerActionReports)

            if self.hasGuide:
                self._runGuide()
        return

    def _runPlayerAction(self, i):
        """Let player ``i`` choose an action, execute it and record the report."""
        p = self.playerEntities[i]
        report = self.playerActionReports[i]
        entitiesSeen = self.entitiesSeenByPlayer(p)

        action = p.chooseAction(entitiesSeen, report)
        if action[0] == 'LOOK_AROUND':
            p.rotation = (p.rotation + action[1][0]) % 8
            self.playerActionReports[i] = [action,'OK']

        elif action[0] == 'WALK_FORWARD':
            desiredPosition = p.position + rots[p.rotation]
            # NOTE(review): the upper bounds exclude the last row and column
            # (< size - 1), while objects can be spawned there - confirm
            # whether this is intended before changing it.
            if min(desiredPosition[0],desiredPosition[1]) >= 0 and desiredPosition[0] < self.boardHeight -1 and desiredPosition[1] < self.boardWidth-1:
                p.position = desiredPosition
                self.playerActionReports[i] = [action,'OK']
                self.boardVisits[p.position[0],p.position[1]] += 1
            else:
                self.playerActionReports[i] = [action,'INTERRUPTED']

        elif action[0] == 'WALK_TOWARDS':
            goal = action[1][0]
            dif = goal - p.position
            x = np.sign(dif[0])
            y = np.sign(dif[1])
            # The report is decided from the distance before the move.
            if p.distanceToPos(goal) == 1:
                self.playerActionReports[i] = [action,'OK']
            else:
                self.playerActionReports[i] = [action,'PROGRESS']
            if p.distanceToPos(goal) > 0: p.rotation = rots.index((x,y))
            # NOTE(review): no bounds check here, unlike WALK_FORWARD.
            p.position += np.array([x,y])
            self.boardVisits[p.position[0],p.position[1]] += 1

        elif action[0] == 'PICK_UP':
            es = self.entitiesInCell(p.position[0],p.position[1])
            found = False
            if len(es)>0:
                for e in es:
                    if e is not None and e.entityType == 'PICKABLE_OBJECT':
                        self.gameEntities.remove(e)
                        found = True
            if found:
                self.playerActionReports[i] = [action,'OK']
            else: self.playerActionReports[i] = [action,'INTERRUPTED']

    def _runGuide(self):
        """Collect reached goals, then let the guide sleep, learn or spawn."""
        p = self.playerEntities[0]
        # Nearest goal to the first player goes first.
        self.goals = sorted(self.goals, key=lambda g: p.distanceToPos(g.position))
        if len(self.goals) > 0 and p.distanceToPos(self.goals[0].position) == 0:
            self.reward = 1.
            self.gameEntities.remove(self.goals[0])
            self.goals.remove(self.goals[0])
        if len(self.goals) > 0:
            g = self.goals[0]
            # State is the (row, col) offset from the player to the nearest goal.
            state = (g.position[0]-p.position[0],g.position[1]-p.position[1])
        else:
            state = 'terminal'
        guideAction = self.guide.chooseAction(self.guideActionReport,state)
        if guideAction[0] == 'SLEEP':
            # Count the sleep down in place; the guide learns when it ends.
            guideAction[1][0] -= 1
            if guideAction[1][0] == 0:
                self.guideActionReport = [guideAction,'OK']
                #learn
                if self.oldState != 'start':
                    self.guide.learn(self.oldState,self.lastAction,self.reward,state)
                    self.oldState = state
                    # Small per-step penalty until the next goal is reached.
                    self.reward = -0.01
                    if state == 'terminal':
                        self.stopGame()
                else:
                    self.oldState = state
            else: self.guideActionReport = [guideAction,'PROGRESS']
        if guideAction[0] == 'SPAWN_OBJECT':
            newObject = {'type': 'PICKABLE_OBJECT', 'row': 0, 'col': 0, 'size': 1, 'id': 1, 'rot': 0}
            if guideAction[1][0] == 'random':
                newObject['row'] = np.random.randint(0, self.boardHeight)
                newObject['col'] = np.random.randint(0, self.boardWidth)
                self.gameEntities.append(Entity(newObject))
            else:
                # Every non-random spawn decision counts one point, whether
                # or not the target cell turns out to be on the board.
                self.state['score'] += 1
                self.lastAction = guideAction[1][0]
                rot = guideAction[1][0][0]
                dist = guideAction[1][0][1]
                r = p.position[0] + dist * rots[rot][0]
                c = p.position[1] + dist * rots[rot][1]
                if r in range(0, self.boardHeight) and c in range(0, self.boardWidth):
                    newObject['row'] = r
                    newObject['col'] = c
                    self.gameEntities.append(Entity(newObject))
            self.guideActionReport = [['SLEEP',[self.guide.sleepTime]],'PROGRESS']

In [7]:
# Evaluate the "-irace" Q tables: for every layout and every saved table, run
# 1000 single-episode evaluations with exploration off and report the mean
# score next to the TSP lower bound from minPath().
layouts = ["SMALL","MEDIUM","BIG"]
savePlace = ["10_0","9_1","8_2","7_3","Random"]
personalities = [(10,0),(9,1),(8,2),(7,3)]
levels = load_obj("Levels")
steps,bestPerLayout = {},{}
ticks = []

for i, layout in enumerate(layouts):
    loadLevel(levels[i])
    best = minPath()[1]
    bestPerLayout[layout] = best
    iterations = 1000
    for j, place in enumerate(savePlace):
        # LearnManager appends each episode's score to the global `ticks`.
        ticks = []
        path = place+'-'+layout+'-irace'
        Q = load_obj(path)
        for _ in range(iterations):
            # The last entry ("Random") has no personality of its own, so a
            # personality is drawn afresh for every episode.
            if j == len(personalities):
                personality = personalities[np.random.randint(len(personalities))]
            else:
                personality = personalities[j]
            LM = LearnManager(Q0 = Q, iterations = 1, eps = 0, disp = False, idle = personality)
        meanSteps = sum(ticks)/len(ticks)
        steps[path] = meanSteps
        print(path,meanSteps,"/", best)

TypeError: solve_tsp() got an unexpected keyword argument 'ini'

In [9]:
# Evaluate every saved Q table on its own layout: 1000 single-episode runs
# with exploration off, mean score reported next to the TSP lower bound from
# minPath().  The means are kept in `steps` for the comparison cells below.
layouts = ["SMALL","MEDIUM","BIG"]
savePlace = ["10_0","9_1","8_2","7_3","Random"]
personalities = [(10,0),(9,1),(8,2),(7,3)]
levels = load_obj("Levels")
steps,bestPerLayout = {},{}
ticks = []

for i, layout in enumerate(layouts):
    loadLevel(levels[i])
    best = minPath()[1]
    bestPerLayout[layout] = best
    iterations = 1000
    for j, place in enumerate(savePlace):
        # LearnManager appends each episode's score to the global `ticks`.
        ticks = []
        path = place+'-'+layout
        Q = load_obj(path)
        for _ in range(iterations):
            # The last entry ("Random") has no personality of its own, so a
            # personality is drawn afresh for every episode.
            if j == len(personalities):
                personality = personalities[np.random.randint(len(personalities))]
            else:
                personality = personalities[j]
            LM = LearnManager(Q0 = Q, iterations = 1, eps = 0, disp = False, idle = personality)
        meanSteps = sum(ticks)/len(ticks)
        steps[path] = meanSteps
        print(path,meanSteps,"/", best)

10_0-SMALL 6.0 / 6
9_1-SMALL 7.435 / 6
8_2-SMALL 8.549 / 6
7_3-SMALL 9.989 / 6
Random-SMALL 8.1 / 6
10_0-MEDIUM 23.0 / 24
9_1-MEDIUM 32.184 / 24
8_2-MEDIUM 39.742 / 24
7_3-MEDIUM 49.895 / 24
Random-MEDIUM 36.42 / 24
10_0-BIG 86.809 / 76
9_1-BIG 106.386 / 76
8_2-BIG 129.485 / 76
7_3-BIG 169.291 / 76
Random-BIG 123.233 / 76


In [10]:
# Cross-test: take the table trained with random personalities and evaluate
# it against each fixed personality, next to the table trained for it.
# NOTE(review): the same Q object is reused for all personalities and the
# guide keeps calling learn() with eps = 0 - confirm the table is not being
# modified between evaluations.
for i, layout in enumerate(layouts):
    loadLevel(levels[i])
    iterations = 1000
    path = "Random"+'-'+layout
    Q = load_obj(path)
    for j, personality in enumerate(personalities):
        # LearnManager appends each episode's score to the global `ticks`.
        ticks = []
        LM = LearnManager(Q0 = Q, iterations = iterations, eps = 0, disp = False, idle = personality)
        testedAs = savePlace[j]
        meanSteps = sum(ticks)/len(ticks)
        steps[path+"_as_"+testedAs] = meanSteps
        print("Trained with Random, tested with", testedAs, "In layout", layout, "Steps:", meanSteps)
        print("\t vs. Trained with", testedAs, steps[testedAs+'-'+layout])

Trained with Random, tested with 10_0 In layout SMALL Steps: 6.0
	 vs. Trained with 10_0 6.0
Trained with Random, tested with 9_1 In layout SMALL Steps: 7.631
	 vs. Trained with 9_1 7.435
Trained with Random, tested with 8_2 In layout SMALL Steps: 8.931
	 vs. Trained with 8_2 8.549
Trained with Random, tested with 7_3 In layout SMALL Steps: 10.818
	 vs. Trained with 7_3 9.989
Trained with Random, tested with 10_0 In layout MEDIUM Steps: 26.0
	 vs. Trained with 10_0 23.0
Trained with Random, tested with 9_1 In layout MEDIUM Steps: 32.64
	 vs. Trained with 9_1 32.184
Trained with Random, tested with 8_2 In layout MEDIUM Steps: 40.12
	 vs. Trained with 8_2 39.742
Trained with Random, tested with 7_3 In layout MEDIUM Steps: 53.624
	 vs. Trained with 7_3 49.895
Trained with Random, tested with 10_0 In layout BIG Steps: 87.042
	 vs. Trained with 10_0 86.809
Trained with Random, tested with 9_1 In layout BIG Steps: 107.803
	 vs. Trained with 9_1 106.386
Trained with Random, tested with 8_2 In

In [12]:
# Cross-test: evaluate the "Average" Q table against each fixed personality,
# next to the table trained for that personality.
# NOTE(review): the same Q object is reused for all personalities and the
# guide keeps calling learn() with eps = 0 - confirm the table is not being
# modified between evaluations.
for i, layout in enumerate(layouts):
    loadLevel(levels[i])
    iterations = 1000
    path = "Average"+'-'+layout
    Q = load_obj(path)
    for j, personality in enumerate(personalities):
        # LearnManager appends each episode's score to the global `ticks`.
        ticks = []
        LM = LearnManager(Q0 = Q, iterations = iterations, eps = 0, disp = False, idle = personality)
        testedAs = savePlace[j]
        meanSteps = sum(ticks)/len(ticks)
        steps[path+"_as_"+testedAs] = meanSteps
        print("Average-Q, tested with", testedAs, "In layout", layout, "Steps:", meanSteps)
        print("\t vs. Trained with", testedAs, steps[testedAs+'-'+layout])

Average-Q, tested with 10_0 In layout SMALL Steps: 6.0
	 vs. Trained with 10_0 6.0
Average-Q, tested with 9_1 In layout SMALL Steps: 7.21
	 vs. Trained with 9_1 7.435
Average-Q, tested with 8_2 In layout SMALL Steps: 8.584
	 vs. Trained with 8_2 8.549
Average-Q, tested with 7_3 In layout SMALL Steps: 10.531
	 vs. Trained with 7_3 9.989
Average-Q, tested with 10_0 In layout MEDIUM Steps: 24.0
	 vs. Trained with 10_0 23.0
Average-Q, tested with 9_1 In layout MEDIUM Steps: 31.343
	 vs. Trained with 9_1 32.184
Average-Q, tested with 8_2 In layout MEDIUM Steps: 39.053
	 vs. Trained with 8_2 39.742
Average-Q, tested with 7_3 In layout MEDIUM Steps: 51.954
	 vs. Trained with 7_3 49.895
Average-Q, tested with 10_0 In layout BIG Steps: 87.997
	 vs. Trained with 10_0 86.809
Average-Q, tested with 9_1 In layout BIG Steps: 104.482
	 vs. Trained with 9_1 106.386
Average-Q, tested with 8_2 In layout BIG Steps: 132.606
	 vs. Trained with 8_2 129.485
Average-Q, tested with 7_3 In layout BIG Steps: 181

In [16]:
# Cross-test: evaluate each personality-specific Q table against every
# personality, next to the table trained for the tested personality.
# NOTE(review): the same Q object is reused for all tested personalities and
# the guide keeps calling learn() with eps = 0 - confirm the table is not
# being modified between evaluations.
for pers in ["10_0","9_1","8_2","7_3"]:
    for i, layout in enumerate(layouts):
        loadLevel(levels[i])
        iterations = 1000
        path = pers+'-'+layout
        Q = load_obj(path)
        for j, personality in enumerate(personalities):
            # LearnManager appends each episode's score to the global `ticks`.
            ticks = []
            LM = LearnManager(Q0 = Q, iterations = iterations, eps = 0, disp = False, idle = personality)
            testedAs = savePlace[j]
            meanSteps = sum(ticks)/len(ticks)
            steps[path+"_as_"+testedAs] = meanSteps
            print(pers+", tested with", testedAs, "In layout", layout, "Steps:", meanSteps)
            print("\t vs. Trained with", testedAs, steps[testedAs+'-'+layout])

10_0, tested with 10_0 In layout SMALL Steps: 6.0
	 vs. Trained with 10_0 6.0
10_0, tested with 9_1 In layout SMALL Steps: 7.305
	 vs. Trained with 9_1 7.435
10_0, tested with 8_2 In layout SMALL Steps: 8.955
	 vs. Trained with 8_2 8.549
10_0, tested with 7_3 In layout SMALL Steps: 10.445
	 vs. Trained with 7_3 9.989
10_0, tested with 10_0 In layout MEDIUM Steps: 23.0
	 vs. Trained with 10_0 23.0
10_0, tested with 9_1 In layout MEDIUM Steps: 33.533
	 vs. Trained with 9_1 32.184
10_0, tested with 8_2 In layout MEDIUM Steps: 41.419
	 vs. Trained with 8_2 39.742
10_0, tested with 7_3 In layout MEDIUM Steps: 53.973
	 vs. Trained with 7_3 49.895
10_0, tested with 10_0 In layout BIG Steps: 86.809
	 vs. Trained with 10_0 86.809
10_0, tested with 9_1 In layout BIG Steps: 111.235
	 vs. Trained with 9_1 106.386
10_0, tested with 8_2 In layout BIG Steps: 148.712
	 vs. Trained with 8_2 129.485
10_0, tested with 7_3 In layout BIG Steps: 212.013
	 vs. Trained with 7_3 169.291
9_1, tested with 10_0 I

In [113]:
# Row labels of the comparison tables.  The last two entries are left out
# when the table columns are built.
keys = [
    '10_0', '9_1', '8_2', '7_3',
    'Random', 'Average',
]

In [115]:
# Layout names; each one is a suffix of the saved result keys.
layouts = [
    "SMALL",
    "MEDIUM",
    "BIG",
]

In [118]:
# Per layout, print a table of the percentage of extra steps each Q table
# needs (rows: table used) when tested with a personality (columns), relative
# to the table that was trained with that same personality.
# presumably "StepsEvaluation" holds the `steps` dict built by the evaluation
# cells - TODO confirm
d = load_obj("StepsEvaluation")
for l in layouts:
    layoutSuffix = "-"+l
    # "<table>-<layout>_as_<tested>" becomes "<table> <tested>".
    daux = {}
    for key in d:
        if "as" in key and l in key:
            daux[key.replace('_as_'," ").replace(layoutSuffix,'')] = d[key]
    df = pd.DataFrame(index = keys)
    for k in keys[:-2]:
        data = [daux[i + ' ' + k] for i in keys]
        baseline = daux[k + ' ' + k]
        df[k] = pd.Series(np.array(data)/baseline*100-100, index=keys)
    print(l)
    print(df)
    print()

SMALL
         10_0       9_1       8_2       7_3
10_0      0.0  0.095917  6.404468 -5.372350
9_1       0.0  0.000000  2.934886 -3.107447
8_2       0.0  3.233763  0.000000 -5.780033
7_3      17.1  8.029597  4.954848  0.000000
Random    0.0  4.562894  6.119297 -1.993115
Average   0.0 -1.205810  1.996198 -4.593223

MEDIUM
              10_0       9_1       8_2       7_3
10_0      0.000000  4.807001  4.662152  8.534256
9_1       8.834783  0.000000  0.224895  3.169177
8_2      13.147826  2.050320  0.000000  3.026403
7_3      17.621739  1.975309  1.917926  0.000000
Random   13.043478  2.015940  1.379694  7.832452
Average   4.347826 -2.037818 -1.316521  4.474250

BIG
             10_0       9_1        8_2        7_3
10_0     0.000000  5.730662  13.013345  20.921571
9_1     -3.157507  0.000000  -1.549533   4.316978
8_2      4.865855 -0.675817   0.000000   1.511427
7_3      3.969634  5.081459  -1.614129   0.000000
Random   0.268405  2.468490  -0.540323   0.026806
Average  1.368522 -0.688174   