In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import copy
!pip install pympler
from pympler import asizeof
from collections import Counter
from timeit import timeit
import os
from google.colab import drive
import random
import time
#drive.mount('/content/gdrive')
# from google.colab import drive
# drive.mount('/content/drive')

Collecting pympler
[?25l  Downloading https://files.pythonhosted.org/packages/e8/e2/2f3a086701bb62b1c478a3921836271177838a3c98cdc6b82c3bb36d3854/Pympler-0.9.tar.gz (178kB)
[K     |█▉                              | 10kB 22.5MB/s eta 0:00:01[K     |███▊                            | 20kB 10.4MB/s eta 0:00:01[K     |█████▌                          | 30kB 8.3MB/s eta 0:00:01[K     |███████▍                        | 40kB 7.5MB/s eta 0:00:01[K     |█████████▏                      | 51kB 4.3MB/s eta 0:00:01[K     |███████████                     | 61kB 4.9MB/s eta 0:00:01[K     |████████████▉                   | 71kB 5.0MB/s eta 0:00:01[K     |██████████████▊                 | 81kB 5.3MB/s eta 0:00:01[K     |████████████████▌               | 92kB 5.7MB/s eta 0:00:01[K     |██████████████████▍             | 102kB 5.8MB/s eta 0:00:01[K     |████████████████████▏           | 112kB 5.8MB/s eta 0:00:01[K     |██████████████████████          | 122kB 5.8MB/s eta 0:00:01[K  

In [39]:
class Chessboard():
  def __init__(self):
    #self.dic = {"  " : 0, "Pawn" : 1, "Rook" : 2, "Knight" : 3, "Bishop" : 4, "Queen" : 5, "King" : 6 }
    ########experimental
    self.pieces = []
    self.fiftyMoveRule = 0
    self.gameOver = False
    self.white_to_move = True
    self.kingpositions = [[7,4],[0,4]]
    self.value_advantage = 0.0001
    self.played_moves = []
    self.prepare_board()
    self.visited_boardstates = {}

  def __str__(self):
    prettyprint = { "None" : "  ", "wRook": "wR", "bRook": "bR", "wKnight" : "wk", "bKnight" : "bk", "wBishop" : "wB",
                        "bBishop" : "bB", "wQueen" : "wQ", "bQueen" : "bQ", "wKing" : "wK!", "bKing" : "bK!", "bPawn" : "bP", "wPawn" : "wP"}
    arr = []
    for e in self.board:
      arr.append([prettyprint[str(i)] for i in e])    
    return str(np.array(arr))

  def reset(self):
    self.pieces = []
    self.fiftyMoveRule = 0
    self.gameOver = False
    self.white_to_move = True
    self.kingpositions = [[7,4],[0,4]]
    self.value_advantage = 0.0001
    self.played_moves = []
    self.prepare_board()
    self.visited_boardstates = {}
    
  def prepare_board(self):
    self.board = np.empty([8,8],dtype = object)
    self.board[0] = [Rook( True ),Knight( True),Bishop( True),Queen( True),King( [0,4], True),Bishop( True),Knight( True),Rook( True)]
    self.board[7] = [Rook( False),Knight( False),Bishop( False),Queen( False),King( [7,4], False),Bishop( False),Knight( False),Rook( False)]
    self.board[1] = [Pawn( True),Pawn( True),Pawn( True),Pawn( True),Pawn( True),Pawn( True),Pawn( True),Pawn( True)]
    self.board[6] = [Pawn( False),Pawn( False),Pawn( False),Pawn( False),Pawn( False),Pawn( False),Pawn( False),Pawn( False)]

  def prepare_testboard(self):
    self.board = np.empty([8,8],dtype = object)                               ##############################
    self.board[0] = [Rook( True ),Knight( True),Bishop( True),Queen( True),King( [0,4], True),Bishop( True),Knight( True),Rook( True)]
    self.board[7] = [Rook( False),Knight( False),Bishop( False),Queen( False),King( [7,4], False),Bishop( False),Knight( False),Rook( False)]

  def changeKingPosition(self, is_white, position):
    if (is_white):
      self.white_king = position
    else: self.black_king = position

  def getPiece(self, position):
    return self.board[position[0],position[1]]

  def getMoves(self):
    if self.white_to_move:
      piece_positions = np.argwhere(self.board == True)
      self.pieces = self.board[self.board == True] #np.where(self.board == True)
    else:
      piece_positions = np.argwhere(self.board == False)
      self.pieces = self.board[self.board == False]
    all_moves = []
    for position in piece_positions:
      moves = self.getPiece(position).getMoves(position, self)
      all_moves = all_moves + moves
    return all_moves    

  def isInside(self,y,x):
    if (y in range(0,8) and x in range(0,8)):
      return True
    return False

  def kingInCheck(self, prin = False):
    king = self.kingpositions[int( not self.white_to_move)]
    try:
      val = self.getPiece(king).isChecked(king, self)
    except:
      if (prin): print(self)
    return val

  def movePiece(self, move, actual_move = True):
    #experimental
    if (len(move)==3):
      self.board[move[0][0],move[0][1]] = move[2]
      self.fiftyMoveRule = 0
      #print(self.board[move[0][0],move[0][1]])
      self.value_advantage -= move[2].value
    try:
      moving_piece = self.board[move[0][0],move[0][1]]
      moving_piece.move([move[1][0],move[1][1]])
      if(isinstance(moving_piece, Pawn)):
        self.fiftyMoveRule = 0
    # This exception deals with Castling moves since they are basically 2 moves in one
    except AttributeError as e:
      #print("tried to do Castling: {} but an error occured: {} ".format(move, e))
      # This is also responsible to make on-passant moves
      for partmove in move:
        self.movePiece(partmove)
      self.white_to_move = not self.white_to_move
      # Optional line of code so we append Castling-moves as one move
      self.played_moves.append([self.played_moves.pop(),self.played_moves.pop()])
      self.addBoardstateToSeen()
      #####
      return
    try:
      self.value_advantage -= self.getPiece(move[1]).value
      self.fiftyMoveRule = 0
    except: pass
    self.board[move[0][0],move[0][1]] = None
    self.board[move[1][0],move[1][1]] = moving_piece
    self.played_moves.append(move)
    self.fiftyMoveRule += 1
    if (str(moving_piece) in ["wKing", "bKing"]):
      self.kingpositions[int(self.white_to_move)] = [move[1][0],move[1][1]]
    self.white_to_move = not self.white_to_move
    self.addBoardstateToSeen()

  def copyBoard(self):
    new_board = copy.deepcopy(self)
    return new_board

  def transform_board(self, board):
    vocab = {"None" : 0, "wPawn" : 1, "wRook" : 2, "wKnight" : 3, "wBishop" : 4, "wQueen" : 5, "wKing" : 6, "bPawn" : 7, "bRook" : 8, "bKnight" : 9, "bBishop" : 10, "bQueen" : 11, "bKing" : 12}
    int_board = []
    for row in board:
      #for keeping the structure of the board
      #int_board.append([vocab[str(piece)] for piece in row])  #shape (8,8,13)
      #for returning the board as one long tensor without structure 
      for piece in row:
        int_board.append(vocab[str(piece)])                 #shape: (64,13)
    tf_board = tf.convert_to_tensor(int_board)
    oh_board = tf.one_hot(tf_board, 13)
    oh_board = tf.expand_dims(oh_board, axis = 0) 
    #oh_board = tf.expand_dims(oh_board, axis = 0) #Experimental
    return oh_board

  def getID(self, string = True):
    vocab = {"None" : 0, "wPawn" : 1, "wRook" : 2, "wKnight" : 3, "wBishop" : 4, "wQueen" : 5, "wKing" : 6, "bPawn" : 7, "bRook" : 8, "bKnight" : 9, "bBishop" : 10, "bQueen" : 11, "bKing" : 12}
    int_board = []
    if (string):
      s = ""
      for row in self.board:
        for piece in row:
          s += str(vocab[str(piece)])
      white_to_move = int(self.white_to_move)
      s += str(white_to_move)
      s += str(int(self.getPiece(self.kingpositions[white_to_move]).has_moved))
      return s
    for row in self.board:
      for piece in row:
        int_board.append(vocab[str(piece)])                 #shape: (64,13)
    return  int_board#dic#tuple(int_board)#int_board

  def addBoardstateToSeen(self):
    id = self.getID(True)
    self.addCountToDict(int(id))

  def addCountToDict(self, id):
    if (id in self.visited_boardstates.keys()):
      self.visited_boardstates[id] += 1
    else:
      self.visited_boardstates[id] = 1

In [3]:
class Boardmanager():
  def __init__(self):
    self.board = Chessboard()
    self.game_over = False
    self.horizontalAttacker = ["bRook","bQueen","wRook","wQueen"]
    self.diagonalAttacker = ["bBishop","bQueen","wBishop","wQueen"]
    # ind:0 are black pawn attacks ind:1 are white pawn attacks
    self.moves = []

  def getMoves(self, board):
    self.moves = []
    all_moves = board.getMoves()
    for move in all_moves:
      # This part is to check for illigal moves leaving or moving the King in check
      new_board = board.copyBoard()
      # Attribute error occurs when trying to move a castling move, because it are 2 moves in one, which gives an error when trying to index
      try:
        new_board.movePiece(move,True)
      except AttributeError as e:
        #We do not need to test for Checks because that is done inside the castling method
        pass
      if not (new_board.kingInCheck()):
        self.moves.append(move)
    #print("all moves : {}".format(self.moves))   
    return self.moves,

  def getMovesAgent(self, board):
    self.moves = []
    all_moves = board.getMoves()
    #experimental
    expanded_boards = []
    for move in all_moves:
      # This part is to check for illigal moves leaving or moving the King in check
      new_board = board.copyBoard()
      # Attribute error occurs when trying to move a castling move, because it are 2 moves in one, which gives an error when trying to index
      try:
        new_board.movePiece(move,True)
      except AttributeError as e:
        pass
        #We do not need to test for Checks because that is done inside the castling method
        #print("tried to expand a castling move. {}".format(e))
      if not (new_board.kingInCheck()):
        self.moves.append(move)
        #experimental
        expanded_boards.append(new_board)
    #print("all moves : {}".format(self.moves))   
    return (self.moves, expanded_boards)
 
  def gameOver(self, board):
    if (self.staleMate(board)):
      print("Stalemate")
      if (board.kingInCheck()):
        ## DO End Game and give return for Win/Loss
        self.game_over = True
        print("Won by checkmate : {}".format(self.game_over))
        print(len(board.pieces))
        return True, -1
      else:
        ## DO End Game and give return for Draw
        self.game_over = True
        print("Draw by stalemate : {}".format(self.game_over))
        print(len(board.pieces))
        return True, 0
    if (len(board.pieces)<=3):
      self.game_over = True
      print("Won by insufficient Material : {}".format(self.game_over))
      print(len(self.board.pieces))
      return True, -1
    if (3 in board.visited_boardstates.values()):
      self.game_over = True
      print("Draw by repitition : {}".format(self.game_over))
      return True, 0
    if (board.fiftyMoveRule > 99):
      self.game_over = True
      print("Draw by shuffling-Pieces : {}".format(self.game_over))
      return True, 0
    return False, 0

  def staleMate(self, board):
    if ( not bool(self.moves)):
      return True
    return False

  def expand_with_move(self,board,move):
    new_board = board.copyBoard()
    new_board.movePiece(move,True)
    return new_board

###SearchTree

In [4]:
class Edge():
  def __init__(self, inNode, outNode, action, prior ):
    self.inNode = inNode
    self.outNode = outNode
    self.action = action
    self.white_to_move = inNode.board.white_to_move
    self.stats =  {
					'visits': 0, #N
					'Worth': 0, #W
					'Advantage': 0, #Q Experimental
					'Probability': prior} #P

class Node():
  def __init__(self, board):
    self.board = board
    self.edges = []
  
  def isLeaf(self):
    return (len(self.edges)<1)

class Tree():
  def __init__(self, board):
    self.root = Node(board)
    self.tree = {}
    self.addNode(self.root, int(self.root.board.getID()))

  def addNode(self, node, id):
    self.tree[id] = node

  def traverseTree(self, epsilon = 0.2):
    '''Starting from the Root traverse the search-tree until we reach a leafnode. When walking through our tree,
    nodes are choosen relative to their estimated value and the number of times we have already visited them.
    This gives us a good exploration-exploitation tradeoff, while expanding possible future moves.
    The Leafnode we reach is the node, we expect to have the highest information gain. It is returned to be subsequently expanded.
    '''
    path = []
    current_node = self.root
    done = 0
    value = 0
    max_depth = 10
    i = 0
    while not (current_node.isLeaf()):
      maxInformationgain = -9999
      if current_node == self.root:
        #nu = np.random.dirichlet([1] * len(current_node.edges))
        epsilon = 0.5
      else: 
        epsilon = 0
        #nu = [0]*len(current_node.edges)
      summed_visits = 0
      for action, edge in current_node.edges:
        summed_visits += edge.stats["visits"]
      
      for idx, (action, edge) in enumerate(current_node.edges):
        #cpuct ausgeschlossen da wir nicht wissen was es tut
        #probability_visit_discount = ((1-epsilon) * edge.stats["Probability"] + epsilon * nu[idx]) * np.sqrt(summed_visits) / (1 + edge.stats["visits"])
        probability_visit_discount = ((1-epsilon) * edge.stats["Advantage"] ) * np.sqrt(summed_visits) / (1 + edge.stats["visits"]) #+ epsilon * nu[idx]

        Advantage = edge.stats["Advantage"]
        
        if Advantage + probability_visit_discount > maxInformationgain:
          maxInformationgain = Advantage + probability_visit_discount
          nextAction = action
          nextEdge = edge
      i +=1
      if (i>max_depth): 
        print(i)
        #curren_node = 
        break
      #newState = current_node.board.MovePiece(action)
      current_node = nextEdge.outNode
      path.append(nextEdge)
    return current_node, path

  def backFill(self, leaf, value, path):
    playing_white = leaf.board.white_to_move
    #print("Backfilling:")
    for edge in path:
      #edge.white_to_move ist das inboard.white_to_move
      white_to_move = edge.white_to_move
      #print("leafnote white to move : {} ; previousstate white to move: {}".format(playing_white, white_to_move))
      #Experimental comments
      if white_to_move == playing_white:
       direction = 1
      else:
        direction = (-1) # Endof experimental
      edge.stats['visits'] = edge.stats['visits'] + 1
      edge.stats['Worth'] = edge.stats['Worth'] + value * direction
      #print("edgestats Worth get updated with value  = {}".format(value * direction))
      edge.stats['Advantage'] = edge.stats['Worth'] / edge.stats['visits']
    #print("done---------------------------")


###Testclass

In [42]:
class Testclass():
  def __init__(self):
    self.bm = Boardmanager()
    self.game_going = True
    # self.white_agent = Agent(True, self.bm)
    # self.black_agent = Agent(False, self.bm)
    self.agents = [Agent(False, self.bm),Agent(True, self.bm)]
    self.agent = Agent(True, self.bm)

  def testFullGame(self):
    self.game_going = True
    curr_board = self.bm.board
    while (self.game_going):
      self.bm.getMoves(curr_board)
      move_idx = self.agents[0].chooseMove(curr_board)#int(curr_board.white_to_move)
      move = self.bm.moves[move_idx]
      print("choosen Move: {}".format(move))
      curr_board.movePiece(move)
      self.game_going = not self.bm.gameOver(curr_board)

  def testexpansion(self):
    self.bm.getMoves(self.bm.board)
    print(self.agents[1].expandsearch())
  
  def testCastling(self):
    self.bm.board.prepare_board()#testboard()
    self.bm.board.board[0,1] = None
    self.bm.board.board[0,2] = None
    self.bm.board.board[0,5] = None
    self.bm.board.board[0,6] = None
    self.bm.board.board[0,3] = None
    self.bm.board.board[7,3] = Queen(False)
    #self.bm.board.board[0,3] = Pawn(False)
    moves = []
    self.bm.board.getPiece([0,4]).castling((0,4),self.bm.board,moves)#getMoves([0,4], self.bm.board)
    print(self.bm.board)
    print(self.bm.board.white_to_move)
    print(moves)
    print("-----------------")
    print(self.bm.getMoves(self.bm.board))
    self.bm.board.movePiece(self.bm.moves[4])
    print(self.bm.board.white_to_move)
    print(self.bm.board.played_moves)

  
  def testOnpassant(self):
    self.bm.board.prepare_testboard()
    self.bm.board.board[3,3] = Pawn(False)
    self.bm.board.board[3,5] = Pawn(False)
    self.bm.board.board[1,4] = Pawn(True)
    self.bm.board.movePiece([(1,4),(3,4)])
    print(self.bm.board)
    ls = self.bm.board.getPiece((3,3)).getMoves((3,3),self.bm.board)
    print(ls)
    ls.append(self.bm.board.getPiece((3,5)).getMoves((3,5),self.bm.board))
    print(ls)
    ##################
    self.bm.board.prepare_testboard()
    self.bm.board.board[4,6] = Pawn(True)
    self.bm.board.board[6,5] = Pawn(False)
    self.bm.board.board[4,4] = Pawn(True)
    self.bm.board.white_to_move = False
    self.bm.board.movePiece([(6,5),(4,5)])
    print(self.bm.board)
    ls = self.bm.board.getPiece((4,4)).getMoves((4,4),self.bm.board)
    print(ls)
    ls.append(self.bm.board.getPiece((4,6)).getMoves((4,6),self.bm.board))
    print(ls)

  def testPromotion(self):
    self.bm.board.prepare_testboard()
    self.bm.board.board[6,1] = Pawn(True)
    print(self.bm.getMoves(self.bm.board))
    self.bm.board.movePiece(self.bm.moves[-3])
    print(self.bm.board)
  
  def testRepetition(self):
    self.bm.board.prepare_testboard()
    self.bm.board.movePiece(((0,1),(2,0)))
    self.bm.board.movePiece(((2,0),(0,1)))
    self.bm.board.movePiece(((0,1),(2,0)))
    self.bm.board.movePiece(((2,0),(0,1)))
    self.bm.board.movePiece(((0,1),(2,0)))
    self.bm.getMoves(self.bm.board)
    print(self.bm.board.visited_boardstates)
    print(self.bm.gameOver(self.bm.board))

  def testSearchTree(self):
    self.agents[1].act(self.bm.board)

  def collectExperience(self):
    trajectory = {}
    trajectory["state"] = []
    trajectory["pi"] = []
    trajectory["value"] = []
    trajectory["NN_estimate"] = []
    board = self.bm.board
    while (self.game_going):
      print("before act+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
      state, move, value, NN_estimate = self.agent.act(board)#self.agents[int(board.white_to_move)].act(board)
      print("after act ----------------------------------------------------------------------------------")
      ###
      board.movePiece(move)
      ###
      trajectory["state"].append(state)
      trajectory["pi"].append(move)
      trajectory["value"].append(value)
      trajectory["NN_estimate"].append(NN_estimate)
      print("estimated value of state = {}".format(value))
      self.bm.getMoves(board)
      gameover,sample_return = self.bm.gameOver(board)
      #Experimental
      if (gameover):
        print("gamegoing is going to be false")
        self.game_going = False
        #return trajectory
        #############
    print(board)
    #print(trajectory)
    return trajectory

  def getOneTrajectory(self, n_epochs):
    #try:
    #  self.agent.NN.load_weights("/content/saved_model_weights_chess0")
    #except: print("no weights to load")
    self.initilizeBuffer(1000,["state","pi","value","NN_estimate"])
    times = []
    for i in range(n_epochs):
      print(i)
      start = time.time()
      self.reset()
      trajectory = self.collectExperience()
      self.buffer.put(trajectory)
      print("done with playing")
      for step in range(300):
        sample = self.buffer.sample(50)
        loss = self.agent.train_step(sample["state"],sample["value"])#, tf.keras.losses.MSE()
        # print("weights:-------------------------------------------------------------------------------------------------------------------")
        # print(self.agent.NN.get_weights())
      end = get_time(start)
      times.append(end)
      print("done with training")
      self.testSaveModel()
      print("done with saving")
      #print("models new weights")
      print("Needed time steps: " + str(end))
    #print(self.agent.NN.get_weights())
    print("Average of time steps needed: " + str(get_average(times)))

  def initilizeBuffer(self, size, keys):
    self.buffer = Replay_buffer(size,keys)

  def testSaveModel(self,suffix = ""):
    if len(suffix) > 0:
      self.agent.saveModel(suffix)
    else:
      self.agent.saveModel()
  def testBuffer(self):
    self.initilizeBuffer(100,["sas","sos"])
    self.buffer.put({"sas":[0,1],"sos":[2,3]})
    print(self.buffer.sample(1))

  def reset(self):
    print("board before training")
    self.bm.board.reset()
    self.agent.boardmanager = self.bm
    print(self.bm.board)
    self.game_going = True
    self.agent.searchtree = None

  def testLoadModel(self, suffix = "" ):
    #self.agent.NN(self.agent.boardmanager.board.transform_board(self.agent.boardmanager.board.board))
    #print('before---------------------------------')
    #print(self.agent.NN.get_weights())
    
    self.agent.NN.load_weights("/content/saved_model_weights_chess"+ suffix)
    self.agent.NN(self.agent.boardmanager.board.transform_board(self.agent.boardmanager.board.board))
    print('after++++++++++++++++++++++++++++++++++++++')
    print(self.agent.NN.get_weights())
  
  def trainagent(self, n_epochs=1, savesuffix = "batchnorm"):
    self.initilizeBuffer(1000,["state","pi","value","NN_estimate"])
    times = []
    for i in range(n_epochs):
      print(i)
      start = time.time()
      self.reset()
      trajectory = self.collectExperience()
      self.buffer.put(trajectory)
      end = get_time(start)
      times.append(end)
      print("done with playing")
      for step in range(300):
        sample = self.buffer.sample(50)
        loss = self.agent.train_step(sample["state"],sample["value"])#, tf.keras.losses.MSE()
      # if (summary):
      #   self.agent.NN.summary()
      print("done with training")
      self.testSaveModel()
      print("done with saving")
      #print("models new weights")
      print("Needed time steps for playing: " + str(end))
    print("Average time steps for 1 trajectory: " + str(get_average(times)))

  # def get_summary(self):
  #   self.agent.NN.summary()

###Agent

In [43]:
class Agent():
  def __init__(self, plays_white, boardmanager):
    self.plays_white = plays_white
    self.boardmanager = boardmanager
    self.searchtree = None
    self.NN = ChessBot_dense()
    self.tau = 0
    self.optimizer = tf.keras.optimizers.Adam(0.001)

  def chooseMove(self, pi, values, tau, greedy = True):
    if tau == 0:
      # moves = np.argwhere(pi == max(pi))
      # move_idx = np.random.choice(moves[0])
      #print(values)
      moves = np.argwhere(values == max(values))
      move_idx = np.random.choice(moves[0])
    else:
      move_idx = np.random.multinomial(1, pi)
      move_idx = np.where(move_idx == 1)[0][0]
    value = values[move_idx]
    #print(f"val in estimator for agent : {value}")
    return move_idx, value

  def getValueEstimate(self, board):
    ### Momentan lernt das model mit piecevalue inclusive und nicht nur ob das spiel eher gewonnen oder verloren wird
    #adv = board.value_advantage
    #if not (board.white_to_move):
    #  adv = (-1) * adv
    nn_board = board.transform_board(board.board)
    val = self.NN(nn_board)
    return val

  def act(self,board, num_steps=2):
    # We check wether the current boardstate is already in our searchtree
    #if so we we set the root to the already seen state in the searchtree
    # if not we create a new searchtree with the current state as rootnode
    if (self.searchtree == None or int(board.getID()) not in self.searchtree.tree):
      self.searchtree = Tree(board)
    else:
      self.searchtree.root = self.searchtree.tree[int(board.getID())]
    # Move throught the searchtree for a specific number of iterations
    for step in range(num_steps):
      self.expandsearch1()
      if step%100 == 0:
        print("-------------------------------------------------------------------------------")
    pi,values = self.getActionValues(1) 
    #pick a specific action
    move_idx, value = self.chooseMove(pi, values, self.tau)
    move = self.searchtree.root.edges[move_idx][0]
    NN_estimate = self.getValueEstimate(self.boardmanager.board)
    state = self.boardmanager.board.transform_board(self.boardmanager.board.board)
    #self.boardmanager.board.movePiece(move)
    print(self.boardmanager.board)
    #what we need is state and final return
    return (state, move, value, NN_estimate)

  def getActionValues(self, tau):
    edges = self.searchtree.root.edges
    move_probability = np.zeros(len(edges), dtype = np.integer)
    values = np.zeros(len(edges), dtype=np.float32)

    for idx, (action, edge) in enumerate(edges):
      move_probability[idx] = pow(edge.stats['visits'], 1/tau)
      values[idx] = edge.stats['Advantage']
    move_probability = move_probability / (np.sum(move_probability) * 1.0)
    return move_probability, values

  def expandsearch1(self, board = None):
    leaf, path = self.searchtree.traverseTree()
    value = self.evaluateLeafNode(leaf)
    self.searchtree.backFill(leaf, value, path)

  def evaluateLeafNode(self,leaf):
    moves, boards = self.boardmanager.getMovesAgent(leaf.board)
    #wir checken ob der boardstate ein finaler state ist, wenn ja expanden wir nichtmehr und returnen den wert des final states (-1,0,1)
    done, value = self.boardmanager.gameOver(leaf.board) #used to be self.boardmanager.board
    if not done:
      #ansonsten bewertet das NN den state und wir benutzen diese Vorhersage als wert
      value = self.getValueEstimate(leaf.board)
      for move, board in zip(moves,boards):
        id = int(board.getID())
        if not (id in self.searchtree.tree):
          node = Node(board)
          self.searchtree.addNode(node, id)
        else:
          node = self.searchtree.tree[id]
        newEdge = Edge(leaf, node, move, prior = 1)
        leaf.edges.append((move, newEdge))
    if (done):
      print(f"leafnode {leaf.board}is terminal boardstate with finishvalue of {value}")
    return value

  def train_step(self, state, target):##########################
    with tf.GradientTape() as tape:
      #tf.expand_dims(state, axis = 0)#Experimental
      state = tf.squeeze(state, axis = 1) #Experimental
      expectation = self.NN(state)
      loss = tf.keras.losses.MSE(target, expectation)

      gradients = tape.gradient(loss, self.NN.trainable_variables)
    self.optimizer.apply_gradients(zip(gradients, self.NN.trainable_variables))
    return loss
  
  def saveModel(self,idx = "batchnorm"):
    #self.NN.save(os.getcwd())
    self.NN.save_weights(f'saved_model_weights_chess{idx}', overwrite = True)

  # def get_summary(self):
  #   self.NN.summary()

In [44]:
test = Testclass()
#test.testRepetition()
#test.testSearchTree()
# for i in range(5):
#   test.getOneTrajectory(1)
#   test.agent.saveModel(2)
#test.testLoadModel("batchnorm2")
test.trainagent(n_epochs = 1, savesuffix = "batchnorm3")
#test.agent.get_summary()


0
board before training
[['wR' 'wk' 'wB' 'wQ' 'wK!' 'wB' 'wk' 'wR']
 ['wP' 'wP' 'wP' 'wP' 'wP' 'wP' 'wP' 'wP']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['bP' 'bP' 'bP' 'bP' 'bP' 'bP' 'bP' 'bP']
 ['bR' 'bk' 'bB' 'bQ' 'bK!' 'bB' 'bk' 'bR']]
before act+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-------------------------------------------------------------------------------
[['wR' 'wk' 'wB' 'wQ' 'wK!' 'wB' 'wk' 'wR']
 ['wP' 'wP' 'wP' 'wP' 'wP' 'wP' 'wP' 'wP']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['bP' 'bP' 'bP' 'bP' 'bP' 'bP' 'bP' 'bP']
 ['bR' 'bk' 'bB' 'bQ' 'bK!' 'bB' 'bk' 'bR']]
after act ----------------------------------------------------------------------------------
estimated



-------------------------------------------------------------------------------
[['wR' '  ' 'wB' 'wQ' 'wK!' 'wB' 'wk' 'wR']
 ['wP' 'wP' 'wP' 'wP' 'wP' 'wP' 'wP' 'wP']
 ['wk' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['bP' 'bP' 'bP' 'bP' 'bP' 'bP' 'bP' 'bP']
 ['bR' 'bk' 'bB' 'bQ' 'bK!' 'bB' 'bk' 'bR']]
after act ----------------------------------------------------------------------------------
estimated value of state = 0.0
before act+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-------------------------------------------------------------------------------
[['wR' '  ' 'wB' 'wQ' 'wK!' 'wB' 'wk' 'wR']
 ['wP' 'wP' 'wP' 'wP' 'wP' 'wP' 'wP' 'wP']
 ['wk' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['bP' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' '  ' '  ' '  ' '  ' '  ' '  ' '  ']
 ['  ' 'bP

  return array(a, dtype, copy=False, order=order)


done with training
done with saving
Needed time steps for playing: 58.36162
Average time steps for 1 trajectory: 58.36162


In [None]:
# bm = Boardmanager()
# agent = Agent(True,bm)
# model = ChessBot()
# #print(agent.NN.get_weights())
# print(model.get_weights())
test = Testclass()
test.testLoadModel("batchnorm")

NotFoundError: ignored

In [None]:
# t = tf.zeros((50, 1, 1, 64, 128))
# print(t)
# t = tf.squeeze(t, axis = 1)
# print(t)

##Models

###Dense model

In [7]:
class ChessBot_dense(tf.keras.Model):
    def __init__(self):
        super(ChessBot_dense, self).__init__()
        self.dense_layers =  [tf.keras.layers.Dense(units = 256, activation = None, input_shape = ),
                              tf.keras.layers.BatchNormalization(),
                              tf.keras.layers.LeakyReLU(),
                              #tf.keras.layers.MaxPool2D(pool_size = (2,2), padding = 'same'),
                              #tf.keras.layers.Dropout(0.1),
                              tf.keras.layers.Dense(units = 128, activation = None),
                              #tf.keras.layers.Reshape((64,128)),
                              tf.keras.layers.BatchNormalization(),
                              tf.keras.layers.LeakyReLU(),
                              tf.keras.layers.Dense(units = 64 , activation = None),
                              #tf.keras.layers.Reshape((64,32)),
                              tf.keras.layers.BatchNormalization(),
                              tf.keras.layers.LeakyReLU(),
                              tf.keras.layers.Dense(units = 32 , activation = None),
                              tf.keras.layers.BatchNormalization(),
                              tf.keras.layers.LeakyReLU(),
                              tf.keras.layers.Dense(units = 16 , activation = None),
                              tf.keras.layers.BatchNormalization(),
                              tf.keras.layers.LeakyReLU(),
                              tf.keras.layers.Flatten(),
                              # tf.keras.layers.Reshape((64,1)),
                              tf.keras.layers.Dense(units = 1, activation = "tanh", use_bias = False)]
    
    def __call__(self, input):
        for layer in self.dense_layers:
          input = layer(input)
        return input

##convolutional model

In [8]:
class ChessBot_conv(tf.keras.Model):
    def __init__(self):
        super(ChessBot_conv, self).__init__()
      
        self.conv_layers =  [tf.keras.layers.Conv2D(filters = 256, kernel_size = 3, padding = 'same', activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                             tf.keras.layers.Conv2D(filters = 128, kernel_size = 3, padding = 'same', activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                             tf.keras.layers.Conv2D(filters = 64 , kernel_size = 3, padding = 'same', activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                             tf.keras.layers.Conv2D(filters = 32 , kernel_size = 3, padding = 'same', activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                             tf.keras.layers.Conv2D(filters = 16 , kernel_size = 3, padding = 'same', activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                             tf.keras.layers.Flatten(),
                             tf.keras.layers.Dense(units = 1, activation = 'tanh', use_bias = False)]
    
    def __call__(self, input):
        for layer in self.conv_layers:
            input = layer(input)
        return input

##Mixed model

In [9]:
class ChessBot_mix(tf.keras.Model):
    def __init__(self):
        super(ChessBot_mix, self).__init__()
      
        self.mix_layers =  [tf.keras.layers.Conv2D(filters = 64, kernel_size = 3, padding = 'same', activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                             #tf.keras.layers.Flatten(),
                             tf.keras.layers.Dense(units = 64, activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                            
                             tf.keras.layers.Conv2D(filters = 32, kernel_size = 3, padding = 'same', activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                             #tf.keras.layers.Flatten(),
                             tf.keras.layers.Dense(units = 32, activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                            
                             tf.keras.layers.Conv2D(filters = 16, kernel_size = 3, padding = 'same', activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                             #tf.keras.layers.Flatten(),
                             tf.keras.layers.Dense(units = 16, activation = None),
                             tf.keras.layers.BatchNormalization(),
                             tf.keras.layers.LeakyReLU(),
                            
                             tf.keras.layers.Flatten(),
                             tf.keras.layers.Dense(units = 1, activation = 'tanh', use_bias = False)]
    
    def __call__(self, input):
        for layer in self.mix_layers:
            input = layer(input)
        return input

##buffer

In [10]:
class Replay_buffer():
    def __init__(self, size, keys):
        self.buffer = {}
        for k in keys:
            self.buffer[k] = []
        self.size = size
        self.keys = keys

    def put(self, data_dict):
        dict_keys = list(data_dict.keys())

        current_len = len(self.buffer[self.keys[0]])
        add_len = len(data_dict[dict_keys[0]])
        new_len = current_len + add_len

        if new_len >= self.size:
            pop_len = new_len - self.size

            for k in self.buffer.keys():
                self.buffer[k] = self.buffer[k][pop_len:]

        for k in dict_keys:
            self.buffer[k].extend(data_dict[k])

        return self.buffer

    def sample(self, num):

        seed = random.randint(0, 100)
        sample = {}
        for k in self.buffer.keys():
            random.seed(seed)
            sample[k] = np.asarray(random.choices(self.buffer[k], k=num))
        return sample

In [11]:
def get_time(start):
  current = time.time()
  time_steps = current - start
  return round(time_steps, 5)

def get_average(list):
  return sum(list) / len(list)

#Pieces

##Base-class

In [12]:
class Piece():
  def __init__(self, val = 1,is_white = True):
    #self.val = val
    self.diagonalMoves = [(1,1),(1,-1),(-1,1),(-1,-1)]
    self.horizontalMoves = [(1,0),(0,1),(-1,0),(0,-1)]
    self.knightJumps = [(1,2),(1,-2),(-1,2),(-1,-2),(2,1),(2,-1),(-2,1),(-2,-1)]
    self.__is_white = is_white
  
  def __eq__(self,other):
    return (self.__is_white == other)

  def getColor(self):
    return self.__is_white
  
  def getMoves(self, position, board):
    return

  def yieldExpandedSteps(self, position, steps, board):
    for ystep,xstep in steps:
      y = position[0]+ystep
      x = position[1]+xstep
      while (board.isInside(y,x)):
        piece = board.getPiece((y,x))        
        yield piece, [y,x]
        if (piece is not None):
          break
        y = y + ystep
        x = x + xstep

  def yieldExpandedStepsOnes(self, position, steps, board):
    for ystep,xstep in steps:
      y = position[0]+ystep
      x = position[1]+xstep
      if (board.isInside(y,x)):
        piece = board.getPiece((y,x))
        yield piece

  def getDiagonalMoves(self, starting_position, board):
    moves = []
    for piece, position in self.yieldExpandedSteps(starting_position, self.diagonalMoves, board):
      move = [starting_position, position]
      # expandedBoard = self.board.copyNewBoardState(move, board)
      # if (not self.board.kingInCheck(expandedBoard)):
      try:
        if (piece.getColor() != self.getColor()):
          moves.append(move)
      except:
          moves.append(move)
    return moves

  def getHorizontalMoves(self, starting_position, board):
    moves = []
    for piece, position in self.yieldExpandedSteps(starting_position, self.horizontalMoves, board):
      move = [starting_position, position]
      #expandedBoard = self.board.copyNewBoardState(move, board)
      #if (not self.board.kingInCheck(expandedBoard)):
      try:
        if (piece.getColor() != self.getColor()):
          moves.append(move)
      except:
          moves.append(move)
    return moves

  def getMovesKnight(self, starting_position, board):
    moves = []
    for ystep,xstep in self.knightJumps:
      y = starting_position[0]+ystep
      x = starting_position[1]+xstep
      move = [starting_position, [y,x]]
      if (board.isInside(y,x)):
        try:
          if (board.getPiece([y,x]).getColor() != self.getColor()):
            #expandedBoard = self.board.copyNewBoardState(move, board)
            #if not (self.board.kingInCheck(expandedBoard)):
            #  moves.append([starting_position, (y,x)])
            moves.append(move)
        except: moves.append(move)
    return moves
  
  def move(self, move):
    self.position = move

  def getPosition(self):
    return self.position

## Bishop, Knight, Rook, Queen

In [13]:
class Bishop(Piece):
  def __init__(self, is_white = True):
    Piece.__init__(self, 3, is_white)
    self.diagonalMoves = [(1,1),(1,-1),(-1,1),(-1,-1)]
    if (is_white): self.value = 3
    else: self.value = -3

  def __repr__(self):
    if (self.getColor()):
      return "wBishop"#
    return "bBishop"
  
  def getMoves(self, starting_position, board):
    return Piece.getDiagonalMoves(self, starting_position, board)
        

class Knight(Piece):
  def __init__(self, is_white = True):
    Piece.__init__(self, 3, is_white)
    if (is_white): self.value = 3
    else: self.value = -3
  def __repr__(self):
    if (self.getColor()):
      return "wKnight"#
    return "bKnight"

  def getMoves(self, starting_position, board):
    
    return Piece.getMovesKnight(self, starting_position, board)

class Rook(Piece):
  def __init__(self, is_white = True):
    Piece.__init__(self, 5, is_white)
    self.has_moved = False
    if (is_white): self.value = 5
    else: self.value = -5

  def __repr__(self):
    if (self.getColor()):
      return "wRook"#
    return "bRook"
  
  def getMoves(self, starting_position, board):
    
    return Piece.getHorizontalMoves(self, starting_position, board)
  
  def move(self, destiantion):
    self.has_moved = True


class Queen(Piece):
  def __init__(self, is_white = True):
    Piece.__init__(self, 9, is_white)
    if (is_white): self.value = 9
    else: self.value = -9
  
  def __repr__(self):
    if (self.getColor()):
      return "wQueen"#
    return "bQueen"

  def getMoves(self, starting_position, board):
    #ist fraglich ob das sinn macht die methode über das piece zu callen
    #self.board.getMovesQueen(position, board)
    return Piece.getDiagonalMoves(self, starting_position, board) + Piece.getHorizontalMoves(self, starting_position, board)
    


##King

In [14]:
class King(Piece):
  def __init__(self, position, is_white = True):
    Piece.__init__(self, -1, is_white)
    self.position = position
    self.has_moved = False
    self.horizontalAttacker = ["bRook","bQueen","wRook","wQueen"]
    self.diagonalAttacker = ["bBishop","bQueen","wBishop","wQueen"]
    if (is_white): self.value = 100
    else: self.value = -100

  def __repr__(self):
    if (self.getColor()):
      return "wKing"
    return "bKing"

  def getMoves(self, starting_position, board):
    moves = []
    if (not self.has_moved):
      self.castling(starting_position, board, moves)
    for ystep, xstep in self.diagonalMoves + self.horizontalMoves:
      y = starting_position[0]+ystep
      x = starting_position[1]+xstep
      move = [starting_position,[y,x]]
      if (board.isInside(y,x)):
        try:
          if (board.getPiece([y,x]).getColor() != self.getColor()):
            moves.append(move)#[starting_position, (y,x)]
        except: moves.append(move)
    return moves
  
  def move(self, destination):
    self.has_moved = True
    self.position = destination

  def castling(self, starting_position, board, moves):
    if (self.isChecked(starting_position, board)): return
    start_y = starting_position[0]
    start_x = starting_position[1]
    for piece, _position in self.yieldExpandedSteps(starting_position, [(0,1)], board):
      try:
        if (str(piece) in ("bRook","wRook")[int(self.getColor())] and not piece.has_moved):# used to be board.hasMoved(piece)
          if (not self.isChecked((start_y, start_x +1), board) and not self.isChecked((start_y, start_x +2), board)):
            move = [[starting_position, (start_y, start_x+2)], [_position, (start_y, start_x + 1)]]
            moves.append(move)
      except Exception as e: 
        print("castling exception: {}".format(e))
        pass
    for piece, _position in self.yieldExpandedSteps(starting_position, [(0,-1)], board):
      try:
        if (str(piece) in ("bRook","wRook")[int(self.getColor())] and not piece.has_moved):
          if (not self.isChecked((start_y, start_x - 1), board) and not self.isChecked((start_y, start_x - 2), board)): 
            move = [[starting_position,(start_y, start_x - 2)], [_position,(start_y, start_x - 1)]]
            moves.append(move)
      except Exception as e: 
        print("castling exception: {}".format(e))
        pass
                
  def isChecked(self, starting_position, board, prin = False):
    if (prin): print("Kings position: {}".format(starting_position))
    if (prin): print("horizontal-checks")
    for piece, position in self.yieldExpandedSteps(starting_position, self.horizontalMoves, board):
      #if (piece is not None):
      try:
        if (piece.getColor() != self.getColor() and str(piece) in self.horizontalAttacker):
          if (prin): print("horizontal-checks from Piece at position: {}".format(piece, position))
          return True
      except:
        pass
    if (prin): print("diagonal-checks")
    for piece, position in self.yieldExpandedSteps(starting_position, self.diagonalMoves, board):
      if (piece is not None):
        try:
          if (piece.getColor() != self.getColor() and str(piece) in self.diagonalAttacker):
            if (prin): print("Diagonal check from piece {} at position : {}".format(piece, position))
            return True
        except:
          pass
    if (prin): print("Knight checks")
    for piece in self.yieldExpandedStepsOnes(starting_position, self.knightJumps, board):
      if (piece is not None):
        try:
          if (piece.getColor() != self.getColor() and str(piece) in ["wKnight","bKnight"]):
            if (prin): print("Knight at position : {}".format(piece))              
            return True
        except:
          pass
    if (prin): print("Pawn-checks")
    for y,x in [[(-1,1),(-1,-1)],[(1,1),(1,-1)]][int(self.getColor())]:
      y_pos = starting_position[0] + y
      x_pos = starting_position[1] + x
      if (prin): print(y_pos,x_pos)
      if (board.isInside(y_pos,x_pos)):
        try:
          p = board.getPiece((y_pos,x_pos))
          #print("---------------------------------------------------------------------------------------------------------------------PAWN at Positions: {}".format((y_pos,x_pos)))
          if (str(p) in ["wPawn", "bPawn"] and p.getColor() != self.getColor()):
            if (prin): print("pawn at position : {}".format((y_pos,x_pos)))            
            return True
        except:
          pass
    if (prin): print("Knight checks")
    for piece in self.yieldExpandedStepsOnes(starting_position, self.diagonalMoves + self.horizontalMoves, board):
      if (piece is not None):
        try:
          if (piece.getColor() != self.getColor() and str(piece) in ("wKing","bKing")):
            if (prin): print("King at position : {}".format(piece))            
            return True
        except:
          pass
    return False


##Pawn

In [15]:
class Pawn(Piece):
  def __init__(self, is_white = True):
    Piece.__init__(self, 1, is_white)
    self.has_moved = False
    if (is_white):
      self.direction = 1
      self.value = 1
    else: 
      self.direction = -1
      self.value = -1

  def __repr__(self):
    if (self.getColor()):
      return "wPawn"#
    return "bPawn"#"Pawn"

  def getMoves(self, starting_position, board):
    moves = []
    y = starting_position[0]
    x = starting_position[1]
    # Check ob man das bord verlassen würde
    if y in range(1,7):
      #Forwärts-Bewegung mit check für doppelschritt
      if (board.getPiece([y + self.direction, x]) is None):
        if (y + self.direction in [0,7]):
          moves.append([starting_position, (y + self.direction,x), Queen(self.getColor())])
          moves.append([starting_position, (y + self.direction,x), Knight(self.getColor())])
          moves.append([starting_position, (y + self.direction,x), Rook(self.getColor())])
          moves.append([starting_position, (y + self.direction,x), Bishop(self.getColor())])
        else:
          moves.append([starting_position, (y + self.direction,x)])
        if not (self.has_moved):
          if (board.getPiece([y + 2*self.direction, x]) is None):
              moves.append([starting_position,(y + 2*self.direction,x)])
      # Schlag-bewegungen
      if (x<7 and board.getPiece([y + self.direction, x + 1]) is not None):
        try:
          if (board.getPiece([y + self.direction, x + 1]).getColor() != self.getColor()):
            if (y + self.direction in [0,7]):
              moves.append([starting_position, (y + self.direction, x + 1), Queen(self.getColor())])
              moves.append([starting_position, (y + self.direction, x + 1), Knight(self.getColor())])
              moves.append([starting_position, (y + self.direction, x + 1), Rook(self.getColor())])
              moves.append([starting_position, (y + self.direction, x + 1), Bishop(self.getColor())])
            else:
              moves.append([starting_position, (y + self.direction, x + 1)])
        except: pass
      if (x>0 and board.getPiece([y + self.direction, x - 1]) is not None):
        try:
          if (board.getPiece([y + self.direction, x - 1]).getColor() != self.getColor()):
            #Promotion
            if (y + self.direction in [0,7]):
              moves.append([starting_position, (y + self.direction, x - 1), Queen(self.getColor())])
              moves.append([starting_position, (y + self.direction, x - 1), Knight(self.getColor())])
              moves.append([starting_position, (y + self.direction, x - 1), Rook(self.getColor())])
              moves.append([starting_position, (y + self.direction, x - 1), Bishop(self.getColor())])
            else:
              moves.append([starting_position, (y + self.direction, x - 1)])
        except: pass
      #On-passant implementation
      try:
        last_move = board.played_moves[-1]
      except:
        #print("Pawn-Onpassant exception, probably because no moved played as of yet")
        return moves
      # Potentielles problem mit castling da ein castling move aus 2 zügen besteht. On-passant ist danach eh nicht möglich also können wir in diesem falle on-passant züge überspringen
      try:
        if (last_move[1][0] in [[3,4][int(self.getColor())]]):
          _y,_x = last_move[1]
          if (x in [_x+1,_x-1] and y == _y):
            #second move to take the pawn
            # smove = [[None], last_move[1]]
            moves.append([starting_position,(y+self.direction, _x)])
      except: pass
    return moves
  
  def move(self, destination):
    self.has_moved = True
  


##Method to return one-hot encoded board

In [16]:
#vocab = {"None" : [1,0], "wPawn" : [1,1], "wRook" : [1,2], "wKnight" : [1,3], "wBishop" : [1,4], "wQueen" : [1,5], "wKing" : [1,6], "bPawn" : [2,1], "bRook" : [2,2], "bKnight" : [2,3], "bBishop" : [2,4], "bQueen" : [2,5], "bKing" : [2,6]}
vocab = {"None" : 0, "wPawn" : 1, "wRook" : 2, "wKnight" : 3, "wBishop" : 4, "wQueen" : 5, "wKing" : 6, "bPawn" : 7, "bRook" : 8, "bKnight" : 9, "bBishop" : 10, "bQueen" : 11, "bKing" : 12}

def transform_board(board):
  int_board = []
  for row in board:
    #for keeping the structure of the board
    int_board.append([vocab[str(piece)] for piece in row])  #shape (8,8,13)
    #for returning the board as one long tensor without structure 
    # for piece in row:
    #   int_board.append(vocab[str(piece)])                 #shape: (64,13)
  tf_board = tf.convert_to_tensor(int_board)
  oh_board = tf.one_hot(tf_board, 13)  
  return oh_board

bm = Boardmanager()
oh_board = transform_board(bm.board.board)
#print(oh_board)

#Shit

In [17]:
for i in range (5):
  addCountToDict(a.getID(True))
b = a.copyBoard()
addCountToDict(b)
b.movePiece(((0,1),(2,0)))
addCountToDict(b)
print(asizeof.asizeof(a.getID(True)))
print(dic)
print(dic.values())

NameError: ignored

In [18]:
import time
start = time.perf_counter()
ls = []
for i in range(1000000):
  bool(ls)#(len(ls)<1)#
end = time.perf_counter()
print('Total time :',end - start)
print(bool(ls))
print((len(ls)>1))
# b = Chessboard()
# a = b.copyBoard()
# a.movePiece(((0,1),(2,0)))
# b.movePiece(((0,1),(2,3)))
# # start = time.perf_counter()
# # for i in range(100000):
# #   (a.getID() == b.getID())
# # end = time.perf_counter()
# # print('Total time :',end - start)
# # print(a.getID() == b.getID())
# # print(a.getID())
# start = time.perf_counter()
# for i in range(100000):
#   np.all(a.board == b.board)
# end = time.perf_counter()
# print('Total time :',end - start)
# #print(asizeof.asizeof(a.board))
# id = a.getID()
# print(asizeof.asizeof(id))
# print(asizeof.asizeof(int(id)))
# print(np.all(a.board == b.board))

Total time : 0.15674134300002152
False
False


In [19]:
# class Buffer():
#   def __init__(self, size, keys):
#     self.buffer = {}
#     for k in keys:
#       self.buffer[k] = []
#     self.size = size
#     self.keys

#   def storeInBuffer(self,trajectory):
#     self.buffer.keys() = trajctory
#     dict_keys = list(data_dict.keys())

#         current_len = len(self.buffer[self.keys[0]])
#         add_len = len(data_dict[dict_keys[0]])
#         new_len = current_len + add_len

#         if new_len >= self.size:
#             pop_len = new_len - self.size

#             for k in self.buffer.keys():
#                 self.buffer[k] = self.buffer[k][pop_len:]

#         for k in dict_keys:
#             self.buffer[k].extend(data_dict[k])

#         return self.buffer


In [20]:
bm = Boardmanager()
agent = Agent(True,bm )
# agent.NN.get_weights()
agent.NN(bm.board.transform_board(bm.board.board),False)
print("__________________")
print(agent.NN.get_weights())

TypeError: ignored