In [1]:
import numpy as np
import random
import tensorflow as tf

In [2]:
class Board():
    
    def __init__(self, deck):
        
        self.deck = random.sample(deck, 5) # each game only 5 cards are selected from the deck
        self.player1_cards = self.deck[:2]
        self.player2_cards = self.deck[2:4]
        self.remaining_card = self.deck[4]
        self.board_size = 5 # 5x5 board        
        self.board_state = self.get_board_state(0)
        
        
    def get_board_state(self, color):
        """ Return the board state, a 5 x 5 x 5 
        the first 5x5 represent the board, while the first 4 planes represent the piece and the last one the player.
        Color 0 is the first player, color 1 the second"""
        
        board_2D = np.array([[ 1,  1,  2,  1,  1],
                             [ 0,  0,  0,  0,  0],
                             [ 0,  0,  0,  0,  0],
                             [ 0,  0,  0,  0,  0],
                             [-1, -1, -2, -1, -1]])
        
        board_state = np.zeros((5, 5, 5))
            
        for k, piece in enumerate([1, 2, -1, -2]):
            for i in range(5):
                for j in range(5):
                    if board_2D[i][j] == piece:
                        board_state[k][i][j] = 1
                        
        if color == 0:
            board_state[4] = np.zeros((5, 5))
        else:
            board_state[4] = np.ones((5, 5))
        
        return board_state
        
        
    def get_legal_moves(self, policy, color):
        """Returns all the legal moves for a policy obtained with the neural network """
        
        # transform to positive output
        policy = policy - np.min(policy)
        
        # Revmove all illegal moves
        # The illegal moves are only the one where there is already an ally piece on the board
        if color == 0:
            restricted_cases = self.board_state[0] + self.board_state[1]
            player_cards = self.player1_cards
        else:
            restricted_cases = self.board_state[2] + self.board_state[3]
            player_cards = self.player2_cards
            
        # we can perform element wise multiplication for each plane to get the legal moves
        for k in range(policy.shape[2]): # policy.shape[2] = 13 planes
            policy[:, :, k] = tf.multiply(policy[:, :, k], (1 - restricted_cases))
        

        
        # Remove impossible actions, like trying to predict a move from outside the board
        outside0 = np.array([[1., 1., 1., 1., 1.],
                             [1., 1., 1., 1., 1.],
                             [1., 1., 1., 1., 1.], # impossible to land here when playing the move (0, 2)
                             [1., 1., 1., 1., 1.],
                             [0., 0., 0., 0., 0.]]) 
        outside1 = np.array([[1., 1., 1., 1., 1.],
                             [1., 1., 1., 1., 1.],
                             [1., 1., 1., 1., 1.], # impossible to land here when playing the move (1, NE)
                             [0., 1., 1., 1., 1.],
                             [0., 0., 0., 0., 0.]])
        outside2 = np.array([[0., 1., 1., 1., 1.],
                             [0., 1., 1., 1., 1.],
                             [0., 1., 1., 1., 1.], # impossible to land here when playing the move (1, E)
                             [0., 1., 1., 1., 1.],
                             [0., 1., 1., 1., 1.]]) 
        outside3 = np.array([[0., 0., 0., 0., 0.], 
                             [0., 1., 1., 1., 1.],
                             [1., 1., 1., 1., 1.], # impossible to land here when playing the move (1, SE)
                             [1., 1., 1., 1., 1.],
                             [1., 1., 1., 1., 1.]]) 
        outside4 = np.array([[0., 0., 0., 0., 0.],
                             [1., 1., 1., 1., 1.],
                             [1., 1., 1., 1., 1.], # impossible to land here when playing the move (1, S)
                             [1., 1., 1., 1., 1.],
                             [1., 1., 1., 1., 1.]]) 
        outside5 = np.array([[0., 0., 0., 0., 0.],
                             [1., 1., 1., 1., 0.],
                             [1., 1., 1., 1., 1.], # impossible to land here when playing the move (1, 'SW')
                             [1., 1., 1., 1., 1.],
                             [1., 1., 1., 1., 1.]]) 
        outside6 = np.array([[1., 1., 1., 1., 0.],
                             [1., 1., 1., 1., 0.],
                             [1., 1., 1., 1., 0.], # impossible to land here when playing the move (1, 'W')
                             [1., 1., 1., 1., 0.],
                             [1., 1., 1., 1., 0.]]) 
        outside7 = np.array([[1., 1., 1., 1., 0.],
                             [1., 1., 1., 1., 0.], 
                             [1., 1., 1., 1., 0.], # impossible to land here when playing the move (1, 'NW')
                             [1., 1., 1., 1., 0.],
                             [1., 1., 1., 0., 0.]]) 
        outside8 = np.array([[0., 0., 1., 1., 1.],
                             [0., 0., 1., 1., 1.],
                             [0., 0., 1., 1., 1.], # impossible to land here when playing the move (2, 'E')
                             [0., 0., 1., 1., 1.],
                             [0., 0., 1., 1., 1.]]) 
        outside9 = np.array([[1., 1., 1., 0., 0.],
                             [1., 1., 1., 0., 0.],
                             [1., 1., 1., 0., 0.], # impossible to land here when playing the move (2, 'W')
                             [1., 1., 1., 0., 0.],
                             [1., 1., 1., 0., 0.]]) 
        outside10 = np.array([[1., 1., 1., 1., 1.],
                              [1., 1., 1., 1., 1.],
                              [1., 1., 1., 1., 1.], # impossible to land here when playing the move (2, 'N')
                              [0., 0., 0., 0., 0.],
                              [0., 0., 0., 0., 0.]]) 
        outside11 = np.array([[0., 0., 0., 0., 0.],
                              [0., 0., 1., 1., 1.],
                              [0., 0., 1., 1., 1.], # impossible to land here when playing the move ('2E', '1S')
                              [0., 0., 1., 1., 1.],
                              [0., 0., 1., 1., 1.]]) 
        outside12 = np.array([[0., 0., 0., 0., 0.],
                              [1., 1., 1., 0., 0.],
                              [1., 1., 1., 0., 0.], # impossible to land here when playing the move ('2W', '1S')
                              [1., 1., 1., 0., 0.],
                              [1., 1., 1., 0., 0.]]) 
         
        # These moves are always impossible.
        list_outside_moves = [outside0, outside1, outside2, outside3, outside4, outside5, outside6, outside7, outside8,
                      outside9, outside10, outside11, outside12]
        
        impossibles_moves = np.zeros((5,5,13))
        for i,move in enumerate(list_outside_moves):
            impossibles_moves[:, :, i] = move
        
        policy = tf.multiply(policy, impossibles_moves)
        
        
        # then keep only the possible moving, considering the 2 card the player have.
        print(player_cards)
        for cards in player_cards:
            print(cards.moves)
        
        # Then we normalize to [0, 1]
        policy = policy / np.sum(policy)
        
        return policy

    
    def is_win(self, player):
        """Check whether the given player has win"""
        
        # Player1 win if -2 is dead, or if 2 managed to reach the center of the last line
        if player == 1:
            
            if self.board_state[4][2] == 2:
                return True
            
            for i in range(self.board_size):
                for j in range(self.board_size):
                    if self.board_state[i][j] == -2:
                        return False
            return True
        
        # Player2 (-1) win if 2 is dead, or if -2 managed to reach the center of the first line
        else:
            if self.board_state[0][2] == -2:
                return True
            
            for i in range(self.board_size):
                for j in range(self.board_size):
                    if self.board_state[i][j] == 2:
                        return False
            return True
        

    def execute_move(self, move, color):
        """Perform the given move on the board"""

        pass

https://arxiv.org/pdf/1712.01815.pdf
    
"The action space for chess includes all legal destinations for all of the players’
pieces on the board;"

In [4]:
%run deck.ipynb

In [5]:
a = Board(deck)

# All possible moves can also be write like this

In [16]:
layer_code = {}
i = 0
for card in deck:
    for move in card.moves:
        if move not in layer_code :
            layer_code[move] = i
            i += 1
layer_code

{(0, 2): 0,
 (0, -1): 1,
 (1, -1): 2,
 (-1, -1): 3,
 (2, 1): 4,
 (-2, 1): 5,
 (-2, 0): 6,
 (-1, 1): 7,
 (1, 1): 8,
 (2, 0): 9,
 (0, 1): 10,
 (1, 0): 11,
 (-1, 0): 12}

In [227]:
policy = np.ones((5,5,13))

In [228]:
pol_dict_x = {1: np.zeros((5,1)),
              2: np.zeros((5,2))}
pol_dict_y = {1: np.zeros((1,5)),
              2: np.zeros((2,5))}

for move in layer_code:
    # if moves are positives
    x, y = move
    if x > 0:
        policy[:, :, layer_code[move]][:, : x] = pol_dict_x[x]

    if y > 0:
        policy[:, :, layer_code[move]][-y:] = pol_dict_y[y]

    # if moves are negatives
    if x < 0:
        policy[:, :, layer_code[move]][:, :-x] = pol_dict_x[-x] # bug ici

    if y < 0:
        policy[:, :, layer_code[move]][:-y] = pol_dict_y[-y]

In [229]:
for i, k in enumerate(layer_code):
    print(f"direction {i} equal layer {k}")
    print(policy[:, :, i])

direction 0 equal layer (0, 2)
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
direction 1 equal layer (0, -1)
[[0. 0. 0. 0. 0.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
direction 2 equal layer (1, -1)
[[0. 0. 0. 0. 0.]
 [0. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]]
direction 3 equal layer (-1, -1)
[[0. 0. 0. 0. 0.]
 [0. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]]
direction 4 equal layer (2, 1)
[[0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0.]]
direction 5 equal layer (-2, 1)
[[0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0.]]
direction 6 equal layer (-2, 0)
[[0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 1. 1. 1.]]
direction 7 equal layer (-1, 1)
[[0. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]
 [0. 0. 0. 0. 0.]]
direction 8 equal