In [40]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

In [41]:
class Dummy_Game(object):
    def __init__(self) -> None:
        self._board = np.ones((5, 5), dtype=np.uint8) * -1
        self.current_player_idx = 1

    def get_board(self): return self._board

    def single_move(self, board, from_pos, move, player_id):
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok
    
    def check_winner_board(self, board):
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        for x in range(self._board.shape[0]):
            if self._board[x, 0] != -1 and all(self._board[x, :] == self._board[x, 0]): return self._board[x, 0]
        for y in range(self._board.shape[1]):
            if self._board[0, y] != -1 and all(self._board[:, y] == self._board[0, y]): return self._board[0, y]
        if self._board[0, 0] != -1 and all([self._board[x, x] for x in range(self._board.shape[0])] == self._board[0, 0]): return self._board[0, 0]
        if self._board[0, -1] != -1 and all([self._board[x, -(x + 1)] for x in range(self._board.shape[0])] == self._board[0, -1]): return self._board[0, -1]
        return -1

    def __move(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        if player_id > 2: return False
        prev_value = deepcopy(self._board[(from_pos[1], from_pos[0])])
        acceptable = self.__take((from_pos[1], from_pos[0]), player_id)
        if acceptable:
            acceptable = self.__slide((from_pos[1], from_pos[0]), slide)
            if not acceptable: self._board[(from_pos[1], from_pos[0])] = deepcopy(prev_value)
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        acceptable: bool = ((from_pos[0] == 0 and from_pos[1] < 5) or (from_pos[0] == 4 and from_pos[1] < 5) or (from_pos[1] == 0 and from_pos[0] < 5) or (from_pos[1] == 4 and from_pos[0] < 5)) and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable: self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: Move) -> bool:
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            acceptable_top: bool = from_pos == (0, 0) and (slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            piece = self._board[from_pos]
            if slide == Move.LEFT:
                for i in range(from_pos[1], 0, -1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                self._board[(from_pos[0], 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(from_pos[1], self._board.shape[1] - 1, 1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(from_pos[0], 0, -1): self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                self._board[(0, from_pos[1])] = piece
            elif slide == Move.BOTTOM:
                for i in range(from_pos[0], self._board.shape[0] - 1, 1): self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable

In [42]:
border = []
for i in range(5):
    for j in range(5):
        if i == 0 or i == 4 or j == 0 or j == 4:
            border.append((i, j))
BORDER = (list(set(border)))
print(len(BORDER))

def tile_to_moves(tile):
    possible_moves = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        
    if tile[0] == 0: possible_moves.remove(Move.LEFT)
    if tile[0] == 4: possible_moves.remove(Move.RIGHT)
    if tile[1] == 0: possible_moves.remove(Move.TOP)
    if tile[1] == 4: possible_moves.remove(Move.BOTTOM)

    return possible_moves

tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

ALL_MOVES = []
for tile in BORDER:
    possible_moves = tile_moves[tile]
    for move in possible_moves: ALL_MOVES.append((tile, move))
N_ALL = len(ALL_MOVES)

class RandomPlayer(Player):
    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        from_pos = random.choice(BORDER)
        while game.get_board()[from_pos[1], from_pos[0]] == 1 - game.current_player_idx: from_pos = random.choice(BORDER)

        possible_moves = tile_moves[from_pos]
        
        move = random.choice(possible_moves)

        return from_pos, move

16


In [43]:
## state can be represented as two numbers if 0,1 are considered as bit

In [44]:
import numpy as np

def state_to_board(state):
    binary_string = format(state, '050b')
    binary_array = np.array(list(map(int, binary_string))).reshape(2, 5, 5)

    board = np.zeros((5, 5), dtype=int)
    board[binary_array[0] == 1] = -1
    board[binary_array[1] == 1] = 1

    return board

def board_to_state(board):
    binary_array = np.zeros((2, 5, 5), dtype=int)
    
    binary_array[0][board == -1] = 1
    binary_array[1][board == 1] = 1

    binary_string = ''.join(map(str, binary_array.flatten()))
    return int(binary_string, 2)



rand_board = np.random.choice([-1, 0, 1], size=(5, 5), replace=True)
print('Board:')
print(rand_board)

rand_state = board_to_state(rand_board)
rand_board = state_to_board(rand_state)

print('\nState:')
print(rand_state)
print('\nBoard:')
print(state_to_board(rand_state))

Board:
[[ 1 -1 -1  1 -1]
 [-1 -1  0  0 -1]
 [ 0  1  0  0  0]
 [-1 -1  0 -1  0]
 [ 0  0  0  1  1]]

State:
484912564019203

Board:
[[ 1 -1 -1  1 -1]
 [-1 -1  0  0 -1]
 [ 0  1  0  0  0]
 [-1 -1  0 -1  0]
 [ 0  0  0  1  1]]


In [45]:
dict_rot = {
    (Move.TOP, 1): Move.RIGHT,
    (Move.TOP, 2): Move.BOTTOM,
    (Move.TOP, 3): Move.LEFT,
    (Move.BOTTOM, 1): Move.LEFT,
    (Move.BOTTOM, 2): Move.TOP,
    (Move.BOTTOM, 3): Move.RIGHT,
    (Move.LEFT, 1): Move.TOP,
    (Move.LEFT, 2): Move.RIGHT,
    (Move.LEFT, 3): Move.BOTTOM,
    (Move.RIGHT, 1): Move.BOTTOM,
    (Move.RIGHT, 2): Move.LEFT,
    (Move.RIGHT, 3): Move.TOP,
}

dict_flip = {
    Move.TOP: Move.TOP,
    Move.BOTTOM: Move.BOTTOM,
    Move.LEFT: Move.RIGHT,
    Move.RIGHT: Move.LEFT,
}

#rot_orario: (3, 4) -> (4, 1) -> (1, 0) -> (0, 3) -> (3, 4)
#: (xi, yi) -> (yi, 4 - xi)
#rot_anti_orario: (3, 4) -> (0, 3) -> (1, 0) -> (4, 1) -> (3, 4)
#: (xi, yi) -> (4 - yi, xi)

def rot(n_rot):
    def rot_n(from_pos, move):
        for _ in range(n_rot):
            from_pos = 4 - from_pos[1], from_pos[0]
        return from_pos, dict_rot[(move, n_rot)]
    return rot_n

def flip(from_pos, move):
    from_pos = 4 - from_pos[0], from_pos[1]
    return from_pos, dict_flip[move]

def flip_rot(n_rot):
    def flip_rot_n(from_pos, move):
        from_pos, move = rot(n_rot)(from_pos, move)
        return flip(from_pos, move)
    return flip_rot_n

rot1 = rot(1)
rot2 = rot(2)
rot3 = rot(3)
flip_rot1 = flip_rot(1)
flip_rot2 = flip_rot(2)
flip_rot3 = flip_rot(3)

verse_simmetries = [
    rot3,
    rot2,
    rot1,
    flip,
    flip_rot3,
    flip_rot2,
    flip_rot1,
]

inverse_simmetries = [
    rot1,
    rot2,
    rot3,
    flip,
    flip_rot1,
    flip_rot2,
    flip_rot3,
]

def check_simmetries(board, next_to_move, state_list):

    base_state = tuple(list(board.flatten()) + [next_to_move])
    if base_state in state_list: return base_state, None

    R1 = np.rot90(board)
    base_state = tuple(list(R1.flatten()) + [next_to_move])
    if base_state in state_list: return base_state, 0

    R2 = np.rot90(R1)
    base_state = tuple(list(R2.flatten()) + [next_to_move])
    if base_state in state_list: return base_state, 1

    R3 = np.rot90(R2)
    base_state = tuple(list(R3.flatten()) + [next_to_move])
    if base_state in state_list: return base_state, 2
    
    F = np.fliplr(board)
    base_state = tuple(list(F.flatten()) + [next_to_move])
    if base_state in state_list: return base_state, 3
    
    FR1 = np.rot90(F)
    base_state = tuple(list(FR1.flatten()) + [next_to_move])
    if base_state in state_list: return base_state, 4
    
    FR2 = np.rot90(FR1)
    base_state = tuple(list(FR2.flatten()) + [next_to_move])
    if base_state in state_list: return base_state, 5
    
    FR3 = np.rot90(FR2)
    base_state = tuple(list(FR3.flatten()) + [next_to_move])
    if base_state in state_list: return base_state, 6
    
    return None

MOVES_SIMMETRIES = {} #(id_move, id_simmetry) -> id_move

for id_move in range(len(ALL_MOVES)):
    from_pos, move = ALL_MOVES[id_move]

    for id_simmetry in range(len(inverse_simmetries)):

        idx = None
        for i in range(len(ALL_MOVES)):
            if ALL_MOVES[i] == inverse_simmetries[id_simmetry](from_pos, move):
                idx = i
                break
        
        MOVES_SIMMETRIES[(id_move, id_simmetry)] = i

print(len(MOVES_SIMMETRIES))
print(len(ALL_MOVES) * 7)
c = 10
for k, v in MOVES_SIMMETRIES.items():
    print((k, v))
    c -= 1
    if c == 0: break

308
308
((0, 0), 31)
((0, 1), 18)
((0, 2), 27)
((0, 3), 41)
((0, 4), 39)
((0, 5), 21)
((0, 6), 14)
((1, 0), 30)
((1, 1), 17)
((1, 2), 28)


In [46]:
## to discard for the amount of possible states

## to change with a check that control if a state already exist, if yes retreive the q-values, if not it creates a random q-value for 
## each move for that state, if legal


#import itertools
#from tqdm import tqdm
#MATRIX_SIZE = 5
#
#count_all = 0
#for s in itertools.product([-1, 0, 1], repeat= pow(MATRIX_SIZE, 2)): count_all += 1
#print(count_all)
#print('--------------')
#
#states_list = []
#
#for s in tqdm(itertools.product([-1, 0, 1], repeat= pow(MATRIX_SIZE, 2))):
#    if check_simmetries(np.array(s).reshape(MATRIX_SIZE, MATRIX_SIZE), states_list) is None:
#        states_list.append(tuple(s))
#    
#    #if count_all % 100 == 0:
#    #    print((len(states_list), count_all))
#
#print(count_all)
#print(len(states_list))

In [47]:
class MyPlayer(Player):
    def __init__(self, eps= 2, simulations_on_new= 1, base_until_draw= 10) -> None:
        super().__init__()

        self.eps = eps
        self.simulation_on_new = simulations_on_new

        self.states_dict = {}

        self.dummy = Dummy_Game()

        self.train_init()

        self.base_until_draw = base_until_draw
        self.last_pos_move = None
        self.until_draw = self.base_until_draw
        self.n_draws = 0

        self.tot_count = 0
        self.random_count = 0

    def get_random_count(self): return self.random_count
    def get_tot_count(self): return self.tot_count
    def get_n_draws(self): return self.n_draws
    def reset_counters(self):
        self.n_draws = 0
        self.tot_count = 0
        self.random_count = 0

    def expansion(self, board, player_id):

        # get legal childrens of a node, child: (move_to_child, child_state)

        children = []
        for from_pos, move in ALL_MOVES:
            new_board, ok = self.dummy.single_move(board, from_pos, move, player_id)
            if ok: children.append(((from_pos, move), tuple(list(new_board.flatten()) + [1 - player_id])))

        return children
    
    def simulation(self, base_board, player_id):

        # random_simulation from a certain state to the end, return outcome based on who is making the first move

        win_count = 0

        for _ in range(self.simulation_on_new):

            next_to_play = player_id
            board = deepcopy(base_board)

            winner = self.dummy.check_winner_board(board)
            while winner == -1:
                #print('turn start')
                #print(board)
                #print(f'player {next_to_play}')

                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)
                while not ok:
                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                    new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)

                #print(f'do {(from_pos, move)}')

                board = new_board
                next_to_play = 1 - next_to_play

                winner = self.dummy.check_winner_board(board)

            if winner == player_id: win_count += 1
        
        return win_count, self.simulation_on_new
    
    def update(self, states_to_update, win_count, count):

        #print('update')

        for state in states_to_update:
            if state in self.states_dict:
                self.states_dict[state][0] += win_count
                self.states_dict[state][1] += count
            else: self.states_dict[state] = [win_count, count, []]

    def selection(self, current_state, training= False):

        #print('selection')

        if current_state not in self.states_dict:
            if training:
                print("STATE NOT IN STATES_DICT -> SHOULDN'T HAPPEN")
                return None
            else: return None

        parent = self.states_dict[current_state]
        parent_count = parent[1]
        childrens = parent[2]

        #print(current_state)
        #print(len(childrens))

        if len(childrens) == 0:
            if training: return False, current_state
            else: return None

        has_childs = []
        values = []
        for _, child_state in childrens:
            child = self.states_dict[child_state]
            wi = child[0]
            ci = child[1]
            has_childs.append(len(child[2]) > 0)

            if training: values.append((wi / ci) + np.sqrt(self.eps * np.log(parent_count) / ci))
            else: values.append(wi / ci)

        #print(len(values))

        best_id = np.argmax(values)

        #print(best_id)

        if training: return has_childs[best_id], childrens[best_id][1]
        else: return childrens[best_id][0]

    def train_init(self):
        #print('train_init')

        starting_board = np.ones((5, 5), dtype= np.uint8) * -1
        first_to_move = 0

        win_count, count = self.simulation(starting_board, first_to_move)

        starting_state = tuple(list(starting_board.flatten()) + [0])

        self.update([starting_state], win_count, count)

        #print('train_init_end')
    
    def train_wrapper(self, training_epochs= 10):

        #print('train_wrapper')

        max_depth = 0

        for _ in tqdm(range(training_epochs)):

            board = np.ones((5, 5), dtype= np.uint8) * -1
            state = tuple(list(board.flatten()) + [0])
            next_to_move = 0
            depth = 0

            path = [state]

            has_child, chosen = self.selection(state, training= True)
            path.append(chosen)
            next_to_move = 1 - next_to_move
            while has_child:
                depth += 1
                if depth > max_depth: max_depth = depth
                has_child, chosen = self.selection(chosen, training= True)
                path.append(chosen)
                next_to_move = 1 - next_to_move

            board = np.array(chosen)[:-1].reshape(5, 5)

            children = self.expansion(board, next_to_move)

            for child in children:
                new_state = child[1]
                new_board = np.array(new_state)[:-1].reshape(5, 5)
                win_count, count = self.simulation(new_board, 1 - next_to_move)
                self.update([new_state], win_count, count)
                self.update(path, win_count, count)
                self.states_dict[chosen][2].append(child)

        print(f'max depth reached: {max_depth}')

        #print('train_wrapper_end')

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        self.tot_count += 1

        pos_move = self.selection(tuple(list(game.get_board().flatten()) + [game.current_player_idx]))

        if pos_move is None: rand = True
        elif pos_move == self.last_pos_move:
            self.until_draw -= 1
            if self.until_draw == 0:
                self.n_draws += 1
                rand = True
                self.until_draw = self.base_until_draw
        else:
            rand = False
            self.last_move = pos_move

        if rand:
            self.random_count += 1
            board = game.get_board()
            player_id = game.current_player_idx
            from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
            while board[from_pos[1], from_pos[0]] == 1 - player_id: from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
        else: from_pos, move = pos_move
        
        return from_pos, move

In [50]:
mc = MyPlayer(simulations_on_new= 5)
mc.train_wrapper(1000)

ThePlayer = mc
mc.reset_counters()

wins_first = 0
wins_second = 0
n_trials = 1000

player1 = ThePlayer
player2 = RandomPlayer()

for _ in tqdm(range(n_trials)):
    
    g = Game()
    winner = g.play(player1, player2)
    if winner == 0: wins_first += 1

    g = Game()
    winner = g.play(player2, player1)
    if winner == 1: wins_second += 1

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")
print(f'player played {mc.get_random_count()} random moves over {mc.get_tot_count()} moves')
print(len(ThePlayer.states_dict))

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [06:03<00:00,  2.75it/s]


max depth reached: 2


100%|██████████| 1000/1000 [00:05<00:00, 183.17it/s]

Player won 568 / 1000 as first
Player won 513 / 1000 as second
player played 46909 random moves over 47909 moves
11495





In [49]:
mc = MyPlayer(eps= 0, simulations_on_new= 5)
mc.train_wrapper(1000)

ThePlayer = mc
mc.reset_counters()

wins_first = 0
wins_second = 0
n_trials = 1000

player1 = ThePlayer
player2 = RandomPlayer()

for _ in tqdm(range(n_trials)):
    
    g = Game()
    winner = g.play(player1, player2)
    if winner == 0: wins_first += 1

    g = Game()
    winner = g.play(player2, player1)
    if winner == 1: wins_second += 1

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")
print(f'player played {mc.get_random_count()} random moves over {mc.get_tot_count()} moves')
print(len(ThePlayer.states_dict))

100%|██████████| 1000/1000 [05:31<00:00,  3.02it/s]


max depth reached: 10


100%|██████████| 1000/1000 [00:05<00:00, 184.82it/s]

Player won 568 / 1000 as first
Player won 497 / 1000 as second
player played 46774 random moves over 47787 moves
20493



