In [28]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

In [29]:
class Dummy_Game(object):
    def __init__(self) -> None:
        self._board = np.ones((5, 5), dtype=np.uint8) * -1
        self.current_player_idx = 1

    def get_board(self): return self._board

    def single_move(self, board, from_pos, move, player_id):
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok
    
    def check_winner_board(self, board):
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        for x in range(self._board.shape[0]):
            if self._board[x, 0] != -1 and all(self._board[x, :] == self._board[x, 0]): return self._board[x, 0]
        for y in range(self._board.shape[1]):
            if self._board[0, y] != -1 and all(self._board[:, y] == self._board[0, y]): return self._board[0, y]
        if self._board[0, 0] != -1 and all([self._board[x, x] for x in range(self._board.shape[0])] == self._board[0, 0]): return self._board[0, 0]
        if self._board[0, -1] != -1 and all([self._board[x, -(x + 1)] for x in range(self._board.shape[0])] == self._board[0, -1]): return self._board[0, -1]
        return -1

    def __move(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        if player_id > 2: return False
        prev_value = deepcopy(self._board[(from_pos[1], from_pos[0])])
        acceptable = self.__take((from_pos[1], from_pos[0]), player_id)
        if acceptable:
            acceptable = self.__slide((from_pos[1], from_pos[0]), slide)
            if not acceptable: self._board[(from_pos[1], from_pos[0])] = deepcopy(prev_value)
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        acceptable: bool = ((from_pos[0] == 0 and from_pos[1] < 5) or (from_pos[0] == 4 and from_pos[1] < 5) or (from_pos[1] == 0 and from_pos[0] < 5) or (from_pos[1] == 4 and from_pos[0] < 5)) and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable: self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: Move) -> bool:
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            acceptable_top: bool = from_pos == (0, 0) and (slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            piece = self._board[from_pos]
            if slide == Move.LEFT:
                for i in range(from_pos[1], 0, -1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                self._board[(from_pos[0], 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(from_pos[1], self._board.shape[1] - 1, 1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(from_pos[0], 0, -1): self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                self._board[(0, from_pos[1])] = piece
            elif slide == Move.BOTTOM:
                for i in range(from_pos[0], self._board.shape[0] - 1, 1): self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable

In [30]:
border = []
for i in range(5):
    for j in range(5):
        if i == 0 or i == 4 or j == 0 or j == 4:
            border.append((i, j))
BORDER = (list(set(border)))
print(len(BORDER))

def tile_to_moves(tile):
    possible_moves = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        
    if tile[0] == 0: possible_moves.remove(Move.LEFT)
    if tile[0] == 4: possible_moves.remove(Move.RIGHT)
    if tile[1] == 0: possible_moves.remove(Move.TOP)
    if tile[1] == 4: possible_moves.remove(Move.BOTTOM)

    return possible_moves

tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

ALL_MOVES = []
for tile in BORDER:
    possible_moves = tile_moves[tile]
    for move in possible_moves: ALL_MOVES.append((tile, move))
N_ALL = len(ALL_MOVES)

class RandomPlayer(Player):
    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        from_pos = random.choice(BORDER)
        while game.get_board()[from_pos[1], from_pos[0]] == 1 - game.current_player_idx: from_pos = random.choice(BORDER)

        possible_moves = tile_moves[from_pos]
        
        move = random.choice(possible_moves)

        return from_pos, move

16


In [31]:
## state can be represented as two numbers if 0,1 are considered as bit

In [32]:
import numpy as np

def state_to_board(state):
    binary_string = format(state, '050b')
    binary_array = np.array(list(map(int, binary_string))).reshape(2, 5, 5)

    board = np.zeros((5, 5), dtype=int)
    board[binary_array[0] == 1] = -1
    board[binary_array[1] == 1] = 1

    return board

def board_to_state(board):
    binary_array = np.zeros((2, 5, 5), dtype=int)
    
    binary_array[0][board == -1] = 1
    binary_array[1][board == 1] = 1

    binary_string = ''.join(map(str, binary_array.flatten()))
    return int(binary_string, 2)



rand_board = np.random.choice([-1, 0, 1], size=(5, 5), replace=True)
print('Board:')
print(rand_board)

rand_state = board_to_state(rand_board)
rand_board = state_to_board(rand_state)

print('\nState:')
print(rand_state)
print('\nBoard:')
print(state_to_board(rand_state))

Board:
[[-1  0 -1  0  0]
 [ 0  0 -1  0  0]
 [ 0  1 -1  1  1]
 [-1  0  0  1  1]
 [ 1 -1 -1  0  0]]

State:
708240509774960

Board:
[[-1  0 -1  0  0]
 [ 0  0 -1  0  0]
 [ 0  1 -1  1  1]
 [-1  0  0  1  1]
 [ 1 -1 -1  0  0]]


In [33]:
dict_rot = {
    (Move.TOP, 1): Move.LEFT,
    (Move.TOP, 2): Move.BOTTOM,
    (Move.TOP, 3): Move.RIGHT,
    (Move.BOTTOM, 1): Move.RIGHT,
    (Move.BOTTOM, 2): Move.TOP,
    (Move.BOTTOM, 3): Move.LEFT,
    (Move.LEFT, 1): Move.TOP,
    (Move.LEFT, 2): Move.RIGHT,
    (Move.LEFT, 3): Move.BOTTOM,
    (Move.RIGHT, 1): Move.BOTTOM,
    (Move.RIGHT, 2): Move.LEFT,
    (Move.RIGHT, 3): Move.TOP,
}

dict_flip = {
    Move.TOP: Move.TOP,
    Move.BOTTOM: Move.BOTTOM,
    Move.LEFT: Move.RIGHT,
    Move.RIGHT: Move.LEFT,
}

#rot_orario: (3, 4) -> (4, 1) -> (1, 0) -> (0, 3) -> (3, 4)
#: (xi, yi) -> (yi, 4 - xi)
#rot_anti_orario: (3, 4) -> (0, 3) -> (1, 0) -> (4, 1) -> (3, 4)
#: (xi, yi) -> (4 - yi, xi)

def rot(n_rot):
    def rot_n(from_pos, move):
        for _ in range(n_rot):
            from_pos = 4 - from_pos[1], from_pos[0]
        return from_pos, dict_rot[(move, n_rot)]
    return rot_n

def flip(from_pos, move):
    from_pos = 4 - from_pos[0], from_pos[1]
    return from_pos, dict_flip[move]

def flip_rot(n_rot):
    def flip_rot_n(from_pos, move):
        from_pos, move = rot(n_rot)(from_pos, move)
        return flip(from_pos, move)
    return flip_rot_n

rot1 = rot(1)
rot2 = rot(2)
rot3 = rot(3)
flip_rot1 = flip_rot(1)
flip_rot2 = flip_rot(2)
flip_rot3 = flip_rot(3)

verse_simmetries = [
    rot3,
    rot2,
    rot1,
    flip,
    flip_rot3,
    flip_rot2,
    flip_rot1,
]

inverse_simmetries = [
    rot1,
    rot2,
    rot3,
    flip,
    flip_rot1,
    flip_rot2,
    flip_rot3,
]

def check_simmetries(board, state_list):

    if tuple(board.flatten()) in state_list: return tuple(board.flatten()), None

    R1 = np.rot90(board)
    if tuple(R1.flatten()) in state_list: return tuple(R1.flatten()), 0

    R2 = np.rot90(R1)
    if tuple(R2.flatten()) in state_list: return tuple(R2.flatten()), 1

    R3 = np.rot90(R2)
    if tuple(R3.flatten()) in state_list: return tuple(R3.flatten()), 2
    
    F = np.fliplr(board)
    if tuple(F.flatten()) in state_list: return tuple(F.flatten()), 3
    
    FR1 = np.rot90(F)
    if tuple(FR1.flatten()) in state_list: return tuple(FR1.flatten()), 4
    
    FR2 = np.rot90(FR1)
    if tuple(FR2.flatten()) in state_list: return tuple(FR2.flatten()), 5
    
    FR3 = np.rot90(FR2)
    if tuple(FR3.flatten()) in state_list: return tuple(FR3.flatten()), 6
    
    return None

MOVES_SIMMETRIES = {} #(id_move, id_simmetry) -> id_move

for id_move in range(len(ALL_MOVES)):
    from_pos, move = ALL_MOVES[id_move]

    for id_simmetry in range(len(inverse_simmetries)):

        idx = None
        for i in range(len(ALL_MOVES)):
            if ALL_MOVES[i] == inverse_simmetries[id_simmetry](from_pos, move):
                idx = i
                break
        
        MOVES_SIMMETRIES[(id_move, id_simmetry)] = i

print(len(MOVES_SIMMETRIES))
print(len(ALL_MOVES) * 7)

308
308


In [34]:
class MyPlayer(Player):
    def __init__(self, use_simmetries= False, use_random_games= False, eps_greedy= False, eps= 2, add_sim_moves= False, add_n_moves= False, add_eval= False, alpha= 1, simulations_on_new= 1, base_until_move_change= 5) -> None:
        super().__init__()

        self.use_simmetries= use_simmetries

        self.use_random_games= use_random_games

        self.eps_greedy= eps_greedy
        self.eps = eps

        self.add_sim_moves= add_sim_moves

        self.add_n_moves = add_n_moves

        self.add_eval= add_eval
        self.alpha = alpha

        self.simulation_on_new = simulations_on_new

        self.states_dict = {}
        self.dummy = Dummy_Game()

        self.base_until_move_change = base_until_move_change
        self.last_pos_move = None
        self.until_move_change = self.base_until_move_change

        self.tot_count = 0
        self.random_count = 0

        self.expansion = self.expansion_simm if self.use_simmetries else self.expansion_no_simm

        if self.add_sim_moves:
            self.simulation = self.simulation_O_S if self.use_simmetries else self.simulation_O_no_S
        else: self.simulation = self.simulation_no_O_no_S

        self.update = self.update_add_n_moves if self.add_n_moves else self.update_norm

        if self.eps_greedy: self.selection = self.selection_R_A if self.add_eval else self.selection_R_no_A
        else: self.selection = self.selection_no_R_A if self.add_eval else self.selection_no_R_no_A

        if self.use_random_games:
            self.train_init = self.train_init_norm
            self.train_wrapper = self.train_wrapper_rand_simm if self.use_simmetries else self.train_wrapper_rand_no_simm
        elif self.add_sim_moves:
            self.train_init = self.train_init_moves_simu
            self.train_wrapper = self.train_wrapper_moves_simu
        else:
            self.train_init = self.train_init_norm
            self.train_wrapper = self.train_wrapper_norm

        self.train_init()

    def get_random_count(self): return self.random_count
    def get_tot_count(self): return self.tot_count
    def reset_counters(self):
        self.tot_count = 0
        self.random_count = 0
        self.until_move_change = self.base_until_move_change

    def expansion_simm(self, board, next_to_move):

        # get legal childrens of a node, child: (move_to_child, child_state)

        children = []
        for from_pos, move in ALL_MOVES:
            new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_move)
            if ok:
                new_state = tuple(new_board.flatten())
                
                ######## S
                simmetry = check_simmetries(new_board, self.states_dict)
                if simmetry is None: id_simmetry = None
                else: new_state, id_simmetry = simmetry
                ######## S

                children.append(((from_pos, move), new_state))

        return children

    def expansion_no_simm(self, board, next_to_move):

        # get legal childrens of a node, child: (move_to_child, child_state)

        children = []
        for from_pos, move in ALL_MOVES:
            new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_move)
            if ok:
                new_state = tuple(new_board.flatten())
                children.append(((from_pos, move), new_state))

        return children
    
    def simulation_no_O_no_S(self, base_board, next_to_move):

        # random simulation from a certain state to the end, return outcomes

        win_0 = 0
        win_1 = 0

        for _ in range(self.simulation_on_new):

            next_to_play = next_to_move
            board = deepcopy(base_board)

            winner = self.dummy.check_winner_board(board)
            while winner == -1:

                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)
                while not ok:
                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                    new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)

                board = new_board
                next_to_play = 1 - next_to_play

                winner = self.dummy.check_winner_board(board)

            if winner == 0: win_0 += 1
            else: win_1 += 1

        return win_0, win_1, self.simulation_on_new

    def simulation_O_no_S(self, base_board, next_to_move):

        # random simulation from a certain state to the end, return outcomes

        win_0 = 0
        win_1 = 0

        ##### O
        path = []
        ##### O

        for _ in range(self.simulation_on_new):

            next_to_play = next_to_move
            board = deepcopy(base_board)

            winner = self.dummy.check_winner_board(board)
            while winner == -1:

                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)
                while not ok:
                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                    new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)

                board = new_board
                next_to_play = 1 - next_to_play

                winner = self.dummy.check_winner_board(board)

                ##### O
                path.append(tuple(board.flatten()))
                ##### O

            if winner == 0: win_0 += 1
            else: win_1 += 1
        
        ##### O
        return path, win_0, win_1, self.simulation_on_new
        #return win_0, win_1, self.simulation_on_new
        ##### O

    def simulation_O_S(self, base_board, next_to_move):

        # random simulation from a certain state to the end, return outcomes

        win_0 = 0
        win_1 = 0

        ##### O
        path = []
        ##### O

        for _ in range(self.simulation_on_new):

            next_to_play = next_to_move
            board = deepcopy(base_board)

            winner = self.dummy.check_winner_board(board)
            while winner == -1:

                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)
                while not ok:
                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                    new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)

                board = new_board
                next_to_play = 1 - next_to_play

                winner = self.dummy.check_winner_board(board)

                state = tuple(board.flatten())

                ######## S
                simmetry = check_simmetries(board, self.states_dict)
                if simmetry is not None: state, _ = simmetry
                ######## S

                ##### O
                path.append(state)
                ##### O

            if winner == 0: win_0 += 1
            else: win_1 += 1
        
        ##### O
        return path, win_0, win_1, self.simulation_on_new
        #return win_0, win_1, self.simulation_on_new
        ##### O
    
    def update_norm(self, states_to_update, win_0, win_1, count):

        player_responsible = 0

        for state in states_to_update:
            player_responsible = 1 - player_responsible
            amount = win_0 if player_responsible == 0 else win_1
            if state in self.states_dict:
                self.states_dict[state][0] += amount
                self.states_dict[state][1] += count
            else: self.states_dict[state] = [amount, count, []]
    
    def update_add_n_moves(self, states_to_update, win_0, win_1, count):

        player_responsible = 0

        ##### M
        n_move = 0
        ##### M

        for state in states_to_update:
            player_responsible = 1 - player_responsible

            ##### M
            if player_responsible == 1: n_move += 1
            amount = win_0 * n_move if player_responsible == 0 else win_1 * n_move
            #amount = win_0 if player_responsible == 0 else win_1
            ##### M

            if state in self.states_dict:
                self.states_dict[state][0] += amount
                self.states_dict[state][1] += count
            else: self.states_dict[state] = [amount, count, []]

    ##### A
    def evaluate_board(self, board):

        player_id = 1 if sum(sum(board < 0)) % 2 == 0 else 0 # (pari -> 1 ha giocato, dispari -> 0 ha giocato), si valuta chi ha appena giocato

        bonus = 0
        diag_a_player = 0
        diag_a_enemy = 0
        diag_b_player = 0
        diag_b_enemy = 0
        for i in range(5):
            
            line = board[i, :]
            bonus += pow(sum(line == player_id), 2) - pow(sum(line == 1 - player_id), 2)
            #bonus += sum(line == player_id) - sum(line == 1 - player_id)

            line = board[:, i]
            bonus += pow(sum(line == player_id), 2) - pow(sum(line == 1 - player_id), 2)
            #bonus += sum(line == player_id) - sum(line == 1 - player_id)
            
            if board[i, i] == player_id: diag_a_player += 1
            elif board[i, i] == 1 - player_id: diag_a_enemy += 1
            
            if board[i, 4-i] == player_id: diag_b_player += 1
            elif board[i, 4-i] == 1 - player_id: diag_b_enemy += 1

        bonus += pow(diag_a_player, 2) - pow(diag_a_enemy, 2)
        #bonus += diag_a_player - diag_a_enemy
            
        bonus += pow(diag_b_player, 2) - pow(diag_b_enemy, 2)
        #bonus += diag_b_player - diag_b_enemy

        #bonus += 15 if board[2, 2] == current_player else -16 # the center of the board is used in four winning combination

        return bonus / 300
    ##### A

    def selection_no_R_no_A(self, current_state, path= None):

        training = path is not None
        #if not training: path = []

        if current_state not in self.states_dict:
            if training:
                print("STATE NOT IN STATES_DICT -> IT SHOULDN'T HAPPEN")
                return None
            else: return None

        parent = self.states_dict[current_state]
        parent_count = parent[1]
        childrens = parent[2]

        if len(childrens) == 0: return None
            
        has_childs = []
        values = []
        board = np.array(current_state).reshape(5, 5)
        for _, child_state in childrens:
            #if child_state not in path:
                child = self.states_dict[child_state]
                wi = child[0]
                ci = child[1]
                has_childs.append(len(child[2]) > 0)

                if training: values.append((wi / ci) + np.sqrt(self.eps * np.log(parent_count) / ci))
                else: values.append(wi / ci)

        best_id = np.argmax(values)

        if training: return childrens[best_id][1]
        else: return childrens[best_id][0]

    def selection_R_no_A(self, current_state, path= None):

        training = path is not None
        #if not training: path = []

        if current_state not in self.states_dict:
            if training:
                print("STATE NOT IN STATES_DICT -> IT SHOULDN'T HAPPEN")
                return None
            else: return None

        parent = self.states_dict[current_state]
        parent_count = parent[1]
        childrens = parent[2]

        if len(childrens) == 0: return None

        ##### R
        if training:
            if np.random.random() < self.eps:
                return childrens[np.random.randint(0, len(childrens))][1]
        ##### R
            
        has_childs = []
        values = []
        board = np.array(current_state).reshape(5, 5)
        for _, child_state in childrens:
            #if child_state not in path:
                child = self.states_dict[child_state]
                wi = child[0]
                ci = child[1]
                has_childs.append(len(child[2]) > 0)

                if training: values.append((wi / ci))# + np.sqrt(self.eps * np.log(parent_count) / ci))   ##### not R
                else: values.append(wi / ci)

        best_id = np.argmax(values)

        if training: return childrens[best_id][1]
        else: return childrens[best_id][0]

    def selection_no_R_A(self, current_state, path= None):

        training = path is not None
        #if not training: path = []

        if current_state not in self.states_dict:
            if training:
                print("STATE NOT IN STATES_DICT -> IT SHOULDN'T HAPPEN")
                return None
            else: return None

        parent = self.states_dict[current_state]
        parent_count = parent[1]
        childrens = parent[2]

        if len(childrens) == 0: return None
            
        has_childs = []
        values = []
        board = np.array(current_state).reshape(5, 5)
        for _, child_state in childrens:
            #if child_state not in path:
                child = self.states_dict[child_state]
                wi = child[0]
                ci = child[1]
                has_childs.append(len(child[2]) > 0)

                if training: values.append((wi / ci) + np.sqrt(self.eps * np.log(parent_count) / ci))
                else: values.append(wi / ci)

                ##### A
                values[-1] += self.alpha * self.evaluate_board(np.array(child_state).reshape(5, 5))
                ##### A

        best_id = np.argmax(values)

        if training: return childrens[best_id][1]
        else: return childrens[best_id][0]

    def selection_R_A(self, current_state, path= None):

        training = path is not None
        #if not training: path = []

        if current_state not in self.states_dict:
            if training:
                print("STATE NOT IN STATES_DICT -> IT SHOULDN'T HAPPEN")
                return None
            else: return None

        parent = self.states_dict[current_state]
        parent_count = parent[1]
        childrens = parent[2]

        if len(childrens) == 0: return None

        ##### R
        if training:
            if np.random.random() < self.eps:
                return childrens[np.random.randint(0, len(childrens))][1]
        ##### R
            
        has_childs = []
        values = []
        board = np.array(current_state).reshape(5, 5)
        for _, child_state in childrens:
            #if child_state not in path:
                child = self.states_dict[child_state]
                wi = child[0]
                ci = child[1]
                has_childs.append(len(child[2]) > 0)

                if training: values.append((wi / ci))# + np.sqrt(self.eps * np.log(parent_count) / ci))   ##### not R
                else: values.append(wi / ci)

                ##### A
                values[-1] += self.alpha * self.evaluate_board(np.array(child_state).reshape(5, 5))
                ##### A

        best_id = np.argmax(values)

        if training: return childrens[best_id][1]
        else: return childrens[best_id][0]

    def train_init_norm(self):

        starting_board = np.ones((5, 5), dtype= np.uint8) * -1

        win_0, win_1, count = self.simulation(starting_board, 0)

        starting_state = tuple(starting_board.flatten())

        self.update([starting_state], win_0, win_1, count)

    def train_init_moves_simu(self):

        starting_board = np.ones((5, 5), dtype= np.uint8) * -1

        ##### O
        path, win_0, win_1, count = self.simulation(starting_board, 0)
        #win_0, win_1, count = self.simulation(starting_board, 0)
        ##### O

        starting_state = tuple(starting_board.flatten())

        ##### O
        self.update([starting_state] + path, win_0, win_1, count)
        #self.update([starting_state], win_0, win_1, count)
        ##### O

    def train_wrapper_norm(self, training_epochs= 10):

        max_depth = 0

        for _ in tqdm(range(training_epochs)):
        #for _ in range(training_epochs):

            
            board = np.ones((5, 5), dtype= np.uint8) * -1
            state = tuple(board.flatten())
            next_to_move = 0

            path = [state]

            chosen = self.selection(state, path= path)
            while chosen is not None:
                path.append(chosen)
                next_to_move = 1 - next_to_move
                chosen = self.selection(chosen, path= path)
            chosen = path[-1]

            #print('==================================================')
            #print('==================================================')
            #print(len(path))
            #print(next_to_move)
            #print('---------')

            if len(path) > max_depth: max_depth = len(path)

            board = np.array(chosen).reshape(5, 5)

            children = self.expansion(board, next_to_move)

            for child in children:
                new_state = child[1]
                new_board = np.array(new_state).reshape(5, 5)
                win_0, win_1, count = self.simulation(new_board, 1 - next_to_move)
                new_path = deepcopy(path)
                new_path.append(new_state)
                self.update(new_path, win_0, win_1, count)
                self.states_dict[chosen][2].append(child)

        print(f'max depth: {max_depth}')
        print(f'states explored: {len(self.states_dict)}')

    def train_wrapper_moves_simu(self, training_epochs= 10):

        max_depth = 0

        for _ in tqdm(range(training_epochs)):
        #for _ in range(training_epochs):

            
            board = np.ones((5, 5), dtype= np.uint8) * -1
            state = tuple(board.flatten())
            next_to_move = 0

            path = [state]

            chosen = self.selection(state, path= path)
            while chosen is not None:
                path.append(chosen)
                next_to_move = 1 - next_to_move
                chosen = self.selection(chosen, path= path)
            chosen = path[-1]

            #print('==================================================')
            #print('==================================================')
            #print(len(path))
            #print(next_to_move)
            #print('---------')

            if len(path) > max_depth: max_depth = len(path)

            board = np.array(chosen).reshape(5, 5)

            children = self.expansion(board, next_to_move)

            for child in children:
                new_state = child[1]
                new_board = np.array(new_state).reshape(5, 5)

                ##### O
                rand_path, win_0, win_1, count = self.simulation(new_board, 1 - next_to_move)
                #win_0, win_1, count = self.simulation(new_board, 1 - next_to_move)
                ##### O
                new_path = deepcopy(path)
                new_path.append(new_state)
                ##### O
                self.update(new_path + rand_path, win_0, win_1, count)
                #self.update(new_path, win_0, win_1, count)
                ##### O
                self.states_dict[chosen][2].append(child)

        print(f'max depth: {max_depth}')
        print(f'states explored: {len(self.states_dict)}')

    def train_wrapper_rand_no_simm(self, n_games= 10):

        for _ in tqdm(range(n_games)):

            board = np.ones((5, 5), dtype= np.uint8) * -1
            new_state = tuple(board.flatten())
            next_to_move = 1

            path = []

            winner = -1
            while winner == -1:

                state = new_state

                node = self.states_dict[state]
                next_to_move = 1 - next_to_move
                
                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]

                board, ok = self.dummy.single_move(board, from_pos, move, next_to_move)
                while not ok:

                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]

                    board, ok = self.dummy.single_move(board, from_pos, move, next_to_move)

                new_state = tuple(board.flatten())

                path.append(new_state)

                if ((from_pos, move), new_state) not in node[2]: node[2].append(((from_pos, move), new_state))

                winner = self.dummy.check_winner_board(board)

            self.update(path, 1 - winner, winner, 1)

        print(f'states explored: {len(self.states_dict)}')

    def train_wrapper_rand_simm(self, n_games= 10):

        for _ in tqdm(range(n_games)):

            board = np.ones((5, 5), dtype= np.uint8) * -1
            new_state = tuple(board.flatten())
            next_to_move = 1

            path = []

            winner = -1
            while winner == -1:

                state = new_state

                ######## S
                simmetry = check_simmetries(board, self.states_dict)
                if simmetry is None: 
                    self.states_dict[state] = [0, 0, []]
                    id_simmetry = None
                else: state, id_simmetry = simmetry
                #if state not in self.states_dict: self.states_dict[state] = [0, 0, []]
                ######## S

                node = self.states_dict[state]
                next_to_move = 1 - next_to_move
                
                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]

                ######## S
                if id_simmetry is not None: from_pos, move = inverse_simmetries[id_simmetry](from_pos, move)
                ######## S

                board, ok = self.dummy.single_move(board, from_pos, move, next_to_move)
                while not ok:

                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]

                    ######## S
                    if id_simmetry is not None: from_pos, move = inverse_simmetries[id_simmetry](from_pos, move)
                    ######## S

                    board, ok = self.dummy.single_move(board, from_pos, move, next_to_move)

                new_state = tuple(board.flatten())

                path.append(new_state)

                if ((from_pos, move), new_state) not in node[2]: node[2].append(((from_pos, move), new_state))

                winner = self.dummy.check_winner_board(board)

            self.update(path, 1 - winner, winner, 1)

        print(f'states explored: {len(self.states_dict)}')

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        self.tot_count += 1

        board = game.get_board()
        state = tuple(board.flatten())

        ######## S
        if self.use_simmetries:
            simmetry = check_simmetries(board, self.states_dict)
            if simmetry is not None: state, id_simmetry = simmetry
            else: id_simmetry = None
        ######## S

        pos_move = self.selection(state)

        ######## S
        if self.use_simmetries:
            if id_simmetry is not None and pos_move is not None: pos_move = inverse_simmetries[id_simmetry](pos_move[0], pos_move[1])
        ######## S

        rand = False
        if pos_move is None: rand = True
        elif pos_move == self.last_pos_move:
            self.until_move_change -= 1
            if self.until_move_change == 0:
                rand = True
                self.until_move_change = self.base_until_move_change
        else: self.last_pos_move = pos_move

        if rand:
            self.random_count += 1
            board = game.get_board()
            player_id = game.current_player_idx
            from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
            while board[from_pos[1], from_pos[0]] == 1 - player_id: from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
        else: from_pos, move = pos_move
        
        #print((from_pos, move))
        return from_pos, move

In [24]:
#use_simmetries
#use_random_games
#eps_greedy
#eps 
#add_sim_moves
#add_n_moves
#add_eval
#alpha
#simulations_on_new

mc = MyPlayer(
    use_simmetries= True,
    use_random_games= False,
    eps_greedy= False,
    eps= 0.3,
    add_sim_moves= True,
    add_n_moves= False,
    add_eval= False,
    alpha= 1,
    simulations_on_new= 1
)
mc.train_wrapper(1000)

ThePlayer = mc

wins_first = 0
wins_second = 0
n_trials = 1000

player1 = ThePlayer
player2 = RandomPlayer()

count_rand_1 = 0
count_tot_1 = 0
count_rand_2 = 0
count_tot_2 = 0

for _ in range(n_trials):
    
    g = Game()
    player1.reset_counters()
    winner = g.play(player1, player2)
    if winner == 0: wins_first += 1
    count_rand_1 +=  player1.get_random_count()
    count_tot_1 += player1.get_tot_count()

    g = Game()
    player1.reset_counters()
    winner = g.play(player2, player1)
    if winner == 1: wins_second += 1
    count_rand_2 += player1.get_random_count()
    count_tot_2 += player1.get_tot_count()

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")
print(f'player played {count_tot_1 - count_rand_1} non-random moves over {count_tot_1} moves as first')
print(f'player played {count_tot_2 - count_rand_2} non-random moves over {count_tot_2} moves as second')

100%|██████████| 1000/1000 [04:08<00:00,  4.02it/s]


max depth: 6
states explored: 1469700
Player won 555 / 1000 as first
Player won 496 / 1000 as second
player played 4030 non-random moves over 25379 moves as first
player played 2792 non-random moves over 24955 moves as second


In [37]:
def train_and_test(use_simmetries= False, use_random_games= False, eps_greedy= False, eps= 2, add_sim_moves= False, add_n_moves= False, add_eval= False, alpha= 1, simulations_on_new= 1):
    print(f'use_simmetries: {use_simmetries}')
    print(f'use_random_games: {use_random_games}')
    print(f'eps_greedy: {eps_greedy}')
    print(f'eps: {eps}')
    print(f'add_sim_moves: {add_sim_moves}')
    print(f'add_n_moves: {add_n_moves}')
    print(f'add_eval: {add_eval}')
    print(f'alpha: {alpha}')
    print(f'simulations_on_new: {simulations_on_new}')
    mc = MyPlayer(
        use_simmetries= use_simmetries,
        use_random_games= use_random_games,
        eps_greedy= eps_greedy,
        eps= eps,
        add_sim_moves= add_sim_moves,
        add_n_moves= add_n_moves,
        add_eval= add_eval,
        alpha= alpha,
        simulations_on_new= simulations_on_new
    )
    mc.train_wrapper(100)

    ThePlayer = mc

    wins_first = 0
    wins_second = 0
    n_trials = 1000

    player1 = ThePlayer
    player2 = RandomPlayer()

    count_rand_1 = 0
    count_tot_1 = 0
    count_rand_2 = 0
    count_tot_2 = 0

    for _ in range(n_trials):
        
        g = Game()
        player1.reset_counters()
        winner = g.play(player1, player2)
        if winner == 0: wins_first += 1
        count_rand_1 +=  player1.get_random_count()
        count_tot_1 += player1.get_tot_count()

        g = Game()
        player1.reset_counters()
        winner = g.play(player2, player1)
        if winner == 1: wins_second += 1
        count_rand_2 += player1.get_random_count()
        count_tot_2 += player1.get_tot_count()

    print(f"Player won {wins_first} / {n_trials} as first")
    print(f"Player won {wins_second} / {n_trials} as second")
    print(f'player played {count_tot_1 - count_rand_1} non-random moves over {count_tot_1} moves as first')
    print(f'player played {count_tot_2 - count_rand_2} non-random moves over {count_tot_2} moves as second')
    print('\n=======================================================================')
    print('=======================================================================\n')

    return wins_first + wins_second, (count_tot_1 - count_rand_1) + (count_tot_2 - count_rand_2)

best_no_rand = 0
best_no_rand_wins = 0
best_conf_no_rand = None
best_win = 0
best_wins_no_rand = 0
best_conf_win = None

for use_simmetries in [False, True]:
    
    for use_random_games in [False, True]:

        if use_random_games:
            eg_range = [False]
            eps_range = [0.25, 0.5, 0.75]
            asm_range = [False]
        else:
            eg_range = [False, True]
            eps_range = [1, 2, 4, 8]
            asm_range = [False, True]

        for eps_greedy in eg_range:
            if eps_greedy and not use_random_games: eps_range = [0]

            for eps in eps_range:

                for add_sim_moves in asm_range:

                    for add_n_moves in [False, True]:

                        for add_eval in [False]:#, True]:
                            alpha_range = [0.25, 1, 4] if add_eval else [0]

                            for alpha in alpha_range:

                                for simulations_on_new in [1, 2]:

                                    wins, non_rand = train_and_test(use_simmetries, use_random_games, eps_greedy, eps, add_sim_moves, add_n_moves, add_eval, alpha, simulations_on_new)

                                    if wins > best_win:
                                        best_win = wins
                                        best_wins_no_rand = non_rand
                                        best_conf_win = (use_simmetries, use_random_games, eps_greedy, eps, add_sim_moves, add_n_moves, add_eval, alpha, simulations_on_new)
                                    
                                    if non_rand > best_no_rand:
                                        best_no_rand = non_rand
                                        best_no_rand_wins = wins
                                        best_conf_no_rand = (use_simmetries, use_random_games, eps_greedy, eps, add_sim_moves, add_n_moves, add_eval, alpha, simulations_on_new)

print('best for wins')
use_simmetries, use_random_games, eps_greedy, eps, add_sim_moves, add_n_moves, add_eval, alpha, simulations_on_new = best_conf_win
print(f'use_simmetries: {use_simmetries}')
print(f'use_random_games: {use_random_games}')
print(f'eps_greedy: {eps_greedy}')
print(f'eps: {eps}')
print(f'add_sim_moves: {add_sim_moves}')
print(f'add_n_moves: {add_n_moves}')
print(f'add_eval: {add_eval}')
print(f'alpha: {alpha}')
print(f'simulations_on_new: {simulations_on_new}')
print('-')
print(f'won {best_win} times')
print(f'with {best_wins_no_rand} non random moves')

print('best for non random moves')
use_simmetries, use_random_games, eps_greedy, eps, add_sim_moves, add_n_moves, add_eval, alpha, simulations_on_new = best_conf_no_rand
print(f'use_simmetries: {use_simmetries}')
print(f'use_random_games: {use_random_games}')
print(f'eps_greedy: {eps_greedy}')
print(f'eps: {eps}')
print(f'add_sim_moves: {add_sim_moves}')
print(f'add_n_moves: {add_n_moves}')
print(f'add_eval: {add_eval}')
print(f'alpha: {alpha}')
print(f'simulations_on_new: {simulations_on_new}')
print('-')
print(f'won {best_no_rand_wins} times')
print(f'used {best_no_rand} non random moves')

use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 1
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:11<00:00,  8.54it/s]


max depth: 3
states explored: 1349
Player won 575 / 1000 as first
Player won 508 / 1000 as second
player played 1232 non-random moves over 23833 moves as first
player played 1014 non-random moves over 23386 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 1
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:22<00:00,  4.45it/s]


max depth: 3
states explored: 1351
Player won 561 / 1000 as first
Player won 510 / 1000 as second
player played 1163 non-random moves over 23941 moves as first
player played 1021 non-random moves over 23333 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 1
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:11<00:00,  8.49it/s]


max depth: 3
states explored: 1346
Player won 562 / 1000 as first
Player won 470 / 1000 as second
player played 1190 non-random moves over 24563 moves as first
player played 1019 non-random moves over 23629 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 1
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:21<00:00,  4.57it/s]


max depth: 3
states explored: 1337
Player won 573 / 1000 as first
Player won 495 / 1000 as second
player played 1211 non-random moves over 24055 moves as first
player played 1013 non-random moves over 23452 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 1
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:12<00:00,  7.73it/s]


max depth: 3
states explored: 167507
Player won 535 / 1000 as first
Player won 503 / 1000 as second
player played 1220 non-random moves over 24299 moves as first
player played 1028 non-random moves over 23757 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 1
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:24<00:00,  4.03it/s]


max depth: 3
states explored: 328930
Player won 528 / 1000 as first
Player won 494 / 1000 as second
player played 1192 non-random moves over 23375 moves as first
player played 1035 non-random moves over 23550 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 1
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:13<00:00,  7.64it/s]


max depth: 3
states explored: 169388
Player won 546 / 1000 as first
Player won 531 / 1000 as second
player played 1222 non-random moves over 24179 moves as first
player played 1007 non-random moves over 24443 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 1
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:25<00:00,  3.99it/s]


max depth: 4
states explored: 330350
Player won 585 / 1000 as first
Player won 498 / 1000 as second
player played 1378 non-random moves over 23885 moves as first
player played 1059 non-random moves over 23547 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 2
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:11<00:00,  8.53it/s]


max depth: 3
states explored: 1355
Player won 561 / 1000 as first
Player won 507 / 1000 as second
player played 1166 non-random moves over 24089 moves as first
player played 1053 non-random moves over 23508 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 2
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:21<00:00,  4.56it/s]


max depth: 3
states explored: 1377
Player won 556 / 1000 as first
Player won 514 / 1000 as second
player played 1185 non-random moves over 23741 moves as first
player played 1020 non-random moves over 24081 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 2
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:11<00:00,  8.65it/s]


max depth: 3
states explored: 1326
Player won 565 / 1000 as first
Player won 494 / 1000 as second
player played 1207 non-random moves over 23943 moves as first
player played 1014 non-random moves over 23817 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 2
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:22<00:00,  4.45it/s]


max depth: 3
states explored: 1397
Player won 584 / 1000 as first
Player won 491 / 1000 as second
player played 1161 non-random moves over 24184 moves as first
player played 1024 non-random moves over 23661 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 2
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:12<00:00,  7.98it/s]


max depth: 3
states explored: 166801
Player won 527 / 1000 as first
Player won 534 / 1000 as second
player played 1161 non-random moves over 23943 moves as first
player played 1091 non-random moves over 24130 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 2
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:24<00:00,  4.00it/s]


max depth: 3
states explored: 329832
Player won 554 / 1000 as first
Player won 484 / 1000 as second
player played 1168 non-random moves over 23583 moves as first
player played 1010 non-random moves over 23516 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 2
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:13<00:00,  7.63it/s]


max depth: 3
states explored: 166336
Player won 561 / 1000 as first
Player won 476 / 1000 as second
player played 1188 non-random moves over 23670 moves as first
player played 1010 non-random moves over 23221 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 2
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:24<00:00,  4.07it/s]


max depth: 3
states explored: 329062
Player won 550 / 1000 as first
Player won 493 / 1000 as second
player played 1160 non-random moves over 23890 moves as first
player played 1026 non-random moves over 23388 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 4
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:11<00:00,  8.67it/s]


max depth: 3
states explored: 1334
Player won 577 / 1000 as first
Player won 483 / 1000 as second
player played 1130 non-random moves over 23855 moves as first
player played 1017 non-random moves over 23175 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 4
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:22<00:00,  4.43it/s]


max depth: 3
states explored: 1357
Player won 569 / 1000 as first
Player won 470 / 1000 as second
player played 1138 non-random moves over 23470 moves as first
player played 1005 non-random moves over 23878 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 4
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:11<00:00,  8.69it/s]


max depth: 3
states explored: 1332
Player won 576 / 1000 as first
Player won 538 / 1000 as second
player played 1164 non-random moves over 23471 moves as first
player played 1043 non-random moves over 23568 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 4
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:22<00:00,  4.50it/s]


max depth: 3
states explored: 1341
Player won 571 / 1000 as first
Player won 522 / 1000 as second
player played 1150 non-random moves over 24160 moves as first
player played 1018 non-random moves over 23415 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 4
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:13<00:00,  7.50it/s]


max depth: 3
states explored: 168212
Player won 565 / 1000 as first
Player won 512 / 1000 as second
player played 1140 non-random moves over 23504 moves as first
player played 1035 non-random moves over 23378 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 4
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:24<00:00,  4.00it/s]


max depth: 3
states explored: 331212
Player won 557 / 1000 as first
Player won 491 / 1000 as second
player played 1149 non-random moves over 23198 moves as first
player played 1013 non-random moves over 23623 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 4
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:12<00:00,  7.91it/s]


max depth: 3
states explored: 164893
Player won 550 / 1000 as first
Player won 513 / 1000 as second
player played 1159 non-random moves over 23826 moves as first
player played 1017 non-random moves over 23682 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 4
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:25<00:00,  3.87it/s]


max depth: 3
states explored: 332750
Player won 573 / 1000 as first
Player won 497 / 1000 as second
player played 1382 non-random moves over 24166 moves as first
player played 1042 non-random moves over 23841 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 8
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:11<00:00,  8.50it/s]


max depth: 3
states explored: 1358
Player won 544 / 1000 as first
Player won 510 / 1000 as second
player played 1178 non-random moves over 24109 moves as first
player played 1026 non-random moves over 23497 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 8
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:22<00:00,  4.38it/s]


max depth: 3
states explored: 1343
Player won 567 / 1000 as first
Player won 531 / 1000 as second
player played 1134 non-random moves over 22993 moves as first
player played 1011 non-random moves over 23726 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 8
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:11<00:00,  8.45it/s]


max depth: 3
states explored: 1341
Player won 588 / 1000 as first
Player won 505 / 1000 as second
player played 1161 non-random moves over 23867 moves as first
player played 1007 non-random moves over 23955 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 8
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:22<00:00,  4.52it/s]


max depth: 3
states explored: 1371
Player won 562 / 1000 as first
Player won 512 / 1000 as second
player played 1114 non-random moves over 24151 moves as first
player played 1031 non-random moves over 23505 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 8
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:13<00:00,  7.68it/s]


max depth: 3
states explored: 165300
Player won 558 / 1000 as first
Player won 515 / 1000 as second
player played 1130 non-random moves over 23673 moves as first
player played 1022 non-random moves over 23640 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 8
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:25<00:00,  3.90it/s]


max depth: 3
states explored: 332535
Player won 559 / 1000 as first
Player won 535 / 1000 as second
player played 1128 non-random moves over 24082 moves as first
player played 1019 non-random moves over 23259 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 8
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:12<00:00,  7.85it/s]


max depth: 3
states explored: 167915
Player won 577 / 1000 as first
Player won 478 / 1000 as second
player played 1144 non-random moves over 24019 moves as first
player played 1012 non-random moves over 23728 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: False
eps: 8
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:25<00:00,  4.00it/s]


max depth: 3
states explored: 326844
Player won 587 / 1000 as first
Player won 508 / 1000 as second
player played 1479 non-random moves over 24360 moves as first
player played 1054 non-random moves over 24274 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: True
eps: 0
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:11<00:00,  8.94it/s]


max depth: 6
states explored: 1891
Player won 546 / 1000 as first
Player won 515 / 1000 as second
player played 1517 non-random moves over 24451 moves as first
player played 748 non-random moves over 22926 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: True
eps: 0
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:21<00:00,  4.68it/s]


max depth: 6
states explored: 1850
Player won 547 / 1000 as first
Player won 517 / 1000 as second
player played 1459 non-random moves over 23798 moves as first
player played 690 non-random moves over 23518 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: True
eps: 0
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:11<00:00,  9.04it/s]


max depth: 7
states explored: 1809
Player won 561 / 1000 as first
Player won 506 / 1000 as second
player played 1349 non-random moves over 23702 moves as first
player played 531 non-random moves over 24066 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: True
eps: 0
add_sim_moves: False
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:21<00:00,  4.69it/s]


max depth: 6
states explored: 1849
Player won 542 / 1000 as first
Player won 501 / 1000 as second
player played 1471 non-random moves over 24391 moves as first
player played 383 non-random moves over 23240 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: True
eps: 0
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:12<00:00,  7.70it/s]


max depth: 7
states explored: 156068
Player won 552 / 1000 as first
Player won 495 / 1000 as second
player played 1217 non-random moves over 23970 moves as first
player played 911 non-random moves over 23097 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: True
eps: 0
add_sim_moves: True
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:23<00:00,  4.32it/s]


max depth: 9
states explored: 307441
Player won 556 / 1000 as first
Player won 506 / 1000 as second
player played 1349 non-random moves over 24298 moves as first
player played 783 non-random moves over 23788 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: True
eps: 0
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 1


100%|██████████| 100/100 [00:13<00:00,  7.64it/s]


max depth: 6
states explored: 160773
Player won 555 / 1000 as first
Player won 506 / 1000 as second
player played 1207 non-random moves over 23747 moves as first
player played 605 non-random moves over 23936 moves as second


use_simmetries: False
use_random_games: False
eps_greedy: True
eps: 0
add_sim_moves: True
add_n_moves: True
add_eval: False
alpha: 0
simulations_on_new: 2


100%|██████████| 100/100 [00:23<00:00,  4.24it/s]


max depth: 5
states explored: 313391
Player won 555 / 1000 as first
Player won 503 / 1000 as second
player played 1895 non-random moves over 24281 moves as first
player played 551 non-random moves over 23424 moves as second


use_simmetries: False
use_random_games: True
eps_greedy: False
eps: 0.25
add_sim_moves: False
add_n_moves: False
add_eval: False
alpha: 0
simulations_on_new: 1


  0%|          | 0/100 [00:00<?, ?it/s]


KeyError: (-1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)