In [None]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

In [55]:
class Dummy_Game(object):
    """Minimal stand-alone board used to try out single moves.

    The board is a 5x5 array. Cells start at -1 and are overwritten with a
    player id when a piece is taken (see ``__take``).
    """

    def __init__(self) -> None:
        # The board has to hold -1, so it needs a signed dtype. The former
        # ``np.ones((5, 5), dtype=np.uint8) * -1`` only worked because NumPy 1.x
        # promoted the product to int16; NumPy 2 raises OverflowError instead.
        self._board = np.full((5, 5), -1, dtype=np.int16)
        self.current_player_idx = 1

    def get_board(self):
        """Return the internal board array (the object itself, not a copy)."""
        return self._board

    def single_move(self, board, from_pos, move, player_id):
        """Apply one move to a copy of ``board``.

        Args:
            board: board to start from; it is deep-copied and left untouched.
            from_pos: position of the piece to take; element 0 is used as the
                column and element 1 as the row.
            move: slide direction (a ``Move`` member).
            player_id: id written into the taken cell.

        Returns:
            ``(new_board, ok)``: a copy of the resulting board and whether the
            move was accepted. When it is rejected the board is unchanged.
        """
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok

    def check_winner_board(self, board):
        """Run ``check_winner`` on ``board``.

        ``board`` is stored as the internal board without being copied.
        """
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        """Return the value filling a complete row, column or diagonal, else -1.

        Rows are tested first, then columns, then the two diagonals; the first
        complete line found decides the result.
        """
        size = self._board.shape[0]
        for x in range(size):
            if self._board[x, 0] != -1 and all(self._board[x, :] == self._board[x, 0]): return self._board[x, 0]
        for y in range(self._board.shape[1]):
            if self._board[0, y] != -1 and all(self._board[:, y] == self._board[0, y]): return self._board[0, y]
        # Diagonals are compared cell by cell instead of comparing a Python list
        # with a NumPy scalar.
        main_first = self._board[0, 0]
        if main_first != -1 and all(self._board[x, x] == main_first for x in range(size)): return main_first
        anti_first = self._board[0, -1]
        if anti_first != -1 and all(self._board[x, -(x + 1)] == anti_first for x in range(size)): return anti_first
        return -1

    def __move(self, from_pos: tuple[int, int], slide: "Move", player_id: int) -> bool:
        """Take the piece at ``from_pos`` and slide it; return whether it was legal."""
        if player_id > 2: return False
        # Callers pass (column, row); the array is indexed (row, column).
        pos = (from_pos[1], from_pos[0])
        prev_value = self._board[pos]
        acceptable = self.__take(pos, player_id)
        if acceptable:
            acceptable = self.__slide(pos, slide)
            # __take already wrote player_id into the cell: undo it when the slide is refused.
            if not acceptable: self._board[pos] = prev_value
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        """Mark the cell at ``from_pos`` (row, column) as taken by ``player_id``.

        The cell must lie on the border and hold either a negative value or
        ``player_id`` itself.
        """
        row, col = from_pos[0], from_pos[1]
        on_border = (row in (0, 4) and col < 5) or (col in (0, 4) and row < 5)
        # Only index the board once the position is known to be on the border.
        if not on_border: return False
        acceptable = bool(self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable: self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: "Move") -> bool:
        """Slide the piece at ``from_pos`` (row, column) to the end of its line.

        A piece may not be pushed towards the edge it already sits on. Returns
        whether the slide was accepted; the board is only modified when it is.
        """
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            # Corner pieces can only move along the two directions pointing into the board.
            acceptable_top: bool = from_pos == (0, 0) and (slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            piece = self._board[from_pos]
            # Shift the cells between the piece and the target edge by one, then
            # drop the piece on the freed edge cell.
            if slide == Move.LEFT:
                for i in range(from_pos[1], 0, -1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                self._board[(from_pos[0], 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(from_pos[1], self._board.shape[1] - 1, 1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(from_pos[0], 0, -1): self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                self._board[(0, from_pos[1])] = piece
            elif slide == Move.BOTTOM:
                for i in range(from_pos[0], self._board.shape[0] - 1, 1): self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable

In [None]:
# Every (i, j) cell lying on the outer ring of the 5x5 board.
border = [(i, j) for i in range(5) for j in range(5) if i in (0, 4) or j in (0, 4)]
# Round-trip through a set, which also determines the order of BORDER.
BORDER = list(set(border))
print(len(BORDER))

def tile_to_moves(tile):
    """Return the slide directions available from a border tile.

    A 0 / 4 in ``tile[0]`` rules out LEFT / RIGHT and a 0 / 4 in ``tile[1]``
    rules out TOP / BOTTOM. The remaining directions keep the order
    TOP, BOTTOM, LEFT, RIGHT.
    """
    blocked = {
        Move.LEFT: tile[0] == 0,
        Move.RIGHT: tile[0] == 4,
        Move.TOP: tile[1] == 0,
        Move.BOTTOM: tile[1] == 4,
    }
    candidates = (Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT)
    return [direction for direction in candidates if not blocked[direction]]

# Directions available from each border tile.
tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

# Flat list of every (tile, direction) pair; a move id is an index into this list.
ALL_MOVES = [(tile, move) for tile in BORDER for move in tile_moves[tile]]
N_ALL = len(ALL_MOVES)

class RandomPlayer(Player):
    """Player that picks a random border tile and a random direction for it."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Draw tiles from BORDER until one is not held by the other player.

        A tile is rejected when the board cell at ``[tile[1], tile[0]]`` equals
        ``1 - game.current_player_idx``. The direction is then drawn from
        ``tile_moves`` for the accepted tile.
        """
        while True:
            from_pos = random.choice(BORDER)
            if game.get_board()[from_pos[1], from_pos[0]] != 1 - game.current_player_idx:
                break

        return from_pos, random.choice(tile_moves[from_pos])

In [None]:
import numpy as np

def state_to_board(state):
    """Decode an integer state into a 5x5 board of -1 / 0 / 1.

    ``state`` is read as 50 bits, most significant first. The first 25 bits
    mark the cells holding -1 and the last 25 bits the cells holding 1; a cell
    flagged in both halves ends up as 1.
    """
    bits = [int(ch) for ch in f"{state:050b}"]
    minus_plane, plus_plane = np.array(bits).reshape(2, 5, 5)

    decoded = np.where(plus_plane == 1, 1, np.where(minus_plane == 1, -1, 0))
    return decoded.astype(int)

def board_to_state(board):
    """Encode a 5x5 board of -1 / 0 / 1 as a 50-bit integer.

    The 25 high bits flag the cells equal to -1 and the 25 low bits the cells
    equal to 1, both in row-major order.
    """
    planes = np.zeros((2, 5, 5), dtype=int)
    planes[0, board == -1] = 1
    planes[1, board == 1] = 1

    # Fold the flags into an integer, most significant bit first.
    state = 0
    for bit in planes.ravel():
        state = (state << 1) | int(bit)
    return state



# Round-trip demo: random board -> integer state -> board again.
rand_board = np.random.choice([-1, 0, 1], size=(5, 5), replace=True)
print('Board:', rand_board, sep='\n')

rand_state = board_to_state(rand_board)
rand_board = state_to_board(rand_state)

print('\nState:', rand_state, sep='\n')
print('\nBoard:', state_to_board(rand_state), sep='\n')

In [58]:
# dict_rot[(move, n)] is the direction on the real board that corresponds to
# `move` chosen on the board rotated n times with np.rot90. It goes together
# with the position rule (x, y) -> (4 - y, x) applied n times by rot().
#
# One step of that rule turns a displacement (dx, dy) into (-dy, dx), so the
# directions cycle TOP -> RIGHT -> BOTTOM -> LEFT -> TOP (x = column, y = row,
# TOP = towards row 0). The previous table had the TOP and BOTTOM entries for
# 1 and 3 turns swapped, which mapped e.g. ((2, 0), BOTTOM) to the illegal
# move ((4, 2), RIGHT).
dict_rot = {
    (Move.TOP, 1): Move.RIGHT,
    (Move.TOP, 2): Move.BOTTOM,
    (Move.TOP, 3): Move.LEFT,
    (Move.BOTTOM, 1): Move.LEFT,
    (Move.BOTTOM, 2): Move.TOP,
    (Move.BOTTOM, 3): Move.RIGHT,
    (Move.LEFT, 1): Move.TOP,
    (Move.LEFT, 2): Move.RIGHT,
    (Move.LEFT, 3): Move.BOTTOM,
    (Move.RIGHT, 1): Move.BOTTOM,
    (Move.RIGHT, 2): Move.LEFT,
    (Move.RIGHT, 3): Move.TOP,
}

# Mirroring the columns (np.fliplr) swaps LEFT and RIGHT and leaves TOP and
# BOTTOM alone.
dict_flip = {
    Move.TOP: Move.TOP,
    Move.BOTTOM: Move.BOTTOM,
    Move.LEFT: Move.RIGHT,
    Move.RIGHT: Move.LEFT,
}

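# Positions are (x, y) = (column, row), matching the board[y, x] indexing used by the players.
# rot_orario ("clockwise" when y is drawn pointing up): (3, 4) -> (4, 1) -> (1, 0) -> (0, 3) -> (3, 4)
#   rule: (xi, yi) -> (yi, 4 - xi); this is how np.rot90 relocates a cell.
# rot_anti_orario ("counter-clockwise", the inverse of the rule above): (3, 4) -> (0, 3) -> (1, 0) -> (4, 1) -> (3, 4)
#   rule: (xi, yi) -> (4 - yi, xi)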

def rot(n_rot):
    """Build a move transform made of ``n_rot`` quarter turns.

    The returned function sends a position through (x, y) -> (4 - y, x)
    ``n_rot`` times and looks the direction up in ``dict_rot``.
    """
    def rot_n(from_pos, move):
        turned = from_pos
        for _ in range(n_rot):
            x, y = turned
            turned = (4 - y, x)
        return turned, dict_rot[(move, n_rot)]
    return rot_n

def flip(from_pos, move):
    """Mirror a move: x becomes 4 - x and the direction goes through ``dict_flip``."""
    mirrored = (4 - from_pos[0], from_pos[1])
    return mirrored, dict_flip[move]

def flip_rot(n_rot):
    """Build a move transform that applies ``rot(n_rot)`` and then ``flip``."""
    rotate = rot(n_rot)

    def flip_rot_n(from_pos, move):
        return flip(*rotate(from_pos, move))
    return flip_rot_n

# Concrete transforms for 1, 2 and 3 quarter turns, without and with the flip.
rot1, rot2, rot3 = rot(1), rot(2), rot(3)
flip_rot1, flip_rot2, flip_rot3 = flip_rot(1), flip_rot(2), flip_rot(3)

In [59]:
# Position i holds the move transform for the symmetry id i returned by check_simmetries.
inverse_simmetries = [rot1, rot2, rot3, flip, flip_rot1, flip_rot2, flip_rot3]

In [None]:
def find_index(tup, list_of_tuple):
    """Return the position of the first element equal to ``tup``, or None if there is none."""
    matches = (pos for pos, candidate in enumerate(list_of_tuple) if tup == candidate)
    return next(matches, None)

In [60]:
def check_simmetries(board, state_list):
    """Look for ``board`` or one of its symmetric variants among known states.

    Args:
        board: 2-D array.
        state_list: container of flattened boards (tuples). Anything that
            supports ``in`` works: a list as before, or a set / dict, for
            which every lookup is a hash lookup instead of a linear scan.

    Returns:
        ``(state, None)`` if the board itself is known, ``(state, id)`` if a
        variant is known, or ``None`` if nothing matches. ``state`` is the
        flattened tuple that was found. Ids follow the order in which the
        variants are tried:
        0-2 -> np.rot90 applied 1, 2, 3 times to the board,
        3   -> np.fliplr of the board,
        4-6 -> np.rot90 applied 1, 2, 3 times to the flipped board.
    """
    state = tuple(board.flatten())
    if state in state_list: return state, None

    flipped = np.fliplr(board)
    for id_simmetry in range(7):
        # np.rot90(m, 0) is m itself, which gives the plain flip for id 3.
        if id_simmetry < 3: variant = np.rot90(board, id_simmetry + 1)
        else: variant = np.rot90(flipped, id_simmetry - 3)

        state = tuple(variant.flatten())
        if state in state_list: return state, id_simmetry

    return None

In [62]:
# (id_move, id_simmetry) -> id_move
# For a move id chosen on a symmetric variant of the board, gives the id of the
# same move expressed on the real board.
MOVES_SIMMETRIES = {}

for id_move, (from_pos, move) in enumerate(ALL_MOVES):
    for id_simmetry, simmetry in enumerate(inverse_simmetries):
        image = simmetry(from_pos, move)
        idx = find_index(image, ALL_MOVES)

        # The old code stored the inner loop variable, so an image that was not
        # in ALL_MOVES silently became the last move. A missing image means the
        # transform tables (dict_rot / dict_flip) do not agree with ALL_MOVES:
        # stop here instead of learning on wrong moves.
        if idx is None:
            raise ValueError(
                f"simmetry {id_simmetry} maps move {ALL_MOVES[id_move]} to {image}, which is not in ALL_MOVES"
            )

        MOVES_SIMMETRIES[(id_move, id_simmetry)] = idx

print(len(MOVES_SIMMETRIES))
print(len(ALL_MOVES) * 7)

308
308
((0, 0), 28)
((0, 1), 18)
((0, 2), 30)
((0, 3), 20)
((0, 4), 39)
((0, 5), 42)
((0, 6), 14)
((1, 0), 27)
((1, 1), 17)
((1, 2), 31)


In [61]:
## Discarded: the number of possible states is too large to enumerate up front.

## To be replaced by a lazy check: if a state already exists, retrieve its q-values; if not, create a random q-value for
## each legal move of that state.


#import itertools
#from tqdm import tqdm
#MATRIX_SIZE = 5
#
#count_all = 0
#for s in itertools.product([-1, 0, 1], repeat= pow(MATRIX_SIZE, 2)): count_all += 1
#print(count_all)
#print('--------------')
#
#states_list = []
#
#for s in tqdm(itertools.product([-1, 0, 1], repeat= pow(MATRIX_SIZE, 2))):
#    if check_simmetries(np.array(s).reshape(MATRIX_SIZE, MATRIX_SIZE), states_list) is None:
#        states_list.append(tuple(s))
#    
#    #if count_all % 100 == 0:
#    #    print((len(states_list), count_all))
#
#print(count_all)
#print(len(states_list))

In [164]:
class MyPlayer(Player):
    """Tabular Q-learning player that shares q-values between symmetric boards.

    States are flattened boards. A board whose rotation / flip is already in
    the table reuses that entry: the move is chosen on the stored variant and
    translated back to the real board through MOVES_SIMMETRIES.
    """

    def __init__(self, lr, discount, eps= 0.5, until_draw_base= 50) -> None:
        """
        Args:
            lr: learning rate used in ``end_of_game``.
            discount: discount applied to the best q-value of the next state.
            eps: probability of picking a random move on a known state.
            until_draw_base: number of consecutive repetitions of the same
                (state, move) after which a random move is forced.
        """
        super().__init__()

        self.lr = lr
        self.discount = discount
        self.eps = eps

        self.q_table = {}  # state -> array of shape (N_ALL,), one q-value per id_move
        self.move_list = []  # (state, id_move) for every move proposed since the last end_of_game
        self.next_list = None  # state met at each following call; None until the first move of a game

        self.dummy = Dummy_Game()

        self.until_draw_base = until_draw_base
        self.last_move = None, None
        self.until_draw = until_draw_base

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Choose a move for the current player of ``game``."""
        return self.inside_make_move(game.get_board(), game.current_player_idx)

    @staticmethod
    def _to_board_move(id_move, id_simmetry):
        """Translate a move id chosen on the stored state into a move on the real board."""
        if id_simmetry is not None: id_move = MOVES_SIMMETRIES[(id_move, id_simmetry)]
        return ALL_MOVES[id_move]

    def inside_make_move(self, board, player_id) -> tuple[tuple[int, int], Move]:
        """Choose a move for ``player_id`` on ``board`` and record it for learning.

        Returns:
            ``(from_pos, move)`` expressed on the real board.
        """
        simmetry = check_simmetries(board, list(self.q_table.keys()))

        if simmetry is None:
            # Unknown state: create its entry (zero-initialised; random
            # initialisation was the alternative) and explore.
            state = tuple(board.flatten())
            id_simmetry = None
            self.q_table[state] = np.zeros(shape= (N_ALL,))
            id_move = np.random.randint(0, N_ALL)

        else:
            # Known state, possibly through a symmetry: epsilon-greedy choice.
            state, id_simmetry = simmetry
            if np.random.random() < self.eps: id_move = np.random.randint(0, N_ALL)
            else: id_move = np.argmax(self.q_table[state])

        # Force a random move after repeating the same (state, move) too many times.
        if (state, id_move) == self.last_move:
            self.until_draw -= 1

            if self.until_draw == 0:
                id_move = np.random.randint(0, N_ALL)
                self.until_draw = self.until_draw_base

        else:
            self.until_draw = self.until_draw_base
            self.last_move = (state, id_move)

        # Re-draw while the tile belongs to the other player. id_move refers to
        # the stored (possibly rotated / flipped) state, so the test has to use
        # the position of the translated move: the previous code indexed the
        # real board with the untranslated position.
        board_move = self._to_board_move(id_move, id_simmetry)
        while board[board_move[0][1], board_move[0][0]] == 1 - player_id:
            id_move = np.random.randint(0, N_ALL)
            board_move = self._to_board_move(id_move, id_simmetry)

        self.move_list.append((state, id_move))

        # The state seen now is the "next state" of the previous recorded move,
        # so nothing is appended on the first move of a game.
        if self.next_list is None: self.next_list = []
        else: self.next_list.append(state)

        return board_move

    def end_of_game(self, value, last_state):
        """Update the q-table from the recorded moves, then clear the records.

        Args:
            value: outcome of the game; positive is treated as a win and
                negative as a loss.
            last_state: final board. Kept for interface compatibility: the last
                move is updated from ``value`` alone, so no q-values are read
                for it.
        """
        # TODO (original note): try calling make_move and single_move twice
        # instead of relying on next_list.
        next_states = self.next_list or []
        last_index = len(self.move_list) - 1

        for i, (state, id_move) in enumerate(self.move_list):
            if i == last_index:
                if value > 0: self.q_table[state][id_move] += 100
                elif value < 0: self.q_table[state][id_move] -= 100

            else:
                # Only earlier moves need a next state, and one exists for each
                # of them, so short or empty games cannot index out of range.
                next_state = next_states[i]
                self.q_table[state][id_move] *= (1 - self.lr)
                self.q_table[state][id_move] += self.lr * (value + self.discount * max(self.q_table[next_state]))

        self.move_list = []
        # Reset to None, not []: with an empty list the first move of the next
        # game appended its own state, and every later "next state" ended up
        # being the current one.
        self.next_list = None

In [167]:
rand = RandomPlayer()
player = MyPlayer(0.5, 0.5)

# Each iteration plays two games against the random player, one per seat
# (seat 0 moves first). The learner gets +1 when it wins and -1 otherwise.
for _ in tqdm(range(1000)):
    for seat in (0, 1):
        game = Game()
        contenders = (player, rand) if seat == 0 else (rand, player)
        winner = game.play(*contenders)
        reward = 1 if winner == seat else -1
        player.end_of_game(reward, tuple(game.get_board().flatten()))

100%|██████████| 1000/1000 [12:47<00:00,  1.30it/s]


In [168]:
# Evaluation: n_trials games per seat against a fresh random player.
ThePlayer = player
n_trials = 100

player1 = ThePlayer
player2 = RandomPlayer()

wins_first = 0
wins_second = 0

for _ in tqdm(range(n_trials)):

    # player1 moves first and is therefore player 0
    g = Game()
    winner = g.play(player1, player2)
    wins_first += int(winner == 0)

    # player1 moves second and is therefore player 1
    g = Game()
    winner = g.play(player2, player1)
    wins_second += int(winner == 1)

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

100%|██████████| 100/100 [02:49<00:00,  1.69s/it]

Player won 56 / 100 as first
Player won 47 / 100 as second



