In [62]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

In [63]:
class Dummy_Game(object):
    """Stand-alone 5x5 board used to try moves and test for winners.

    A tile holds -1 when it is empty, otherwise the id of the player that
    owns it. Positions passed to `single_move` are `(x, y)` pairs: the board
    array is indexed as `board[y, x]`, i.e. `x` is the column and `y` the row.
    """

    def __init__(self) -> None:
        # `np.ones(..., dtype=np.uint8) * -1` only worked through NumPy's legacy
        # value-based casting (yielding int16) and raises OverflowError on
        # NumPy >= 2. Build the signed board explicitly with the same dtype.
        self._board = np.full((5, 5), -1, dtype=np.int16)
        self.current_player_idx = 1

    def get_board(self):
        """Return the internal board array (the object itself, not a copy)."""
        return self._board

    def single_move(self, board, from_pos, move, player_id):
        """Apply one move to a copy of `board`.

        Args:
            board: board to start from; it is deep-copied, never modified.
            from_pos: `(x, y)` position of the tile to take.
            move: slide direction (a `Move` member).
            player_id: id of the player making the move.

        Returns:
            `(new_board, ok)`: a copy of the resulting board and whether the
            move was accepted. When it was not, `new_board` equals `board`.
        """
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok

    def check_winner_board(self, board):
        """Adopt `board` as the internal board (no copy) and return its winner."""
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        """Return the id filling a whole row, column or diagonal, else -1.

        Rows are checked first, then columns, then the main diagonal and
        finally the anti-diagonal; the first complete line found decides.
        """
        board = self._board
        size = board.shape[0]
        for x in range(size):
            if board[x, 0] != -1 and np.all(board[x, :] == board[x, 0]): return board[x, 0]
        for y in range(board.shape[1]):
            if board[0, y] != -1 and np.all(board[:, y] == board[0, y]): return board[0, y]
        # Build real arrays for the diagonals so the comparison is element-wise
        # by construction instead of relying on a reflected list == scalar.
        main_diag = np.array([board[x, x] for x in range(size)])
        if board[0, 0] != -1 and np.all(main_diag == board[0, 0]): return board[0, 0]
        anti_diag = np.array([board[x, -(x + 1)] for x in range(size)])
        if board[0, -1] != -1 and np.all(anti_diag == board[0, -1]): return board[0, -1]
        return -1

    def __move(self, from_pos: tuple[int, int], slide: 'Move', player_id: int) -> bool:
        """Take the tile at `from_pos` (given as `(x, y)`) and slide it.

        Returns whether the move was accepted; a rejected slide restores the
        tile's previous value.
        """
        if player_id > 2: return False
        # Board indexing is (row, column), hence the swap of the (x, y) pair.
        pos = (from_pos[1], from_pos[0])
        prev_value = deepcopy(self._board[pos])
        acceptable = self.__take(pos, player_id)
        if acceptable:
            acceptable = self.__slide(pos, slide)
            if not acceptable: self._board[pos] = deepcopy(prev_value)
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        """Claim the tile at `from_pos` (row, column) for `player_id` if allowed.

        The tile must lie on the border and be empty or already owned by
        `player_id`.
        """
        on_border = (
            (from_pos[0] == 0 and from_pos[1] < 5)
            or (from_pos[0] == 4 and from_pos[1] < 5)
            or (from_pos[1] == 0 and from_pos[0] < 5)
            or (from_pos[1] == 4 and from_pos[0] < 5)
        )
        # The board is only indexed once the position is known to be on the border.
        acceptable: bool = on_border and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable: self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: 'Move') -> bool:
        """Slide the tile at `from_pos` (row, column) towards `slide`.

        A tile cannot be pushed out of the edge it sits on; corner tiles have
        only two allowed directions. Returns whether the slide was performed.
        """
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            acceptable_top: bool = from_pos == (0, 0) and (slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            # Shift the tiles between the taken one and the target edge by one
            # step, then drop the taken piece on that edge.
            piece = self._board[from_pos]
            if slide == Move.LEFT:
                for i in range(from_pos[1], 0, -1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                self._board[(from_pos[0], 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(from_pos[1], self._board.shape[1] - 1, 1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(from_pos[0], 0, -1): self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                self._board[(0, from_pos[1])] = piece
            elif slide == Move.BOTTOM:
                for i in range(from_pos[0], self._board.shape[0] - 1, 1): self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable

In [64]:
# Collect every (i, j) cell lying on the outer ring of the 5x5 board.
border = [
    (i, j)
    for i in range(5)
    for j in range(5)
    if i in (0, 4) or j in (0, 4)
]
# Deduplicate through a set, then freeze the result into a list.
BORDER = list(set(border))
print(len(BORDER))

def tile_to_moves(tile):
    """Return the slide directions allowed for a border tile.

    A direction is dropped when the tile already sits on that edge:
    `tile[0]` equal to 0 / 4 rules out LEFT / RIGHT, and `tile[1]` equal to
    0 / 4 rules out TOP / BOTTOM. The remaining directions keep the order
    TOP, BOTTOM, LEFT, RIGHT.
    """
    # Each direction paired with the condition that forbids it.
    direction_blocked = (
        (Move.TOP, tile[1] == 0),
        (Move.BOTTOM, tile[1] == 4),
        (Move.LEFT, tile[0] == 0),
        (Move.RIGHT, tile[0] == 4),
    )
    return [direction for direction, blocked in direction_blocked if not blocked]

# Allowed slide directions for every border tile.
tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

# Flat list of every (tile, direction) pair, grouped by tile in BORDER order.
ALL_MOVES = [(tile, move) for tile in BORDER for move in tile_moves[tile]]
N_ALL = len(ALL_MOVES)

class RandomPlayer(Player):
    """Baseline player that picks a random border tile and a random slide."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return a random `(from_pos, move)` for the current player.

        Border tiles are redrawn until one is found that does not hold the
        other player's id; the slide is then drawn from the directions
        precomputed for that tile.
        """
        while True:
            from_pos = random.choice(BORDER)
            tile_owner = game.get_board()[from_pos[1], from_pos[0]]
            if tile_owner != 1 - game.current_player_idx:
                break

        return from_pos, random.choice(tile_moves[from_pos])

16


In [65]:
## A state can be represented as two numbers if the 0/1 entries are treated as bits

In [66]:
import numpy as np

def state_to_board(state):
    """Decode an integer state into a 5x5 board of -1 / 0 / 1.

    The integer is read as 50 bits (zero-padded on the left): the first 25
    bits mark the cells holding -1, the last 25 bits the cells holding 1.
    A cell flagged in both halves ends up as 1.
    """
    bits = np.array([int(ch) for ch in f'{state:050b}'])
    minus_plane, plus_plane = bits.reshape(2, 5, 5)

    board = np.zeros((5, 5), dtype=int)
    board[minus_plane == 1] = -1
    board[plus_plane == 1] = 1
    return board

def board_to_state(board):
    """Encode a 5x5 board of -1 / 0 / 1 as a single integer.

    Two 5x5 bit planes are built (cells equal to -1, then cells equal to 1),
    flattened in that order into a 50-character binary string and parsed as
    a base-2 integer.
    """
    planes = np.zeros((2, 5, 5), dtype=int)
    for plane_idx, marker in enumerate((-1, 1)):
        planes[plane_idx][board == marker] = 1

    bits = ''.join(str(bit) for bit in planes.flatten())
    return int(bits, 2)



# Round-trip check: encode a random board to an integer state, decode it back
# and print both so they can be compared by eye.
rand_board = np.random.choice([-1, 0, 1], size=(5, 5), replace=True)
print('Board:', rand_board, sep='\n')

rand_state = board_to_state(rand_board)
rand_board = state_to_board(rand_state)

print('\nState:', rand_state, sep='\n')
print('\nBoard:', state_to_board(rand_state), sep='\n')

Board:
[[-1  1  1 -1  1]
 [ 0 -1  1  0  0]
 [ 0 -1 -1  0  0]
 [ 1  1  0  0  1]
 [ 0  1  0 -1  1]]

State:
642527188353833

Board:
[[-1  1  1 -1  1]
 [ 0 -1  1  0  0]
 [ 0 -1 -1  0  0]
 [ 1  1  0  0  1]
 [ 0  1  0 -1  1]]


In [67]:
# Slide directions in the order visited by successive quarter turns:
# TOP -> RIGHT -> BOTTOM -> LEFT -> TOP.
_ROTATION_CYCLE = (Move.TOP, Move.RIGHT, Move.BOTTOM, Move.LEFT)

# (direction, number of quarter turns) -> direction after those turns.
dict_rot = {
    (direction, turns): _ROTATION_CYCLE[(_ROTATION_CYCLE.index(direction) + turns) % 4]
    for direction in (Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT)
    for turns in (1, 2, 3)
}

# direction -> direction after a horizontal mirror (LEFT and RIGHT swap).
dict_flip = dict(zip(
    (Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT),
    (Move.TOP, Move.BOTTOM, Move.RIGHT, Move.LEFT),
))

# clockwise rotation: (3, 4) -> (4, 1) -> (1, 0) -> (0, 3) -> (3, 4)
#   (xi, yi) -> (yi, 4 - xi)
# counter-clockwise rotation: (3, 4) -> (0, 3) -> (1, 0) -> (4, 1) -> (3, 4)
#   (xi, yi) -> (4 - yi, xi)

def rot(n_rot):
    """Build a function that rotates a move by `n_rot` quarter turns.

    The returned function maps `(from_pos, move)` to the rotated pair: the
    position goes through `(a, b) -> (4 - b, a)` once per turn and the
    direction is looked up in `dict_rot`.
    """
    def rot_n(from_pos, move):
        rotated_pos = from_pos
        for _turn in range(n_rot):
            rotated_pos = (4 - rotated_pos[1], rotated_pos[0])
        rotated_move = dict_rot[(move, n_rot)]
        return rotated_pos, rotated_move
    return rot_n

def flip(from_pos, move):
    """Mirror a move: the first coordinate becomes `4 - value`, the direction goes through `dict_flip`."""
    mirrored_pos = (4 - from_pos[0], from_pos[1])
    return mirrored_pos, dict_flip[move]

def flip_rot(n_rot):
    """Build a function that rotates a move by `n_rot` quarter turns, then mirrors it."""
    def flip_rot_n(from_pos, move):
        rotated = rot(n_rot)(from_pos, move)
        return flip(*rotated)
    return flip_rot_n

# Concrete move transforms for 1, 2 and 3 quarter turns, plain and followed by a flip.
rot1, rot2, rot3 = (rot(turns) for turns in (1, 2, 3))
flip_rot1, flip_rot2, flip_rot3 = (flip_rot(turns) for turns in (1, 2, 3))

# Seven non-identity transforms, in two orderings; positions 0-6 line up with
# the symmetry ids returned by check_simmetries.
verse_simmetries = [rot3, rot2, rot1, flip, flip_rot3, flip_rot2, flip_rot1]

inverse_simmetries = [rot1, rot2, rot3, flip, flip_rot1, flip_rot2, flip_rot3]

def check_simmetries(board, next_to_move, state_list):
    """Look for `board`, or one of its symmetric variants, in `state_list`.

    A state is the flattened board followed by `next_to_move`, as a tuple.
    The board itself is tried first, then its three successive `np.rot90`
    rotations (ids 0-2), then its left-right mirror (id 3) and the three
    successive rotations of that mirror (ids 4-6).

    Returns:
        `(state, None)` when the board itself is known, `(state, id)` when a
        symmetric variant is known, or `None` when nothing matches.
    """
    def encode(grid):
        return tuple(list(grid.flatten()) + [next_to_move])

    def variants():
        # Variants are produced lazily, in the order of their symmetry ids.
        rotated = board
        for _ in range(3):
            rotated = np.rot90(rotated)
            yield rotated
        mirrored = np.fliplr(board)
        yield mirrored
        for _ in range(3):
            mirrored = np.rot90(mirrored)
            yield mirrored

    base_state = encode(board)
    if base_state in state_list:
        return base_state, None

    for simmetry_id, variant in enumerate(variants()):
        base_state = encode(variant)
        if base_state in state_list:
            return base_state, simmetry_id

    return None

# Lookup table: (id_move, id_simmetry) -> index in ALL_MOVES of the move
# obtained by applying inverse_simmetries[id_simmetry] to ALL_MOVES[id_move].
# An entry is None when the transformed move is not in ALL_MOVES.
MOVES_SIMMETRIES = {} #(id_move, id_simmetry) -> id_move

for id_move, (from_pos, move) in enumerate(ALL_MOVES):

    for id_simmetry, simmetry in enumerate(inverse_simmetries):

        # The transformed move does not depend on the scan below: compute it once.
        target = simmetry(from_pos, move)

        idx = None
        for i, candidate in enumerate(ALL_MOVES):
            if candidate == target:
                idx = i
                break

        # Store the matched index. The previous code stored the loop variable,
        # which would silently record the last index when nothing matched.
        MOVES_SIMMETRIES[(id_move, id_simmetry)] = idx

print(len(MOVES_SIMMETRIES))
print(len(ALL_MOVES) * 7)

308
308


In [68]:
## Discarded: the number of possible states is too large to enumerate.

## To be replaced with a check of whether a state already exists: if it does, retrieve its q-values; if not, create a random q-value for
## each legal move of that state.


#import itertools
#from tqdm import tqdm
#MATRIX_SIZE = 5
#
#count_all = 0
#for s in itertools.product([-1, 0, 1], repeat= pow(MATRIX_SIZE, 2)): count_all += 1
#print(count_all)
#print('--------------')
#
#states_list = []
#
#for s in tqdm(itertools.product([-1, 0, 1], repeat= pow(MATRIX_SIZE, 2))):
#    if check_simmetries(np.array(s).reshape(MATRIX_SIZE, MATRIX_SIZE), states_list) is None:
#        states_list.append(tuple(s))
#    
#    #if count_all % 100 == 0:
#    #    print((len(states_list), count_all))
#
#print(count_all)
#print(len(states_list))

In [69]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class SimNet(nn.Module):
    """Value network scoring a state made of 25 board cells plus one extra feature.

    Two branches are concatenated before a single linear output:
    a two-layer dense branch over all 26 inputs, and a 3x3 convolution
    (3 output channels) over the first 25 inputs viewed as a 5x5 image.
    """

    def __init__(self):
        super().__init__()

        # Dense branch: 26 -> 52 -> 52.
        self.dense_1 = nn.Linear(26, 52)
        self.dense_2 = nn.Linear(52, 52)

        # Convolutional branch: 1x5x5 -> 3x3x3, i.e. 27 features once flattened.
        self.conv = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=1)

        # Output head over the concatenated branches.
        self.dense_out = nn.Linear(27 + 52, 1)

        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Map a `(batch, 26)` tensor to a `(batch, 1)` tensor of values."""
        grid = x[:, :25].view(x.size(0), 1, 5, 5)

        hidden = self.relu(self.dense_1(x))
        hidden = self.relu(self.dense_2(hidden))

        spatial = self.flatten(self.relu(self.conv(grid)))

        merged = torch.cat([hidden, spatial], dim=1)
        return self.dense_out(merged)

class MyPlayer(Player):
    """Player that chooses moves with a learned state-value network.

    Training (`train_wrapper`) plays games on a `Dummy_Game` board, labels
    the visited states with the outcome of random playouts (`simulation`)
    and fits `SimNet` on them. Values are expressed from player 0's point of
    view: player 0 picks the successor with the highest predicted value,
    player 1 the one with the lowest.

    Args:
        eps: probability of playing a random move during training.
        simulations_on_new: number of random playouts used to label a state.
        base_until_move_change: how many times the same move may be repeated
            in `make_move` before a random move is played instead.
    """

    def __init__(self, eps= 0.5, simulations_on_new= 1, base_until_move_change= 10) -> None:
        super().__init__()

        self.eps = eps
        self.simulation_on_new = simulations_on_new

        self.states_set = []

        # Scratch board used to apply moves and detect winners.
        self.dummy = Dummy_Game()

        # Repetition guard used by make_move.
        self.base_until_move_change = base_until_move_change
        self.last_pos_move = None
        self.until_move_change = self.base_until_move_change

        self.value_model = SimNet()
        self.optimizer = optim.Adam(self.value_model.parameters())
        self.criterion = nn.L1Loss()

        # Replay memories of (state, value) pairs, split by the sign of the value.
        self.memory_win = []
        self.memory_lose = []
        self.batch_size = 64
        self.epochs = 10

        # state -> move shortcuts consulted by selection(); nothing in this
        # class currently fills them.
        self.winning_move_0 = {}
        self.winning_move_1 = {}

    def reset_counters(self):
        """Reset the repeated-move countdown to its configured start value."""
        self.until_move_change = self.base_until_move_change

    def evaluate_board(self, board):
        """Return a heuristic score per player based on pieces sharing a line.

        For every row, column and both diagonals the number of tiles owned by
        a player is squared and accumulated; each total is divided by 300.
        Not used by the training loop (its use in `simulation` is disabled).

        Returns:
            `(score_player_0, score_player_1)`.
        """
        bonus_0 = 0
        diag_a_0 = 0
        diag_b_0 = 0
        bonus_1 = 0
        diag_a_1 = 0
        diag_b_1 = 0
        for i in range(5):

            line = board[i, :]
            bonus_0 += pow(sum(line == 0), 2)
            bonus_1 += pow(sum(line == 1), 2)

            line = board[:, i]
            bonus_0 += pow(sum(line == 0), 2)
            bonus_1 += pow(sum(line == 1), 2)

            if board[i, i] == 0: diag_a_0 += 1
            elif board[i, i] == 1: diag_a_1 += 1

            if board[i, 4-i] == 0: diag_b_0 += 1
            elif board[i, 4-i] == 1: diag_b_1 += 1

        bonus_0 += pow(diag_a_0, 2)
        bonus_1 += pow(diag_a_1, 2)

        bonus_0 += pow(diag_b_0, 2)
        bonus_1 += pow(diag_b_1, 2)

        return bonus_0 / 300, bonus_1 / 300

    def simulation(self, base_board, next_to_move):
        """Estimate a state's value with random playouts.

        Plays `self.simulation_on_new` games from `base_board`, with
        `next_to_move` moving first and both sides choosing uniformly among
        the moves the board accepts.

        Returns:
            Fraction of playouts won by player 0 minus the fraction won by
            player 1 (a playout not won by player 0 counts for player 1).
        """
        win_0 = 0
        win_1 = 0

        for _ in range(self.simulation_on_new):

            next_to_play = next_to_move
            board = deepcopy(base_board)

            winner = self.dummy.check_winner_board(board)
            while winner == -1:

                # Rejection sampling: redraw until the board accepts the move.
                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)
                while not ok:
                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                    new_board, ok = self.dummy.single_move(board, from_pos, move, next_to_play)

                board = new_board
                next_to_play = 1 - next_to_play

                winner = self.dummy.check_winner_board(board)

            if winner == 0: win_0 += 1
            else: win_1 += 1

        return (win_0 / self.simulation_on_new) - (win_1 / self.simulation_on_new)

    def selection(self, state, player_id):
        """Pick the move whose resulting state the value network likes best.

        Args:
            state: tuple of the 25 flattened board cells followed by the id
                of the player to move.
            player_id: id of the player choosing the move.

        Returns:
            `(from_pos, move)`: the successor with the highest predicted
            value for player 0, the lowest for player 1.
        """
        if player_id == 0:
            if state in self.winning_move_0:
                print('winning move used 0')
                return self.winning_move_0[state]
        else:
            if state in self.winning_move_1:
                print('winning move used 1')
                return self.winning_move_1[state]

        board = np.array(state)[:-1].reshape(5, 5)

        states_to_evaluate = []
        moves = []

        # Enumerate every accepted move together with the state it leads to.
        for from_pos, move in ALL_MOVES:

            new_board, ok = self.dummy.single_move(board, from_pos, move, player_id)
            if ok:

                states_to_evaluate.append(np.array(list(new_board.flatten()) + [1 - player_id]))
                moves.append((from_pos, move))

        states_to_evaluate = torch.tensor(np.array(states_to_evaluate), dtype=torch.float32)

        # Pure inference: no gradient bookkeeping is needed here.
        with torch.no_grad():
            values = self.value_model(states_to_evaluate).numpy()

        if player_id == 0: return moves[np.argmax(values)]
        else: return moves[np.argmin(values)]

    def train_step(self):
        """Run one optimizer step on a balanced batch from both memories.

        Draws `batch_size` distinct samples from each memory, so each memory
        must hold at least `batch_size` entries.
        """
        X_train = []
        Y_train = []

        idx_rand = np.random.choice(range(len(self.memory_win)), size= (self.batch_size,), replace= False)
        win = [self.memory_win[i] for i in idx_rand]
        idx_rand = np.random.choice(range(len(self.memory_lose)), size= (self.batch_size,), replace= False)
        lose = [self.memory_lose[i] for i in idx_rand]

        # Interleave the positive and non-positive samples.
        for w, l in zip(win, lose):
            X_train.append(np.array(w[0]))
            X_train.append(np.array(l[0]))
            Y_train.append(w[1])
            Y_train.append(l[1])

        X_train = torch.tensor(np.array(X_train), dtype=torch.float32)
        Y_train = torch.tensor(np.array(Y_train), dtype=torch.float32).view(-1, 1)

        self.optimizer.zero_grad()
        output = self.value_model(X_train)
        loss = self.criterion(output, Y_train)
        loss.backward()
        self.optimizer.step()

    def _random_move(self, board, player_id):
        """Draw random entries of ALL_MOVES until the tile is not the opponent's."""
        from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
        while board[from_pos[1], from_pos[0]] == 1 - player_id:
            from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
        return from_pos, move

    def _remember(self, memory, sample):
        """Append `sample` to `memory`, dropping the oldest entry past 10 batches."""
        memory.append(sample)
        if len(memory) > self.batch_size * 10:
            del memory[0]

    def train_wrapper(self, n_games= 1000):
        """Play `n_games` training games and fit the value network.

        With probability `eps` a move is random, otherwise it comes from
        `selection`. After each non-terminal move of player 0 the resulting
        state is labelled with `simulation` and stored; the final state is
        stored with value 1 (player 0 won) or -1. One `train_step` is run
        per game once both memories hold more than `batch_size` samples.
        """
        for _ in tqdm(range(n_games)):

            # Empty board (-1 everywhere). Built with an explicit signed dtype:
            # `np.ones(..., dtype=np.uint8) * -1` raises on NumPy >= 2.
            board = np.full((5, 5), -1, dtype=np.int16)
            next_to_move = 0
            state = tuple(list(board.flatten()) + [next_to_move])

            winner = -1
            while winner == -1:

                base_board = np.array(state)[:-1].reshape(5, 5)

                if np.random.random() < self.eps:
                    from_pos, move = self._random_move(base_board, next_to_move)
                else:
                    from_pos, move = self.selection(state, next_to_move)

                board, _ = self.dummy.single_move(board, from_pos, move, next_to_move)
                next_to_move = 1 - next_to_move
                state = tuple(list(board.flatten()) + [next_to_move])

                winner = self.dummy.check_winner_board(board)

                # Label the state reached right after a move of player 0.
                if winner == -1 and next_to_move == 1:
                    value = self.simulation(board, next_to_move)
                    if value > 0: self._remember(self.memory_win, (state, value))
                    else: self._remember(self.memory_lose, (state, value))

            # Terminal state: exact outcome instead of a playout estimate.
            if winner == 0: self._remember(self.memory_win, (state, 1))
            else: self._remember(self.memory_lose, (state, -1))

            if len(self.memory_win) > self.batch_size and len(self.memory_lose) > self.batch_size: self.train_step()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return the move to play in `game` for the current player.

        The move comes from `selection`. If the same move has been proposed
        `base_until_move_change` more times since the countdown was last
        reset, a random move is played instead and the countdown restarts.
        """
        board = game.get_board()
        player_id = game.current_player_idx
        state = tuple(list(board.flatten()) + [player_id])

        pos_move = self.selection(state, player_id)

        if pos_move == self.last_pos_move:
            self.until_move_change -= 1
            if self.until_move_change == 0:
                # Actually play the random move: it used to be drawn and then
                # overwritten by the repeated one, so it never took effect.
                pos_move = self._random_move(board, player_id)
                self.until_move_change = self.base_until_move_change
        else: self.last_pos_move = pos_move

        from_pos, move = pos_move
        return from_pos, move

In [76]:
# Train with fully random exploration (eps=1), 20 playouts per labelled state.
mc = MyPlayer(eps=1, simulations_on_new=20)
mc.train_wrapper(10000)

ThePlayer = mc
player1, player2 = ThePlayer, RandomPlayer()

# Evaluate against the random player, alternating who moves first.
n_trials = 100
wins_first = wins_second = 0

for _ in tqdm(range(n_trials)):

    # Trained player moves first.
    g = Game()
    player1.reset_counters()
    winner = g.play(player1, player2)
    wins_first += 1 if winner == 0 else 0

    # Trained player moves second.
    g = Game()
    player1.reset_counters()
    winner = g.play(player2, player1)
    wins_second += 1 if winner == 1 else 0

print(f"Player won {wins_first * 100 / n_trials}% ({wins_first}) as first")
print(f"Player won {wins_second * 100 / n_trials}% ({wins_second}) as second")

  0%|          | 0/10000 [00:00<?, ?it/s]

100%|██████████| 10000/10000 [2:28:11<00:00,  1.12it/s] 
100%|██████████| 100/100 [00:03<00:00, 27.90it/s]

Player won 90.0% (90) as first
Player won 98.0% (98) as second



