## minimax with alpha-beta pruning

In [None]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

In [None]:
class Dummy_Game(object):
    def __init__(self) -> None:
        self._board = np.ones((5, 5), dtype=np.uint8) * -1
        self.current_player_idx = 1

    def get_board(self): return self._board

    def single_move(self, board, from_pos, move, player_id):
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok
    
    def check_winner_board(self, board):
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        for x in range(self._board.shape[0]):
            if self._board[x, 0] != -1 and all(self._board[x, :] == self._board[x, 0]): return self._board[x, 0]
        for y in range(self._board.shape[1]):
            if self._board[0, y] != -1 and all(self._board[:, y] == self._board[0, y]): return self._board[0, y]
        if self._board[0, 0] != -1 and all([self._board[x, x] for x in range(self._board.shape[0])] == self._board[0, 0]): return self._board[0, 0]
        if self._board[0, -1] != -1 and all([self._board[x, -(x + 1)] for x in range(self._board.shape[0])] == self._board[0, -1]): return self._board[0, -1]
        return -1

    def __move(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        if player_id > 2: return False
        prev_value = deepcopy(self._board[(from_pos[1], from_pos[0])])
        acceptable = self.__take((from_pos[1], from_pos[0]), player_id)
        if acceptable:
            acceptable = self.__slide((from_pos[1], from_pos[0]), slide)
            if not acceptable: self._board[(from_pos[1], from_pos[0])] = deepcopy(prev_value)
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        acceptable: bool = ((from_pos[0] == 0 and from_pos[1] < 5) or (from_pos[0] == 4 and from_pos[1] < 5) or (from_pos[1] == 0 and from_pos[0] < 5) or (from_pos[1] == 4 and from_pos[0] < 5)) and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable: self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: Move) -> bool:
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            acceptable_top: bool = from_pos == (0, 0) and (slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            piece = self._board[from_pos]
            if slide == Move.LEFT:
                for i in range(from_pos[1], 0, -1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                self._board[(from_pos[0], 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(from_pos[1], self._board.shape[1] - 1, 1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(from_pos[0], 0, -1): self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                self._board[(0, from_pos[1])] = piece
            elif slide == Move.BOTTOM:
                for i in range(from_pos[0], self._board.shape[0] - 1, 1): self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable

In [13]:
border = []
for i in range(5):
    for j in range(5):
        if i == 0 or i == 4 or j == 0 or j == 4:
            border.append((i, j))
BORDER = (list(set(border)))
print(len(BORDER))

def tile_to_moves(tile):
    possible_moves = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        
    if tile[0] == 0: possible_moves.remove(Move.LEFT)
    if tile[0] == 4: possible_moves.remove(Move.RIGHT)
    if tile[1] == 0: possible_moves.remove(Move.TOP)
    if tile[1] == 4: possible_moves.remove(Move.BOTTOM)

    return possible_moves

tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

ALL_MOVES = []
for tile in BORDER:
    possible_moves = tile_moves[tile]
    for move in possible_moves: ALL_MOVES.append((tile, move))
N_ALL = len(ALL_MOVES)

class RandomPlayer(Player):
    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        from_pos = random.choice(BORDER)
        while game.get_board()[from_pos[1], from_pos[0]] == 1 - game.current_player_idx: from_pos = random.choice(BORDER)

        possible_moves = tile_moves[from_pos]
        
        move = random.choice(possible_moves)

        return from_pos, move

16
44


In [14]:
## state can be represented as a number if 0,1 are considered as bit

In [15]:
import numpy as np

def state_to_board(state):
    binary_string = format(state, '050b')
    binary_array = np.array(list(map(int, binary_string))).reshape(2, 5, 5)

    board = np.zeros((5, 5), dtype=int)
    board[binary_array[0] == 1] = -1
    board[binary_array[1] == 1] = 1

    return board

def board_to_state(board):
    binary_array = np.zeros((2, 5, 5), dtype=int)
    
    binary_array[0][board == -1] = 1
    binary_array[1][board == 1] = 1

    binary_string = ''.join(map(str, binary_array.flatten()))
    return int(binary_string, 2)



rand_board = np.random.choice([-1, 0, 1], size=(5, 5), replace=True)
print('Board:')
print(rand_board)

rand_state = board_to_state(rand_board)
rand_board = state_to_board(rand_state)

print('\nState:')
print(rand_state)
print('\nBoard:')
print(state_to_board(rand_state))

Board:
[[-1  1  0 -1  0]
 [-1 -1  1  1 -1]
 [ 0 -1  1  1 -1]
 [ 0 -1  1  1 -1]
 [-1 -1 -1 -1 -1]]

State:
661126438394048

Board:
[[-1  1  0 -1  0]
 [-1 -1  1  1 -1]
 [ 0 -1  1  1 -1]
 [ 0 -1  1  1 -1]
 [-1 -1 -1 -1 -1]]


In [16]:
import time

class TimeCounter:
    def __init__(self):
        self.tot_time = 0
        self.count = 0
    
    def add_t(self, t):
        self.tot_time += t
        self.count += 1

    def get(self): return self.tot_time, (self.tot_time / self.count) if self.count > 0 else 0

In [18]:
def minimax(game: "Dummy_Game", board, depth, maximizing, current_player, alpha, beta, transposition_table): #, time_counter):

    #time_start = time.perf_counter()
    
    state = tuple(board.flatten())
    #state = board_to_state(board)
    
    if state in transposition_table: return transposition_table[state]
    #time_counter.add_t((time.perf_counter() - time_start) * 1e6)

    winner = game.check_winner()
    if winner != -1 or depth == 0:
        return evaluate_board(board, winner, current_player if maximizing else 1 - current_player)

    if maximizing:
        max_eval = float('-inf')
        for om in ALL_MOVES:
            from_pos, move = om
            new_board, ok = game.single_move(board, from_pos, move, current_player)
            if ok:
                eval = minimax(game, new_board, depth - 1, False, 1 - current_player, alpha, beta, transposition_table)#, time_counter)

                max_eval = max(max_eval, eval)
                alpha = max(alpha, eval)

                if beta <= alpha: break  # Prune the remaining branches

        transposition_table[state] = max_eval
        return max_eval
    else:
        min_eval = float('inf')
        for om in ALL_MOVES:
            from_pos, move = om
            new_board, ok = game.single_move(board, from_pos, move, current_player)
            if ok:
                eval = minimax(game, new_board, depth - 1, True, 1 - current_player, alpha, beta, transposition_table)#, time_counter)

                min_eval = min(min_eval, eval)
                beta = min(beta, eval)

                if beta <= alpha: break  # Prune the remaining branches

        transposition_table[state] = min_eval
        return min_eval
    
def evaluate_board(board, winner, current_player):

    bonus = 0
    diag_a_player = 0
    diag_a_enemy = 0
    diag_b_player = 0
    diag_b_enemy = 0
    for i in range(5):
        
        line = board[i, :]
        bonus += pow(sum(line == current_player), 2) - pow(sum(line == 1 - current_player), 2)
        #bonus += sum(line == current_player) - sum(line == 1 - current_player)

        line = board[:, i]
        bonus += pow(sum(line == current_player), 2) - pow(sum(line == 1 - current_player), 2)
        #bonus += sum(line == current_player) - sum(line == 1 - current_player)
        
        if board[i, i] == current_player: diag_a_player += 1
        elif board[i, i] == 1 - current_player: diag_a_enemy += 1
        
        if board[i, 4-i] == current_player: diag_b_player += 1
        elif board[i, 4-i] == 1 - current_player: diag_b_enemy += 1

    bonus += pow(diag_a_player, 2) - pow(diag_a_enemy, 2)
    #bonus += diag_a_player - diag_a_enemy
        
    bonus += pow(diag_b_player, 2) - pow(diag_b_enemy, 2)
    #bonus += diag_b_player - diag_b_enemy

    #bonus += 15 if board[2, 2] == current_player else -16 # the center of the board is used in four winning combination

    #win_reward = 300  # 300 = 25 x 12 = 5^2 x winning lines
    win_reward = 60  # 60 = 5 x 12 = 5 x winning lines

    base_reward = -1 * win_reward # winner == 1 - current_player
    if winner == current_player: base_reward = win_reward
    elif winner == -1: base_reward = 0

    return base_reward + bonus

def minimax_wrapper(board, player_id, max_depth, transposition_table= {}):

    game = Dummy_Game()

    best_move = None
    best_eval = float('-inf')

    #time_counter = TimeCounter()

    for om in ALL_MOVES:
        from_pos, move = om
        new_board, ok = game.single_move(board, from_pos, move, player_id)
        if ok:
            om_eval = minimax(game, new_board, max_depth, False, 1 - player_id, float('-inf'), float('inf'), transposition_table) #, time_counter)

            if om_eval > best_eval:
                best_eval = om_eval
                best_move = om

    #print(f"Best move: {best_move} -> Best Value: {best_eval}")
    
    #tt, avgt = time_counter.get()
    #print(f"Elapsed time: {(tt, avgt)} microseconds")
    #print(f"Elapsed time: {tt / 1e6, avgt / 1e6} seconds")

    return best_move

initial_board = np.ones((5, 5)) * -1

minimax_wrapper(initial_board, 1, 0)

((0, 1), <Move.TOP: 0>)

In [19]:
class MyPlayer(Player):
    def __init__(self, max_depth= 0) -> None:
        super().__init__()
        self.max_depth = max_depth
        self.transposition_table = {}

        self.last_move = None, None, None
        self.until_draw = 10

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        board = game.get_board()

        from_pos, move = minimax_wrapper(board, game.current_player_idx, max_depth= self.max_depth, transposition_table= self.transposition_table)
        state = tuple(board.flatten())
        if state == self.last_move[0] and from_pos == self.last_move[1] and move == self.last_move[2]:
            self.until_draw -= 1
            if self.until_draw == 0:
                self.until_draw = 10
                from_pos, move = RandomPlayer().make_move(game)
            
        self.last_move = (state, from_pos, move)

        return from_pos, move


## tests

In [20]:
ThePlayer = MyPlayer(0)

wins_first = 0
wins_second = 0
n_trials = 1000

for i_g in tqdm(range(n_trials)):
#for i_g in range(n_trials):

    #print('==============================================================================')
    #print('==============================================================================')
    #print(f'game {i_g+1} - Player First (0)')

    game = Game()
    winner = game.play(ThePlayer, RandomPlayer())
    if winner == 0:
        #print(game.get_board())
        #print('Player win')
        wins_first += 1
    #else:
    #    print(game.get_board())
    #    print('Player lose')

    #print(f'game {i_g+1} - Player Second (1)')

    game = Game()
    winner = game.play(RandomPlayer(), ThePlayer)
    if winner == 1:
        #print(game.get_board())
        #print('Player win')
        wins_second += 1
    #else:
    #    print(game.get_board())
    #    print('Player lose')

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [02:04<00:00,  8.04it/s]

Player won 999 / 1000 as first
Player won 1000 / 1000 as second





In [21]:
ThePlayer = MyPlayer(1)

wins_first = 0
wins_second = 0
n_trials = 1000

for i_g in tqdm(range(n_trials)):
#for i_g in range(n_trials):

    #print('==============================================================================')
    #print('==============================================================================')
    #print(f'game {i_g+1} - Player First (0)')

    game = Game()
    winner = game.play(ThePlayer, RandomPlayer())
    if winner == 0:
        #print(game.get_board())
        #print('Player win')
        wins_first += 1
    #else:
    #    print(game.get_board())
    #    print('Player lose')

    #print(f'game {i_g+1} - Player Second (1)')

    game = Game()
    winner = game.play(RandomPlayer(), ThePlayer)
    if winner == 1:
        #print(game.get_board())
        #print('Player win')
        wins_second += 1
    #else:
    #    print(game.get_board())
    #    print('Player lose')

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

100%|██████████| 1000/1000 [25:44<00:00,  1.54s/it]

Player won 1000 / 1000 as first
Player won 998 / 1000 as second





In [22]:
ThePlayer = MyPlayer(2)

wins_first = 0
wins_second = 0
n_trials = 1000

for i_g in tqdm(range(n_trials)):
#for i_g in range(n_trials):

    #print('==============================================================================')
    #print('==============================================================================')
    #print(f'game {i_g+1} - Player First (0)')

    game = Game()
    winner = game.play(ThePlayer, RandomPlayer())
    if winner == 0:
        #print(game.get_board())
        #print('Player win')
        wins_first += 1
    #else:
    #    print(game.get_board())
    #    print('Player lose')

    #print(f'game {i_g+1} - Player Second (1)')

    game = Game()
    winner = game.play(RandomPlayer(), ThePlayer)
    if winner == 1:
        #print(game.get_board())
        #print('Player win')
        wins_second += 1
    #else:
    #    print(game.get_board())
    #    print('Player lose')

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

100%|██████████| 1000/1000 [2:02:29<00:00,  7.35s/it] 

Player won 947 / 1000 as first
Player won 921 / 1000 as second





In [71]:
max_depth_enemy = 2

print('==============================================================================')
print('==============================================================================')
print(f'MyPlayer(0) is first - MyPlayer({max_depth_enemy}) is second')

game = Game()
winner = game.play(MyPlayer(0), MyPlayer(max_depth_enemy))
if winner == 0: print('MyPlayer(0) win')
else: print(f'MyPlayer({max_depth_enemy}) win')

print('==============================================================================')
print('==============================================================================')
print(f'MyPlayer({max_depth_enemy}) is first - MyPlayer(0) is second')

game = Game()
winner = game.play(MyPlayer(max_depth_enemy), MyPlayer(0))
if winner == 0: print(f'MyPlayer({max_depth_enemy}) win')
else: print('MyPlayer(0) win')

MyPlayer(0) is first - MyPlayer(2) is second
MyPlayer(0) win
MyPlayer(2) is first - MyPlayer(0) is second
MyPlayer(0) win
