## Minimax with alpha-beta pruning

In [164]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

A dummy game class that re-implements the move rules, so that moves can be simulated while the real `Game` class is left untouched.

In [165]:
class Dummy_Game(object):
    """Scratch re-implementation of the game rules used to simulate moves.

    Callers hand a board to ``single_move`` / ``check_winner_board`` and read
    the result back, so the real ``Game`` object is never modified.
    A cell holds -1 when it is empty, otherwise the id of the owning player.
    """

    def __init__(self) -> None:
        # -1 marks an empty cell, so the board needs a signed dtype. Building
        # it as ``np.ones(..., dtype=np.uint8) * -1`` raises OverflowError on
        # NumPy >= 2 and only works through a silent upcast on older releases.
        self._board = np.full((5, 5), -1, dtype=np.int8)
        self.current_player_idx = 1

    def get_board(self):
        """Return the internal board array (not a copy)."""
        return self._board

    def single_move(self, board, from_pos, move, player_id):
        """Apply one move to a copy of ``board``.

        Args:
            board: board array to start from; it is left unmodified.
            from_pos: ``(x, y)`` position of the piece to take; it is swapped
                to ``(row, column)`` before indexing the board.
            move: ``Move`` direction in which the piece is slid.
            player_id: id of the player making the move.

        Returns:
            ``(new_board, ok)``: a fresh copy of the board after the move and
            whether the move was legal. When ``ok`` is false the returned
            board has the same content as ``board``.
        """
        # A plain array copy is all that is needed here; this runs once per
        # explored move, so it avoids the overhead of ``deepcopy``.
        self._board = np.array(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return self._board.copy(), ok

    def check_winner_board(self, board):
        """Return the winner of ``board`` (see ``check_winner``).

        The board is stored by reference as the internal board.
        """
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        """Return the id owning a full row, column or diagonal, else -1.

        Rows are checked first, then columns, then the main diagonal and
        finally the anti-diagonal; the first complete line found wins.
        """
        board = self._board
        for x in range(board.shape[0]):
            if board[x, 0] != -1 and all(board[x, :] == board[x, 0]):
                return int(board[x, 0])
        for y in range(board.shape[1]):
            if board[0, y] != -1 and all(board[:, y] == board[0, y]):
                return int(board[0, y])
        main_diag = board.diagonal()
        if main_diag[0] != -1 and all(main_diag == main_diag[0]):
            return int(main_diag[0])
        # Flipping left-right turns the anti-diagonal into the main diagonal.
        anti_diag = np.fliplr(board).diagonal()
        if anti_diag[0] != -1 and all(anti_diag == anti_diag[0]):
            return int(anti_diag[0])
        return -1

    def __move(self, from_pos: tuple[int, int], slide: "Move", player_id: int) -> bool:
        """Take the piece at ``from_pos`` (x, y) and slide it; return legality."""
        if player_id > 2:
            return False
        # The public API uses (x, y); the board is indexed as (row, column).
        pos = (from_pos[1], from_pos[0])
        prev_value = self._board[pos]
        acceptable = self.__take(pos, player_id)
        if acceptable:
            acceptable = self.__slide(pos, slide)
            if not acceptable:
                # The take already stamped the cell: undo it.
                self._board[pos] = prev_value
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        """Claim the cell at ``from_pos`` (row, column) for ``player_id``.

        The cell must lie on the border and be empty or already owned by the
        player. On success the cell is set to ``player_id``.
        """
        on_border = (
            (from_pos[0] == 0 and from_pos[1] < 5)
            or (from_pos[0] == 4 and from_pos[1] < 5)
            or (from_pos[1] == 0 and from_pos[0] < 5)
            or (from_pos[1] == 4 and from_pos[0] < 5)
        )
        acceptable: bool = on_border and (
            self._board[from_pos] < 0 or self._board[from_pos] == player_id
        )
        if acceptable:
            self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: "Move") -> bool:
        """Slide the piece at ``from_pos`` (row, column) towards ``slide``.

        A piece cannot be pushed off the edge it sits on. When the direction
        is allowed, the cells between the piece and the far end of its row or
        column shift by one towards the vacated cell and the piece is placed
        at the far end. Returns whether the slide was performed.
        """
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]  # the four corners
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (
                slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (
                slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (
                slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (
                slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            # Corners sit on two edges, so only two directions remain.
            acceptable_top: bool = from_pos == (0, 0) and (
                slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (
                slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (
                slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (
                slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            piece = self._board[from_pos]
            row, col = from_pos
            if slide == Move.LEFT:
                for i in range(col, 0, -1):
                    self._board[(row, i)] = self._board[(row, i - 1)]
                self._board[(row, 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(col, self._board.shape[1] - 1, 1):
                    self._board[(row, i)] = self._board[(row, i + 1)]
                self._board[(row, self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(row, 0, -1):
                    self._board[(i, col)] = self._board[(i - 1, col)]
                self._board[(0, col)] = piece
            elif slide == Move.BOTTOM:
                for i in range(row, self._board.shape[0] - 1, 1):
                    self._board[(i, col)] = self._board[(i + 1, col)]
                self._board[(self._board.shape[0] - 1, col)] = piece
        return acceptable

Compute every move that is legal on an empty board, i.e. every (border tile, slide direction) pair.

In [166]:
# Every (i, j) cell on the outer ring of the 5x5 board, in row-major order.
border = [
    (i, j)
    for i in range(5)
    for j in range(5)
    if i in (0, 4) or j in (0, 4)
]
# The list already holds no duplicates; the set round-trip is kept because it
# fixes the iteration order that the move tables built from BORDER rely on.
BORDER = list(set(border))
print(len(BORDER))

def tile_to_moves(tile):
    """Return the slide directions allowed for ``tile``.

    A direction is dropped when the matching coordinate sits on that edge:
    ``tile[0] == 0`` rules out LEFT, ``tile[0] == 4`` rules out RIGHT,
    ``tile[1] == 0`` rules out TOP and ``tile[1] == 4`` rules out BOTTOM.
    The remaining directions keep the order TOP, BOTTOM, LEFT, RIGHT.
    """
    candidates = (
        (Move.TOP, tile[1] != 0),
        (Move.BOTTOM, tile[1] != 4),
        (Move.LEFT, tile[0] != 0),
        (Move.RIGHT, tile[0] != 4),
    )
    return [direction for direction, allowed in candidates if allowed]

# Border tile -> list of directions it may be slid in.
tile_moves = dict(zip(BORDER, map(tile_to_moves, BORDER)))

# Flat list of every (tile, direction) pair, grouped by tile in BORDER order.
ALL_MOVES = [(tile, move) for tile in BORDER for move in tile_moves[tile]]
N_ALL = len(ALL_MOVES)

class RandomPlayer(Player):
    """Player that picks a random border tile and a random slide direction."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return a random ``(from_pos, move)`` whose tile is not the opponent's."""
        # Redraw until the chosen tile is not owned by the other player.
        while True:
            from_pos = random.choice(BORDER)
            if game.get_board()[from_pos[1], from_pos[0]] != 1 - game.current_player_idx:
                break

        return from_pos, random.choice(tile_moves[from_pos])

16


Minimax search functions: board evaluation, recursive alpha-beta search, and the move-selection wrapper

In [167]:
def evaluate_board(board, winner, maximizing_player):
    """Score a position from the point of view of ``maximizing_player``.

    Returns ``1`` when ``winner`` is the maximizing player, ``0`` when there
    is no winner (``winner == -1``) and ``-1`` otherwise. ``board`` is
    accepted for interface symmetry but is not inspected.
    """
    win_reward = 1

    if winner == maximizing_player:
        return win_reward
    if winner == -1:
        return 0
    # Any other winner is the opponent.
    return -1 * win_reward

def minimax(game: "Dummy_Game", board, depth, maximizing, current_player, alpha, beta, transposition_table, min_weights):
    """Alpha-beta minimax search over ``ALL_MOVES``.

    Args:
        game: simulator used to apply moves and detect a winner.
        board: position to evaluate.
        depth: remaining plies to search; 0 evaluates the position directly.
        maximizing: whether ``current_player`` is the maximizing side here.
        current_player: id (0 or 1) of the player to move.
        alpha: best value the maximizing side is already assured of.
        beta: best value the minimizing side is already assured of.
        transposition_table: dict used as a cache; it is updated in place.
        min_weights: per-move factors, indexed like ``ALL_MOVES``, applied to
            the value of each reply explored at a minimizing node.

    Returns:
        The value of ``board`` from the maximizing player's point of view.
    """
    # The stored value depends on who is maximizing and on how deep the
    # search below this node went, so both belong in the key. Keying on the
    # board and mover alone lets an entry written while playing one side be
    # reused, with the wrong meaning, when the same table serves the other side.
    state = tuple(board.flatten().tolist()) + (current_player, depth, maximizing)

    if state in transposition_table:
        return transposition_table[state]

    # Check the board that was passed in rather than whatever the simulator
    # happens to hold from its last move.
    winner = game.check_winner_board(board)
    if winner != -1 or depth == 0:
        return evaluate_board(board, winner, current_player if maximizing else 1 - current_player)

    alpha_in, beta_in = alpha, beta

    if maximizing:
        result = float('-inf')
        for from_pos, move in ALL_MOVES:
            new_board, ok = game.single_move(board, from_pos, move, current_player)
            if not ok:
                continue
            value = minimax(game, new_board, depth - 1, False, 1 - current_player, alpha, beta, transposition_table, min_weights)

            result = max(result, value)
            alpha = max(alpha, value)

            if beta <= alpha:
                break  # Prune the remaining branches
    else:
        result = float('inf')
        for i_om, (from_pos, move) in enumerate(ALL_MOVES):
            new_board, ok = game.single_move(board, from_pos, move, current_player)
            if not ok:
                continue
            value = minimax(game, new_board, depth - 1, True, 1 - current_player, alpha, beta, transposition_table, min_weights)

            # Scale the reply by the learned weight of the move leading to it.
            value = value * min_weights[i_om]

            result = min(result, value)
            beta = min(beta, value)

            if beta <= alpha:
                break  # Prune the remaining branches

    # A result at or outside the incoming window (which includes every
    # pruned search) is only a bound for that window, so it is not cached.
    if alpha_in < result < beta_in:
        transposition_table[state] = result
    return result

def minimax_wrapper(game: "Dummy_Game", board, player_id, max_depth, min_weights, transposition_table=None):
    """Pick the move with the best minimax value for ``player_id``.

    Every legal entry of ``ALL_MOVES`` is applied to ``board`` and the
    resulting position is searched ``max_depth`` plies deep with the opponent
    to move.

    Args:
        game: simulator used to apply moves.
        board: current position.
        player_id: id (0 or 1) of the player choosing a move.
        max_depth: search depth used below each candidate move.
        min_weights: per-move factors forwarded to ``minimax``.
        transposition_table: optional cache shared across calls; a fresh one
            is created when omitted.

    Returns:
        The chosen ``(from_pos, move)`` pair, or ``None`` when no entry of
        ``ALL_MOVES`` is legal.
    """
    # A ``{}`` default would be created once and shared by every caller,
    # mixing cached values of players with different weights.
    if transposition_table is None:
        transposition_table = {}

    best_move = None
    best_eval = float('-inf')

    for om in ALL_MOVES:
        from_pos, move = om
        new_board, ok = game.single_move(board, from_pos, move, player_id)
        if not ok:
            continue
        om_eval = minimax(game, new_board, max_depth, False, 1 - player_id, float('-inf'), float('inf'), transposition_table, min_weights)

        # Always keep the first legal move as a fallback so that a legal move
        # is returned even if no evaluation compares greater than -inf.
        if best_move is None or om_eval > best_eval:
            best_eval = om_eval
            best_move = om

    return best_move

In [168]:
class MyPlayer(Player):
    """Minimax player whose minimizing-node values are scaled by learned weights.

    Args:
        max_depth: search depth handed to ``minimax_wrapper``.
        min_weights: one weight per entry of ``ALL_MOVES``; drawn uniformly
            at random in [0, 1) when omitted.
        base_until_move_change: number of times the search may return the
            same move as before until a random move is played instead.
    """

    def __init__(self, max_depth= 0, min_weights= None, base_until_move_change= 10) -> None:
        super().__init__()

        self.max_depth = max_depth
        # Cache owned by this player; it is only valid for its own weights.
        self.transposition_table = {}

        self.dummy = Dummy_Game()

        self.base_until_move_change = base_until_move_change
        self.last_pos_move = None
        self.until_move_change = self.base_until_move_change

        if min_weights is None: self.min_weights = np.random.random(size= (N_ALL,))
        else: self.min_weights = min_weights

        # Epochs left before the evolutionary loop discards this player.
        self.epoch_before_death = 3

    def get_weights(self): return self.min_weights

    def get_death(self, bonus= None):
        """Age the player by one epoch and return the epochs it has left.

        When ``bonus`` is given the counter is first reset so that exactly
        ``bonus`` epochs remain after this call.
        """
        if bonus is not None: self.epoch_before_death = bonus + 1
        self.epoch_before_death -= 1
        return self.epoch_before_death

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return the move to play on ``game`` as ``(from_pos, move)``.

        The minimax choice is used unless it has matched the remembered move
        ``base_until_move_change`` times; a random move on a tile not owned
        by the opponent is then played instead to break repetition.
        """
        board = game.get_board()
        player_id = game.current_player_idx

        pos_move = minimax_wrapper(self.dummy, board, player_id, max_depth= self.max_depth, min_weights= self.min_weights, transposition_table= self.transposition_table)

        if pos_move == self.last_pos_move:
            self.until_move_change -= 1
            if self.until_move_change == 0:
                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                while board[from_pos[1], from_pos[0]] == 1 - player_id:
                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]

                # Actually play the random move: without this assignment the
                # minimax move below would overwrite it and the escape from
                # repeated moves would never take effect.
                pos_move = (from_pos, move)
                self.until_move_change = self.base_until_move_change
        else:
            self.last_pos_move = pos_move

        from_pos, move = pos_move
        return from_pos, move

Functions for the evolutionary algorithm (EA)

In [169]:
def evaluate_population(population, n_trials= 100):
    """Score each player by its wins against a random opponent.

    Every individual plays ``n_trials // 2`` games moving first and the same
    number moving second. Returns a float array with one win count per
    individual, in population order.
    """
    opponent = RandomPlayer()
    games_per_side = n_trials // 2
    scores = np.zeros(shape= (len(population),))

    for idx in tqdm(range(len(population))):
        candidate = population[idx]
        for _ in range(games_per_side):
            # As first player the candidate wins when the result is 0.
            if Game().play(candidate, opponent) == 0:
                scores[idx] += 1
            # As second player the candidate wins when the result is 1.
            if Game().play(opponent, candidate) == 1:
                scores[idx] += 1

    return np.array(scores)

In [170]:
def procreate_1(p1, p2, c1, c2):
    """Create a child of ``p1`` and ``p2`` by per-weight crossover.

    Each weight is inherited from ``p1`` with probability ``c1 / (c1 + c2)``
    and from ``p2`` otherwise. The child keeps ``p1``'s search depth.
    """
    genes_first = p1.get_weights()
    genes_second = p2.get_weights()

    child_genes = deepcopy(genes_first)

    keep_first = c1 / (c1 + c2)

    for locus in range(len(genes_first)):
        # One uniform draw per weight decides which parent it comes from.
        if np.random.random() > keep_first:
            child_genes[locus] = genes_second[locus]

    return MyPlayer(max_depth= p1.max_depth, min_weights= child_genes)

def procreation(population, scores, n_children=10):
    """Breed ``n_children`` children from fitness-weighted parent pairs.

    Parents are drawn without replacement inside each pair, with probability
    given by a softmax of ``scores``. The two probabilities of a pair are
    passed to ``procreate_1`` as the parents' relative contributions.

    Args:
        population: list of players.
        scores: one fitness value per player, in the same order.
        n_children: number of children to produce (default 10).

    Returns:
        The list of children; empty when fewer than two players are given,
        since no pair can be formed.
    """
    if len(population) < 2:
        return []

    # Subtracting the maximum keeps the exponentials from overflowing.
    exp_val = np.exp(scores - np.max(scores))
    prob = exp_val / np.sum(exp_val)

    candidates = np.arange(len(population))
    parents = [
        np.random.choice(candidates, size=(2,), replace=False, p=prob)
        for _ in range(n_children)
    ]

    childrens = []
    for first, second in parents:
        childrens.append(
            procreate_1(population[first], population[second], prob[first], prob[second])
        )

    return childrens

In [171]:
P_MUTATION = 0.2          # chance that a single weight is perturbed
MUTATION_STRENGTH = 0.01  # standard deviation of the Gaussian perturbation

def mutate_1(p):
    """Return a new player whose weights are a mutated copy of ``p``'s.

    Each weight is independently perturbed with probability ``P_MUTATION``
    by zero-mean Gaussian noise of scale ``MUTATION_STRENGTH``. ``p`` itself
    is left unchanged and the mutant keeps its search depth.
    """
    mutated = deepcopy(p.get_weights())

    for gene in range(len(mutated)):
        if np.random.random() <= P_MUTATION:
            mutated[gene] += np.random.normal(0, MUTATION_STRENGTH)

    return MyPlayer(max_depth= p.max_depth, min_weights= mutated)

def mutation(population, n_mutants=10):
    """Return mutated copies of randomly chosen, distinct individuals.

    Args:
        population: list of players to pick from.
        n_mutants: number of mutants wanted (default 10). It is capped at the
            population size because individuals are drawn without
            replacement, which would otherwise raise for small populations.

    Returns:
        The list of mutants; empty when nothing can be picked.
    """
    n_pick = min(n_mutants, len(population))
    if n_pick <= 0:
        return []

    bases = np.random.choice(np.arange(len(population)), size=(n_pick,), replace=False)

    return [mutate_1(population[base]) for base in bases]

Evolutionary algorithm (EA): main loop

In [172]:
MAX_EPOCH = 10
MAX_POPULATION = 10  # size of the initial population
N_TRIALS = 20

population = [MyPlayer(max_depth= 1) for _ in range(MAX_POPULATION)]

for epoch in range(MAX_EPOCH):

    print(f'------------------------------------\nepoch {epoch}')

    scores = evaluate_population(population, N_TRIALS)

    # Rank the individuals best-first, keeping scores aligned with them.
    ranking = np.argsort(scores)[::-1]
    scores = scores[ranking]
    population = [population[k] for k in ranking]

    print('scores')
    print(scores)

    # Idea being tried: an individual dies after a few epochs spent outside
    # the top ranks.
    # --------------------------------------

    alive = np.ones(len(population), dtype=bool)
    for rank, individual in enumerate(population):
        # The top ranks get their lifetime refreshed; the others just age.
        bonus_epochs = 4 if rank < MAX_POPULATION / 2 else None
        if individual.get_death(bonus_epochs) == 0:
            alive[rank] = False
    population = [individual for individual, keep in zip(population, alive) if keep]
    scores = scores[alive]

    # Possible refinement: scale how much the population grows or shrinks
    # according to its current size.

    print(f'before: {len(population)}')

    # Procreation (selection probability depends on fitness).

    childrens = procreation(population, scores)
    print(f'n_childrens: {len(childrens)}')

    # Mutations (many of them, applied to randomly chosen individuals).

    mutants = mutation(population)
    print(f'n_mutants: {len(mutants)}')

    population.extend(childrens)
    population.extend(mutants)

    print(f'after: {len(population)}')

    #

------------------------------------
epoch 0


100%|██████████| 10/10 [01:48<00:00, 10.89s/it]


scores
[20. 20. 20. 20. 20. 20. 20. 19. 19. 17.]
before: 10
n_childrens: 10
n_mutants: 10
after: 30
------------------------------------
epoch 1


100%|██████████| 30/30 [05:19<00:00, 10.67s/it]


scores
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 18. 18. 18. 18. 18. 18. 18. 17.]
before: 30
n_childrens: 10
n_mutants: 10
after: 50
------------------------------------
epoch 2


100%|██████████| 50/50 [08:27<00:00, 10.15s/it]


scores
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 18. 18. 18. 18. 17.]
before: 45
n_childrens: 10
n_mutants: 10
after: 65
------------------------------------
epoch 3


100%|██████████| 65/65 [10:57<00:00, 10.12s/it]


scores
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 20. 20. 20. 20. 20. 20. 19. 19. 19. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 18. 18. 18. 18. 18. 18. 17. 16.]
before: 54
n_childrens: 10
n_mutants: 10
after: 74
------------------------------------
epoch 4


100%|██████████| 74/74 [12:35<00:00, 10.21s/it]


scores
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 18. 18. 18. 18. 18. 18.
 17. 17.]
before: 55
n_childrens: 10
n_mutants: 10
after: 75
------------------------------------
epoch 5


100%|██████████| 75/75 [12:45<00:00, 10.20s/it]


scores
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 18. 18. 18. 18. 18. 18.
 18. 17. 17.]
before: 57
n_childrens: 10
n_mutants: 10
after: 77
------------------------------------
epoch 6


100%|██████████| 77/77 [13:18<00:00, 10.37s/it]


scores
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 18. 18. 18. 18. 18. 18. 18.
 18. 18. 18. 17. 17.]
before: 57
n_childrens: 10
n_mutants: 10
after: 77
------------------------------------
epoch 7


100%|██████████| 77/77 [13:09<00:00, 10.26s/it]


scores
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 18. 18. 18. 18. 18. 18. 18. 18.
 18. 18. 18. 17. 17.]
before: 54
n_childrens: 10
n_mutants: 10
after: 74
------------------------------------
epoch 8


100%|██████████| 74/74 [12:38<00:00, 10.25s/it]


scores
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 18. 18. 18. 18. 18. 18. 18. 18. 18. 17.
 17. 17.]
before: 55
n_childrens: 10
n_mutants: 10
after: 75
------------------------------------
epoch 9


100%|██████████| 75/75 [12:53<00:00, 10.31s/it]

scores
[20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 18. 18. 18. 18. 18.
 18. 18. 17.]
before: 57
n_childrens: 10
n_mutants: 10
after: 77





Test: play the top-ranked player against a random player, both as first and as second player

In [174]:
# The population is kept sorted best-first, so index 0 is the top-ranked player.
ThePlayer = population[0]

wins_first = 0
wins_second = 0
n_trials = 1000

for _ in tqdm(range(n_trials)):

    # ThePlayer moves first: it wins when the game returns 0.
    if Game().play(ThePlayer, RandomPlayer()) == 0:
        wins_first += 1

    # ThePlayer moves second: it wins when the game returns 1.
    if Game().play(RandomPlayer(), ThePlayer) == 1:
        wins_second += 1

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

100%|██████████| 1000/1000 [13:29<00:00,  1.24it/s]

Player won 965 / 1000 as first
Player won 976 / 1000 as second



