In [1]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

A stand-alone copy of the game rules (`Dummy_Game`), used to simulate moves without touching the real `Game` class.

In [2]:
class Dummy_Game(object):
    """Lightweight re-implementation of the board rules, used to simulate moves.

    The board is a 5x5 array in which -1 marks a neutral tile and any other
    value is the id of the player owning the tile. Positions passed to
    ``single_move`` are ``(column, row)``; internally the board is indexed
    ``[row, column]``.
    """

    def __init__(self) -> None:
        # -1 marks a neutral tile, so the board needs a signed dtype.
        # `np.ones(..., dtype=np.uint8) * -1` only worked through implicit
        # promotion to int16 on NumPy 1.x and raises OverflowError on NumPy 2.
        self._board = np.full((5, 5), -1, dtype=np.int16)
        self.current_player_idx = 1

    def get_board(self):
        """Return the internal board array (not a copy)."""
        return self._board

    def single_move(self, board, from_pos, move, player_id):
        """Apply one move to a copy of `board`.

        Args:
            board: 5x5 board to start from; it is deep-copied, never mutated.
            from_pos: `(column, row)` of the tile to take.
            move: slide direction (a `Move` member).
            player_id: id of the player making the move.

        Returns:
            `(new_board, ok)` where `new_board` is a fresh copy of the
            resulting board and `ok` tells whether the move was legal. When
            `ok` is False the returned board equals the input board.
        """
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok

    def check_winner_board(self, board):
        """Install `board` as the current board (no copy) and return its winner."""
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        """Return the id found on the first complete line, or -1 if there is none.

        Rows are scanned first, then columns, then the main diagonal, then the
        anti-diagonal; the first uniform, non-neutral line decides the result.
        """
        for x in range(self._board.shape[0]):
            if self._board[x, 0] != -1 and all(self._board[x, :] == self._board[x, 0]): return self._board[x, 0]
        for y in range(self._board.shape[1]):
            if self._board[0, y] != -1 and all(self._board[:, y] == self._board[0, y]): return self._board[0, y]
        size = self._board.shape[0]
        # Element-wise comparisons: do not rely on `list == numpy scalar` broadcasting.
        if self._board[0, 0] != -1 and all(self._board[x, x] == self._board[0, 0] for x in range(size)): return self._board[0, 0]
        if self._board[0, -1] != -1 and all(self._board[x, -(x + 1)] == self._board[0, -1] for x in range(size)): return self._board[0, -1]
        return -1

    def __move(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        """Take the tile at `from_pos` (`(column, row)`) and slide it.

        Returns True when both the take and the slide are legal. If the slide
        is rejected, the taken tile is restored to its previous value.
        """
        if player_id > 2: return False
        # The public coordinate is (column, row); the board is indexed [row, column].
        board_pos = (from_pos[1], from_pos[0])
        prev_value = deepcopy(self._board[board_pos])
        acceptable = self.__take(board_pos, player_id)
        if acceptable:
            acceptable = self.__slide(board_pos, slide)
            if not acceptable: self._board[board_pos] = deepcopy(prev_value)
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        """Claim the tile at board index `from_pos` for `player_id`.

        The tile must lie on the border and be either neutral (< 0) or already
        owned by `player_id`. Returns whether the take was accepted.
        """
        on_border = (
            (from_pos[0] == 0 and from_pos[1] < 5)
            or (from_pos[0] == 4 and from_pos[1] < 5)
            or (from_pos[1] == 0 and from_pos[0] < 5)
            or (from_pos[1] == 4 and from_pos[0] < 5)
        )
        acceptable: bool = on_border and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable: self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: Move) -> bool:
        """Slide the tile at board index `from_pos` in direction `slide`.

        A tile cannot be slid towards the edge it already sits on. When the
        slide is legal, the tiles between `from_pos` and the target edge shift
        by one towards `from_pos` and the taken piece is placed on that edge.
        Returns whether the slide was accepted.
        """
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]  # the four corners
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            # Corners sit on two edges at once, so only two directions remain.
            acceptable_top: bool = from_pos == (0, 0) and (slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            piece = self._board[from_pos]
            if slide == Move.LEFT:
                for i in range(from_pos[1], 0, -1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                self._board[(from_pos[0], 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(from_pos[1], self._board.shape[1] - 1, 1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(from_pos[0], 0, -1): self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                self._board[(0, from_pos[1])] = piece
            elif slide == Move.BOTTOM:
                for i in range(from_pos[0], self._board.shape[0] - 1, 1): self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable

Enumerate every (tile, slide) move that is legal on an empty board; during play, tiles owned by the opponent are filtered out when a move is chosen.

In [3]:
# Collect the coordinates of every tile on the edge of the 5x5 board.
border = [
    (i, j)
    for i in range(5)
    for j in range(5)
    if i in (0, 4) or j in (0, 4)
]
# The set round-trip is kept on purpose: it fixes the iteration order of BORDER.
BORDER = list(set(border))
print(len(BORDER))

def tile_to_moves(tile):
    """Return the slide directions available for a border tile.

    A direction is excluded when the tile already sits on the corresponding
    edge: first coordinate 0 / 4 rules out LEFT / RIGHT, second coordinate
    0 / 4 rules out TOP / BOTTOM. The result keeps the order
    TOP, BOTTOM, LEFT, RIGHT.
    """
    blocked = []
    if tile[0] == 0: blocked.append(Move.LEFT)
    if tile[0] == 4: blocked.append(Move.RIGHT)
    if tile[1] == 0: blocked.append(Move.TOP)
    if tile[1] == 4: blocked.append(Move.BOTTOM)

    return [
        direction
        for direction in (Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT)
        if direction not in blocked
    ]

# Slide directions available for each border tile.
tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

# Flat list of every (tile, direction) pair, grouped by tile in BORDER order.
ALL_MOVES = []
for tile in BORDER:
    ALL_MOVES.extend((tile, move) for move in tile_moves[tile])
N_ALL = len(ALL_MOVES)

class RandomPlayer(Player):
    """Player that picks a random border tile and a random slide direction."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return a random `(tile, direction)`; tiles owned by the opponent are re-drawn."""
        opponent = 1 - game.current_player_idx

        # Keep drawing until the tile is not marked with the opponent's id.
        while True:
            from_pos = random.choice(BORDER)
            if game.get_board()[from_pos[1], from_pos[0]] != opponent: break

        return from_pos, random.choice(tile_moves[from_pos])

16


Each agent in the population evaluates the board with its own 5×5 weight matrix, aiming for positive values on boards that favour player 0 and negative values on boards that favour player 1.

In [4]:
class MyPlayer(Player):
    """Greedy one-ply player driven by a 5x5 matrix of per-tile weights.

    A board is scored as (sum of weights on tiles marked 0) minus (sum of
    weights on tiles marked 1). Player 0 picks the move maximising that score,
    any other player id the move minimising it.
    """

    def __init__(self, weights= None, base_until_move_change= 10) -> None:
        """
        Args:
            weights: 5x5 weight matrix; drawn uniformly from [0, 1) when None.
            base_until_move_change: how many times the same move may be
                re-selected in `make_move` before a random move is played.
        """
        super().__init__()

        if weights is None: self.weights = np.random.random(size= (5, 5))
        else: self.weights = weights

        # Private simulator used to try moves without touching the real game.
        self.dummy = Dummy_Game()

        self.base_until_move_change = base_until_move_change
        self.last_pos_move = None
        self.until_move_change = self.base_until_move_change

        # Remaining lifetime, consumed by get_death() during evolution.
        self.epoch_before_death = 3

    def reset_counters(self):
        """Reset the repeated-move countdown to its base value."""
        self.until_move_change = self.base_until_move_change

    def get_weights(self):
        """Return the weight matrix (not a copy)."""
        return self.weights

    def get_death(self, bonus= None):
        """Age the agent by one epoch and return its remaining lifetime.

        When `bonus` is given, the lifetime is first reset to `bonus + 1`, so
        the call returns `bonus`.
        """
        if bonus is not None: self.epoch_before_death = bonus + 1
        self.epoch_before_death -= 1
        return self.epoch_before_death

    def evaluate_board(self, board):
        """Score `board`: weights under 0-tiles minus weights under 1-tiles."""
        eval_0 = np.sum(self.weights[board == 0])
        eval_1 = np.sum(self.weights[board == 1])

        return eval_0 - eval_1

    def selection(self, state, player_id):
        """Return the best legal `(from_pos, move)` for `player_id`.

        Args:
            state: flattened board (25 values), reshaped here to 5x5.
            player_id: 0 maximises the evaluation, any other id minimises it.

        Raises:
            ValueError: from NumPy's argmax/argmin if no move in ALL_MOVES is legal.
        """
        board = np.array(state).reshape(5, 5)

        moves = []
        evals = []

        for from_pos, move in ALL_MOVES:
            new_board, ok = self.dummy.single_move(board, from_pos, move, player_id)
            if ok:
                evals.append(self.evaluate_board(new_board))
                moves.append((from_pos, move))

        if player_id == 0: idx = np.argmax(evals)
        else: idx = np.argmin(evals)

        return moves[idx]

    def eval_agent(self, n_games= 100):
        """Play `n_games` as player 0 (moving first) against a random opponent.

        Returns:
            The number of games in which the winner reported by the simulator is 0.
        """
        count_win = 0

        for _ in range(n_games):

            # Signed dtype: -1 marks a neutral tile (uint8 * -1 fails on NumPy 2).
            board = np.full((5, 5), -1, dtype= np.int16)
            next_to_move = 0
            state = tuple(board.flatten())

            winner = -1
            while winner == -1:

                if next_to_move == 0: from_pos, move = self.selection(state, next_to_move)
                else:
                    # Random opponent: re-draw while the tile belongs to the other player.
                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                    while board[from_pos[1], from_pos[0]] == 1 - next_to_move: from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]

                board, ok = self.dummy.single_move(board, from_pos, move, next_to_move)

                next_to_move = 1 - next_to_move
                state = tuple(board.flatten())

                winner = self.dummy.check_winner_board(board)

            if winner == 0: count_win += 1

        return count_win

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Choose the move to play in `game`.

        Normally returns the greedy move from `selection`. Each time that move
        equals the last remembered one, a countdown is decremented; when it
        reaches zero it is reset and a random move on a tile not owned by the
        opponent is returned instead, to break out of repeated positions.
        """
        board = game.get_board()
        player_id = game.current_player_idx
        state = tuple(board.flatten())

        pos_move = self.selection(state, player_id)

        if pos_move == self.last_pos_move:
            self.until_move_change -= 1
            if self.until_move_change == 0:
                self.until_move_change = self.base_until_move_change

                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                while board[from_pos[1], from_pos[0]] == 1 - player_id: from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]

                # Return the random move; previously it was overwritten by the
                # greedy one below and never played.
                return from_pos, move
        else: self.last_pos_move = pos_move

        from_pos, move = pos_move
        return from_pos, move

Functions for the evolutionary algorithm (EA): fitness evaluation, procreation and mutation.

In [5]:
def evaluate_population(population, n_trials= 100):
    """Return a float array with each agent's `eval_agent(n_trials)` result.

    Agents are evaluated in list order, with a tqdm progress bar over the
    population indices.
    """
    return np.array(
        [population[i].eval_agent(n_trials) for i in tqdm(range(len(population)))],
        dtype= float,
    )

In [6]:
def procreate_1(p1, p2, c1, c2):
    """Create a child whose weights are mixed cell by cell from two parents.

    Each cell starts as a copy of `p1`'s weight and is replaced by `p2`'s
    weight whenever a fresh uniform draw exceeds `c1 / (c1 + c2)`.
    """
    child_weights = deepcopy(p1.get_weights())
    other_weights = p2.get_weights()

    keep_first = c1 / (c1 + c2)

    for row in range(len(child_weights)):
        for col in range(len(child_weights[row])):
            # One draw per cell, in row-major order.
            if np.random.random() > keep_first:
                child_weights[row][col] = other_weights[row][col]

    return MyPlayer(weights= child_weights)

def procreation(population, scores):
    """Breed 15 children from parent pairs sampled by softmax of `scores`.

    All 15 pairs are drawn first (two distinct indices each, weighted by the
    softmax probabilities); each pair is then passed to `procreate_1` together
    with the two parents' selection probabilities.
    """
    # Subtracting the maximum keeps the exponentials from overflowing.
    shifted = np.exp(scores - np.max(scores))
    prob = shifted / np.sum(shifted)

    candidates = list(range(len(population)))
    parents = [
        np.random.choice(candidates, size= (2,), replace= False, p= prob)
        for _ in range(15)
    ]

    return [
        procreate_1(population[first], population[second], prob[first], prob[second])
        for first, second in parents
    ]

In [7]:
P_MUTATION = 0.2            # per-cell probability of being perturbed
MUTATION_STRENGTH = 0.01    # standard deviation of the Gaussian perturbation

def mutate_1(p):
    """Return a new agent whose weights are a noisy copy of `p`'s weights.

    Every cell is perturbed independently with probability P_MUTATION by
    zero-mean Gaussian noise of standard deviation MUTATION_STRENGTH.
    `p` itself is left untouched.
    """
    mutated = deepcopy(p.get_weights())

    for row in range(len(mutated)):
        for col in range(len(mutated[row])):
            # Draw the gate first, then the noise only when the gate passes.
            if np.random.random() <= P_MUTATION:
                mutated[row][col] = mutated[row][col] + np.random.normal(0, MUTATION_STRENGTH)

    return MyPlayer(weights= mutated)

def mutation(population):
    """Return 15 mutants, each derived from a distinct randomly chosen agent.

    Sampling is without replacement, so the population must hold at least
    15 agents.
    """
    bases = np.random.choice(list(range(len(population))), size= (15,), replace= False)
    return [mutate_1(population[base]) for base in bases]

Evolutionary algorithm (EA): main loop

In [8]:
MAX_EPOCH = 10
MAX_POPULATION = 20
N_TRIALS = 100

# Initial population: agents with random weight matrices.
population = [MyPlayer() for _ in range(MAX_POPULATION)]

for epoch in range(MAX_EPOCH):

    print(f'------------------------------------\nepoch {epoch}')

    # Fitness evaluation, then rank the agents best-first.
    scores = evaluate_population(population, N_TRIALS)
    idx_sort = np.argsort(scores)[::-1]
    population = [population[i] for i in idx_sort]
    scores = scores[idx_sort]

    print('scores')
    print(scores)

    # Idea: let an agent die after some epochs outside the top ranks.
    # The top MAX_POPULATION / 2 ranks get their lifetime refreshed; everyone
    # else ages by one epoch and is removed once the lifetime reaches zero.
    survives = np.array(
        [
            agent.get_death(4 if rank < MAX_POPULATION / 2 else None) != 0
            for rank, agent in enumerate(population)
        ],
        dtype= bool,
    )
    population = [agent for agent, keep in zip(population, survives) if keep]
    scores = scores[survives]

    # Idea: maybe scale the population growth up or down with the population size.

    print(f'before: {len(population)}')

    # Procreation (parent probability depends on fitness).
    childrens = procreation(population, scores)
    print(f'n_childrens: {len(childrens)}')

    # Mutation (many random perturbations on randomly chosen individuals).
    mutants = mutation(population)
    print(f'n_mutants: {len(mutants)}')

    population.extend(childrens)
    population.extend(mutants)

    print(f'after: {len(population)}')

------------------------------------
epoch 0


  0%|          | 0/20 [00:00<?, ?it/s]

100%|██████████| 20/20 [00:41<00:00,  2.10s/it]


[99. 95. 93. 92. 92. 92. 92. 91. 89. 88. 87. 87. 86. 86. 86. 85. 83. 81.
 79. 77.]
-
[99. 95. 93. 92. 92. 92. 92. 91. 89. 88.]
before: 20
n_childrens: 15
n_mutants: 15
after: 50
------------------------------------
epoch 1


100%|██████████| 50/50 [01:40<00:00,  2.01s/it]


[100. 100. 100. 100.  99.  99.  99.  99.  98.  98.  98.  98.  97.  97.
  97.  96.  96.  96.  96.  95.  92.  92.  92.  92.  92.  92.  92.  92.
  91.  91.  91.  91.  90.  90.  89.  89.  88.  88.  88.  87.  87.  86.
  86.  86.  86.  85.  85.  85.  83.  81.]
-
[100. 100. 100. 100.  99.  99.  99.  99.  98.  98.]
before: 50
n_childrens: 15
n_mutants: 15
after: 80
------------------------------------
epoch 2


100%|██████████| 80/80 [02:29<00:00,  1.87s/it]


[100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.  98.  98.
  97.  97.  97.  97.  97.  96.  96.  96.  96.  96.  95.  95.  95.  95.
  95.  95.  95.  94.  94.  93.  92.  91.  91.  91.  91.  90.  90.  90.
  89.  89.  89.  89.  89.  89.  89.  89.  89.  88.  88.  88.  87.  87.
  86.  86.  86.  85.  85.  84.  84.  83.  78.  74.]
-
[100. 100. 100. 100. 100. 100. 100. 100. 100. 100.]
before: 70
n_childrens: 15
n_mutants: 15
after: 100
------------------------------------
epoch 3


100%|██████████| 100/100 [02:51<00:00,  1.71s/it]


[100. 100. 100. 100. 100. 100. 100. 100. 100.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  96.  96.  96.  96.  96.  95.  95.  95.  95.  95.
  94.  93.  93.  92.  92.  92.  92.  92.  91.  91.  91.  91.  91.  90.
  90.  90.  89.  89.  88.  87.  87.  87.  87.  86.  85.  85.  85.  83.
  81.  77.]
-
[100. 100. 100. 100. 100. 100. 100. 100. 100.  99.]
before: 80
n_childrens: 15
n_mutants: 15
after: 110
------------------------------------
epoch 4


100%|██████████| 110/110 [02:57<00:00,  1.62s/it]


[100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100. 100. 100. 100.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  97.  97.  97.  97.  97.  97.  97.  97.  97.  96.
  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  95.  95.  95.  95.
  95.  94.  94.  94.  94.  93.  93.  92.  92.  91.  91.  91.  90.  90.
  90.  90.  89.  89.  88.  87.  87.  87.  82.  82.  81.  77.]
-
[100. 100. 100. 100. 100. 100. 100. 100. 100. 100.]
before: 82
n_childrens: 15
n_mutants: 15
after: 112
------------------------------------
epoch 5


100%|██████████| 112/112 [03:12<00:00,  1.72s/it]


[100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100. 100. 100. 100. 100. 100.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  97.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  96.  96.  96.  96.  96.
  95.  95.  95.  95.  94.  94.  93.  93.  92.  92.  90.  90.  88.  86.]
-
[100. 100. 100. 100. 100. 100. 100. 100. 100. 100.]
before: 86
n_childrens: 15
n_mutants: 15
after: 116
------------------------------------
epoch 6


100%|██████████| 116/116 [03:19<00:00,  1.72s/it]


[100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  96.  96.
  96.  96.  96.  96.  96.  96.  96.  96.  95.  95.  94.  94.  94.  92.
  92.  91.  90.  86.]
-
[100. 100. 100. 100. 100. 100. 100. 100. 100. 100.]
before: 91
n_childrens: 15
n_mutants: 15
after: 121
------------------------------------
epoch 7


100%|██████████| 121/121 [03:25<00:00,  1.70s/it]


[100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100. 100. 100. 100.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  97.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  97.  96.  96.  96.  96.  96.  96.  96.  96.  95.
  95.  95.  95.  94.  94.  93.  92.  92.  83.]
-
[100. 100. 100. 100. 100. 100. 100. 100. 100. 100.]
before: 87
n_childrens: 15
n_mutants: 15
after: 117
------------------------------------
epoch 8


100%|██████████| 117/117 [03:10<00:00,  1.63s/it]


[100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.
  95.  95.  94.  92.  91.]
-
[100. 100. 100. 100. 100. 100. 100. 100. 100. 100.]
before: 86
n_childrens: 15
n_mutants: 15
after: 116
------------------------------------
epoch 9


100%|██████████| 116/116 [02:52<00:00,  1.48s/it]

[100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  96.  96.
  96.  95.  95.  94.]
-
[100. 100. 100. 100. 100. 100. 100. 100. 100. 100.]
before: 87
n_childrens: 15
n_mutants: 15
after: 117





In [13]:
# Take the first agent of the final population and show its weight matrix.
# NOTE(review): this relies on `population` still being in the order left by the
# last epoch's ranking (new children and mutants are appended at the end).
ThePlayer = population[0]
print(ThePlayer.get_weights())

[[0.59923711 0.00127436 0.29274716 0.09691764 0.96716405]
 [0.45680423 0.31143271 0.19930104 0.55172626 0.63428148]
 [0.63707781 0.98578032 0.12140629 0.50865789 0.96773215]
 [0.44541251 0.88323028 0.54425298 0.43068464 0.69010507]
 [0.50936949 0.59367132 0.96148384 0.21368317 0.66575189]]


In [12]:
wins_first = 0
wins_second = 0
n_trials = 1000

player1 = ThePlayer
player2 = RandomPlayer()

for _ in tqdm(range(n_trials)):
    # ThePlayer passed as the first argument: counted as a win when play() returns 0.
    wins_first += int(Game().play(player1, player2) == 0)

    # ThePlayer passed as the second argument: counted as a win when play() returns 1.
    wins_second += int(Game().play(player2, player1) == 1)

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:28<00:00, 35.37it/s]

Player won 982 / 1000 as first
Player won 984 / 1000 as second



