In [1]:
import random
import numpy as np
from game import Game, Move, Player
from copy import deepcopy
from tqdm import tqdm

In [2]:
class Dummy_Game(object):
    def __init__(self) -> None:
        self._board = np.ones((5, 5), dtype=np.uint8) * -1
        self.current_player_idx = 1

    def get_board(self): return self._board

    def single_move(self, board, from_pos, move, player_id):
        self._board = deepcopy(board)
        self.current_player_idx = player_id
        ok = self.__move(from_pos, move, player_id)
        return deepcopy(self._board), ok
    
    def check_winner_board(self, board):
        self._board = board
        return self.check_winner()

    def check_winner(self) -> int:
        for x in range(self._board.shape[0]):
            if self._board[x, 0] != -1 and all(self._board[x, :] == self._board[x, 0]): return self._board[x, 0]
        for y in range(self._board.shape[1]):
            if self._board[0, y] != -1 and all(self._board[:, y] == self._board[0, y]): return self._board[0, y]
        if self._board[0, 0] != -1 and all([self._board[x, x] for x in range(self._board.shape[0])] == self._board[0, 0]): return self._board[0, 0]
        if self._board[0, -1] != -1 and all([self._board[x, -(x + 1)] for x in range(self._board.shape[0])] == self._board[0, -1]): return self._board[0, -1]
        return -1

    def __move(self, from_pos: tuple[int, int], slide: Move, player_id: int) -> bool:
        if player_id > 2: return False
        prev_value = deepcopy(self._board[(from_pos[1], from_pos[0])])
        acceptable = self.__take((from_pos[1], from_pos[0]), player_id)
        if acceptable:
            acceptable = self.__slide((from_pos[1], from_pos[0]), slide)
            if not acceptable: self._board[(from_pos[1], from_pos[0])] = deepcopy(prev_value)
        return acceptable

    def __take(self, from_pos: tuple[int, int], player_id: int) -> bool:
        acceptable: bool = ((from_pos[0] == 0 and from_pos[1] < 5) or (from_pos[0] == 4 and from_pos[1] < 5) or (from_pos[1] == 0 and from_pos[0] < 5) or (from_pos[1] == 4 and from_pos[0] < 5)) and (self._board[from_pos] < 0 or self._board[from_pos] == player_id)
        if acceptable: self._board[from_pos] = player_id
        return acceptable

    def __slide(self, from_pos: tuple[int, int], slide: Move) -> bool:
        SIDES = [(0, 0), (0, 4), (4, 0), (4, 4)]
        if from_pos not in SIDES:
            acceptable_top: bool = from_pos[0] == 0 and (slide == Move.BOTTOM or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_bottom: bool = from_pos[0] == 4 and (slide == Move.TOP or slide == Move.LEFT or slide == Move.RIGHT)
            acceptable_left: bool = from_pos[1] == 0 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos[1] == 4 and (slide == Move.BOTTOM or slide == Move.TOP or slide == Move.LEFT)
        else:
            acceptable_top: bool = from_pos == (0, 0) and (slide == Move.BOTTOM or slide == Move.RIGHT)
            acceptable_left: bool = from_pos == (4, 0) and (slide == Move.TOP or slide == Move.RIGHT)
            acceptable_right: bool = from_pos == (0, 4) and (slide == Move.BOTTOM or slide == Move.LEFT)
            acceptable_bottom: bool = from_pos == (4, 4) and (slide == Move.TOP or slide == Move.LEFT)
        acceptable: bool = acceptable_top or acceptable_bottom or acceptable_left or acceptable_right
        if acceptable:
            piece = self._board[from_pos]
            if slide == Move.LEFT:
                for i in range(from_pos[1], 0, -1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i - 1)]
                self._board[(from_pos[0], 0)] = piece
            elif slide == Move.RIGHT:
                for i in range(from_pos[1], self._board.shape[1] - 1, 1): self._board[(from_pos[0], i)] = self._board[(from_pos[0], i + 1)]
                self._board[(from_pos[0], self._board.shape[1] - 1)] = piece
            elif slide == Move.TOP:
                for i in range(from_pos[0], 0, -1): self._board[(i, from_pos[1])] = self._board[(i - 1, from_pos[1])]
                self._board[(0, from_pos[1])] = piece
            elif slide == Move.BOTTOM:
                for i in range(from_pos[0], self._board.shape[0] - 1, 1): self._board[(i, from_pos[1])] = self._board[(i + 1, from_pos[1])]
                self._board[(self._board.shape[0] - 1, from_pos[1])] = piece
        return acceptable

In [3]:
border = []
for i in range(5):
    for j in range(5):
        if i == 0 or i == 4 or j == 0 or j == 4:
            border.append((i, j))
BORDER = (list(set(border)))
print(len(BORDER))

def tile_to_moves(tile):
    possible_moves = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        
    if tile[0] == 0: possible_moves.remove(Move.LEFT)
    if tile[0] == 4: possible_moves.remove(Move.RIGHT)
    if tile[1] == 0: possible_moves.remove(Move.TOP)
    if tile[1] == 4: possible_moves.remove(Move.BOTTOM)

    return possible_moves

tile_moves = {tile: tile_to_moves(tile) for tile in BORDER}

ALL_MOVES = []
for tile in BORDER:
    possible_moves = tile_moves[tile]
    for move in possible_moves: ALL_MOVES.append((tile, move))
N_ALL = len(ALL_MOVES)

class RandomPlayer(Player):
    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        from_pos = random.choice(BORDER)
        while game.get_board()[from_pos[1], from_pos[0]] == 1 - game.current_player_idx: from_pos = random.choice(BORDER)

        possible_moves = tile_moves[from_pos]
        
        move = random.choice(possible_moves)

        return from_pos, move

16


In [4]:
x = np.ones((5, 5), dtype= np.uint8) * -1

count_error = 0
count_missing = 0

for i in range(5):
    for j in range(5):
        for move in [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]:
            if ((i, j), move) in ALL_MOVES:
                
                if not Dummy_Game().single_move(x, (i, j), move, 0)[1]:
                    print('ERROR')
                    print((i, j), move)
                    count_error += 1

            if not Dummy_Game().single_move(x, (i, j), move, 0)[1]:

                if ((i, j), move) in ALL_MOVES:
                    print('MISSING')
                    print((i, j), move)
                    count_missing += 1

print(count_error)
print(count_missing)

0
0


In [5]:
import numpy as np

def state_to_board(state):
    binary_string = format(state, '050b')
    binary_array = np.array(list(map(int, binary_string))).reshape(2, 5, 5)

    board = np.zeros((5, 5), dtype=int)
    board[binary_array[0] == 1] = -1
    board[binary_array[1] == 1] = 1

    return board

def board_to_state(board):
    binary_array = np.zeros((2, 5, 5), dtype=int)
    
    binary_array[0][board == -1] = 1
    binary_array[1][board == 1] = 1

    binary_string = ''.join(map(str, binary_array.flatten()))
    return int(binary_string, 2)



rand_board = np.random.choice([-1, 0, 1], size=(5, 5), replace=True)
print('Board:')
print(rand_board)

rand_state = board_to_state(rand_board)
rand_board = state_to_board(rand_state)

print('\nState:')
print(rand_state)
print('\nBoard:')
print(state_to_board(rand_state))

Board:
[[ 0  1  0 -1 -1]
 [ 1  0 -1 -1 -1]
 [ 1  1 -1 -1  0]
 [-1  0  1 -1  0]
 [ 0 -1  0  1  1]]

State:
113475460817027

Board:
[[ 0  1  0 -1 -1]
 [ 1  0 -1 -1 -1]
 [ 1  1 -1 -1  0]
 [-1  0  1 -1  0]
 [ 0 -1  0  1  1]]


In [6]:
dict_rot = {
    (Move.TOP, 1): Move.RIGHT,
    (Move.TOP, 2): Move.BOTTOM,
    (Move.TOP, 3): Move.LEFT,
    (Move.BOTTOM, 1): Move.LEFT,
    (Move.BOTTOM, 2): Move.TOP,
    (Move.BOTTOM, 3): Move.RIGHT,
    (Move.LEFT, 1): Move.TOP,
    (Move.LEFT, 2): Move.RIGHT,
    (Move.LEFT, 3): Move.BOTTOM,
    (Move.RIGHT, 1): Move.BOTTOM,
    (Move.RIGHT, 2): Move.LEFT,
    (Move.RIGHT, 3): Move.TOP,
}

dict_flip = {
    Move.TOP: Move.TOP,
    Move.BOTTOM: Move.BOTTOM,
    Move.LEFT: Move.RIGHT,
    Move.RIGHT: Move.LEFT,
}

#rot_orario: (3, 4) -> (4, 1) -> (1, 0) -> (0, 3) -> (3, 4)
#: (xi, yi) -> (yi, 4 - xi)
#rot_anti_orario: (3, 4) -> (0, 3) -> (1, 0) -> (4, 1) -> (3, 4)
#: (xi, yi) -> (4 - yi, xi)

def rot(n_rot):
    def rot_n(from_pos, move):
        for _ in range(n_rot):
            from_pos = 4 - from_pos[1], from_pos[0]
        return from_pos, dict_rot[(move, n_rot)]
    return rot_n

def flip(from_pos, move):
    from_pos = 4 - from_pos[0], from_pos[1]
    return from_pos, dict_flip[move]

def flip_rot(n_rot):
    def flip_rot_n(from_pos, move):
        from_pos, move = rot(n_rot)(from_pos, move)
        return flip(from_pos, move)
    return flip_rot_n

rot1 = rot(1)
rot2 = rot(2)
rot3 = rot(3)
flip_rot1 = flip_rot(1)
flip_rot2 = flip_rot(2)
flip_rot3 = flip_rot(3)

verse_simmetries = [
    rot3,
    rot2,
    rot1,
    flip,
    flip_rot3,
    flip_rot2,
    flip_rot1,
]

inverse_simmetries = [
    rot1,
    rot2,
    rot3,
    flip,
    flip_rot1,
    flip_rot2,
    flip_rot3,
]

def check_simmetries(board, state_list):

    base_state = tuple(board.flatten())
    if base_state in state_list: return base_state, None

    R1 = np.rot90(board)
    base_state = tuple(R1.flatten())
    if base_state in state_list: return base_state, 0

    R2 = np.rot90(R1)
    base_state = tuple(R2.flatten())
    if base_state in state_list: return base_state, 1

    R3 = np.rot90(R2)
    base_state = tuple(R3.flatten())
    if base_state in state_list: return base_state, 2
    
    F = np.fliplr(board)
    base_state = tuple(F.flatten())
    if base_state in state_list: return base_state, 3
    
    FR1 = np.rot90(F)
    base_state = tuple(FR1.flatten())
    if base_state in state_list: return base_state, 4
    
    FR2 = np.rot90(FR1)
    base_state = tuple(FR2.flatten())
    if base_state in state_list: return base_state, 5
    
    FR3 = np.rot90(FR2)
    base_state = tuple(FR3.flatten())
    if base_state in state_list: return base_state, 6
    
    return None

MOVES_SIMMETRIES = {} #(id_move, id_simmetry) -> id_move

for id_move in range(len(ALL_MOVES)):
    from_pos, move = ALL_MOVES[id_move]

    for id_simmetry in range(len(inverse_simmetries)):

        idx = None
        for i in range(len(ALL_MOVES)):
            if ALL_MOVES[i] == inverse_simmetries[id_simmetry](from_pos, move):
                idx = i
                break
        
        MOVES_SIMMETRIES[(id_move, id_simmetry)] = i

print(len(MOVES_SIMMETRIES))
print(len(ALL_MOVES) * 7)
c = 10
for k, v in MOVES_SIMMETRIES.items():
    print((k, v))
    c -= 1
    if c == 0: break

308
308
((0, 0), 31)
((0, 1), 18)
((0, 2), 27)
((0, 3), 41)
((0, 4), 39)
((0, 5), 21)
((0, 6), 14)
((1, 0), 30)
((1, 1), 17)
((1, 2), 28)


In [7]:
board = np.array([[0, 1, -1], [1, 0, -1], [0, 1, -1]])
print(np.array(board==0, dtype= int))

[[1 0 0]
 [0 1 0]
 [1 0 0]]


In [8]:
class MyPlayer(Player):
    def __init__(self, weights= None, base_until_move_change= 10) -> None:
        super().__init__()

        if weights is None:
            self.weights = np.random.random(size= (5, 5))
            self.weights_n_move = np.random.random(size= (5, 5))
        else: 
            self.weights = weights[0]
            self.weights_n_move = weights[1]

        self.dummy = Dummy_Game()

        self.base_until_move_change = base_until_move_change
        self.last_pos_move = None
        self.until_move_change = self.base_until_move_change

        self.epoch_before_death = 3

        self.n_move = 0

    def reset_counters(self):
        self.until_move_change = self.base_until_move_change

    def get_weights(self): return deepcopy(self.weights), deepcopy(self.weights_n_move)

    def get_death(self, bonus= None):
        if bonus is not None: self.epoch_before_death = bonus + 1
        self.epoch_before_death -= 1
        return self.epoch_before_death
    
    def evaluate_board(self, board, n_move):

        eval_0 = sum(sum(np.array(board==0, dtype= int) * (self.weights + n_move * self.weights_n_move)))
        eval_1 = sum(sum(np.array(board==1, dtype= int) * (self.weights + n_move * self.weights_n_move)))

        return eval_0 - eval_1

    def selection(self, state, player_id, n_move):

        board = np.array(state).reshape(5, 5)

        moves = []
        evals = []

        for from_pos, move in ALL_MOVES:
            
            new_board, ok = self.dummy.single_move(board, from_pos, move, player_id)
            if ok:

                evals.append(self.evaluate_board(new_board, n_move))
                moves.append((from_pos, move))

        if player_id == 0: idx = np.argmax(evals)
        else: idx = np.argmin(evals)
        
        return moves[idx]

    def eval_agent(self, n_games= 100):

        count_win = 0

        for _ in range(n_games):

            board = np.ones((5, 5), dtype= np.uint8) * -1
            next_to_move = 0
            state = tuple(board.flatten())
            n_move = 1

            winner = -1
            while winner == -1:

                if next_to_move == 0: from_pos, move = self.selection(state, next_to_move, n_move)
                else:
                    from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                    while board[from_pos[1], from_pos[0]] == 1 - next_to_move: from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                    n_move += 1

                board, ok = self.dummy.single_move(board, from_pos, move, next_to_move)

                next_to_move = 1 - next_to_move
                state = tuple(board.flatten())

                winner = self.dummy.check_winner_board(board)
        
            if winner == 0: count_win += 1

        return count_win

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:

        board = game.get_board()
        state = tuple(board.flatten())

        if sum(sum(board == -1)) > 23: self.n_move = 1
        else: self.n_move += 1

        pos_move = self.selection(state, game.current_player_idx, self.n_move)

        if pos_move == self.last_pos_move:
            self.until_move_change -= 1
            if self.until_move_change == 0:
                board = game.get_board()
                player_id = game.current_player_idx

                from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]
                while board[from_pos[1], from_pos[0]] == 1 - player_id: from_pos, move = ALL_MOVES[np.random.randint(0, N_ALL)]

                self.until_move_change = self.base_until_move_change
        else: self.last_pos_move = pos_move

        from_pos, move = pos_move
        
        #print((from_pos, move))
        #print('---------------')
        return from_pos, move

In [9]:
def eval_against(p1, p2, dummy):

    until_draw = 100
    
    board = np.ones((5, 5), dtype= np.uint8) * -1
    next_to_move = 0
    state = tuple(board.flatten())

    winner = -1
    while winner == -1:

        if next_to_move == 0: from_pos, move = p1.selection(state, next_to_move)
        else: from_pos, move = p2.selection(state, next_to_move)

        board, ok = dummy.single_move(board, from_pos, move, next_to_move)

        next_to_move = 1 - next_to_move
        state = tuple(board.flatten())

        winner = dummy.check_winner_board(board)

        if winner == -1:
            until_draw -= 1
            if until_draw == 0: return -1

    return winner

def evaluate_population(population, n_trials= 100):

    #dummy = Dummy_Game()

    len_p = len(population)

    scores = np.zeros(shape= (len_p,))

    for i in tqdm(range(len_p)):

        scores[i] += population[i].eval_agent(n_trials)

        #for j in range(len_p):
        #    if i != j:
        #        winner = eval_against(population[i], population[j], dummy)
        #        if winner == 0: scores[i] += 1
        #        elif winner == 1: scores[j] += 1

    return np.array(scores)

In [10]:
def procreate_1(p1, p2, c1, c2):
    
    W1, W1_moves = p1.get_weights()

    W2, W2_moves = p2.get_weights()

    W3, W3_moves = deepcopy(W1), deepcopy(W1_moves)

    c = c1 / (c1 + c2)

    for i in range(len(W1)):
        for j in range(len(W1[i])):
            if c < np.random.random():
                W3[i][j] = W2[i][j]

            if c < np.random.random():
                W3_moves[i][j] = W2_moves[i][j]

    return MyPlayer(weights= (W3, W3_moves))

def procreate_2(p1, p2, c1, c2):

    W1 = p1.get_weights()
    W1_moves = p1.get_weights()

    W2 = p2.get_weights()
    W2_moves = p2.get_weights()

    c = c1 / (c1 + c2)

    if c < random.random(): W3, W3_moves = deepcopy(W2), deepcopy(W1_moves)
    else: W3, W3_moves = deepcopy(W1), deepcopy(W2_moves)

    return MyPlayer(weights= (W3, W3_moves))

def procreation(population, scores):

    exp_val = np.exp(scores - np.max(scores))
    prob = exp_val / np.sum(exp_val)
    
    parents = []
    for _ in range(15):
        parents.append(np.random.choice([i for i in range(len(population))], size= (2,), replace= False, p= prob))

    childrens = []
    for pair in parents:
        pi = population[pair[0]]
        pj = population[pair[1]]
        mod_i = prob[pair[0]]
        mod_j = prob[pair[1]]
        
        childrens.append(procreate_1(pi, pj, mod_i, mod_j))

    return childrens

In [11]:
P_MUTATION = 0.2
MUTATION_STRENGTH = 0.01

def mutate_1(p):

    Wm, Wm_move = p.get_weights()

    for i in range(len(Wm)):
        for j in range(len(Wm[i])):
            if P_MUTATION >= np.random.random():
                Wm[i][j] += np.random.normal(0, MUTATION_STRENGTH)
                
            if P_MUTATION >= np.random.random():
                Wm_move[i][j] += np.random.normal(0, MUTATION_STRENGTH)

    return MyPlayer(weights= (Wm, Wm_move))

def mutation(population):

    bases = np.random.choice([i for i in range(len(population))], size= (15,), replace= False)

    mutants = []
    for base in bases: mutants.append(mutate_1(population[base]))

    return mutants

In [12]:
MAX_EPOCH = 10
MAX_POPULATION = 20
N_TRIALS = 100

population = [MyPlayer() for _ in range(MAX_POPULATION)]

for epoch in range(MAX_EPOCH):

    print(f'------------------------------------\nepoch {epoch}')

    scores = evaluate_population(population, N_TRIALS)

    idx_sort = np.argsort(scores)[::-1]
    scores = scores[idx_sort]
    population = [population[i] for i in idx_sort]

    print(scores)
    print('-')
    print(scores[:10])

    # provare ad implementare morte dopo tot che non si è tra i primi tot
    #--------------------------------------

    to_death = []
    for i, p in enumerate(population):
        bonus_epochs = 4 if i < MAX_POPULATION / 2 else None
        if p.get_death(bonus_epochs) == 0: to_death.append((i, p))
    for i, p in to_death[::-1]:
        population.remove(p)
        scores = np.delete(scores, i)

    ## magari aumentare e diminuire aumento popolazione in base a grandezza popolazione

    print(f'before: {len(population)}')

    ## procreazione (probabilita dipendente da fitness)

    childrens = procreation(population, scores)
    print(f'n_childrens: {len(childrens)}')

    ## mutazioni (tante ma random su individui random)

    mutants = mutation(population)
    print(f'n_mutants: {len(mutants)}')

    for child in childrens: population.append(child)
    for mutant in mutants: population.append(mutant)

    print(f'after: {len(population)}')

    #

------------------------------------
epoch 0


  0%|          | 0/20 [00:00<?, ?it/s]

100%|██████████| 20/20 [00:59<00:00,  3.00s/it]


[98. 96. 96. 95. 95. 94. 93. 93. 92. 92. 90. 89. 87. 87. 86. 85. 84. 81.
 81. 71.]
-
[98. 96. 96. 95. 95. 94. 93. 93. 92. 92.]
before: 20
n_childrens: 15
n_mutants: 15
after: 50
------------------------------------
epoch 1


100%|██████████| 50/50 [02:37<00:00,  3.16s/it]


[99. 98. 98. 97. 97. 97. 97. 97. 96. 96. 96. 96. 95. 95. 95. 94. 94. 93.
 93. 92. 92. 92. 92. 91. 91. 91. 91. 91. 90. 90. 89. 88. 87. 87. 87. 87.
 86. 86. 86. 86. 86. 84. 84. 83. 83. 83. 82. 76. 69. 68.]
-
[99. 98. 98. 97. 97. 97. 97. 97. 96. 96.]
before: 50
n_childrens: 15
n_mutants: 15
after: 80
------------------------------------
epoch 2


100%|██████████| 80/80 [04:21<00:00,  3.27s/it]


[99. 99. 99. 98. 98. 98. 98. 98. 97. 97. 97. 97. 97. 97. 97. 96. 96. 96.
 96. 96. 96. 96. 95. 95. 95. 95. 95. 95. 95. 95. 95. 94. 94. 94. 94. 94.
 94. 94. 94. 93. 93. 93. 92. 92. 92. 92. 91. 91. 91. 91. 91. 91. 90. 90.
 89. 89. 89. 89. 89. 88. 88. 88. 87. 87. 87. 87. 85. 85. 84. 84. 84. 83.
 82. 82. 81. 80. 77. 74. 68. 59.]
-
[99. 99. 99. 98. 98. 98. 98. 98. 97. 97.]
before: 70
n_childrens: 15
n_mutants: 15
after: 100
------------------------------------
epoch 3


100%|██████████| 100/100 [05:21<00:00,  3.22s/it]


[99. 99. 99. 99. 99. 99. 99. 99. 98. 98. 98. 98. 98. 98. 98. 98. 98. 98.
 98. 98. 97. 97. 97. 97. 97. 97. 97. 97. 97. 96. 96. 96. 96. 96. 96. 96.
 96. 96. 95. 95. 95. 95. 94. 94. 94. 94. 94. 94. 94. 94. 94. 94. 94. 94.
 94. 94. 93. 93. 93. 93. 93. 93. 93. 92. 92. 92. 92. 92. 91. 91. 91. 91.
 91. 91. 90. 90. 90. 90. 89. 89. 89. 89. 89. 89. 88. 86. 85. 85. 84. 84.
 84. 83. 83. 83. 81. 79. 76. 75. 74. 70.]
-
[99. 99. 99. 99. 99. 99. 99. 99. 98. 98.]
before: 79
n_childrens: 15
n_mutants: 15
after: 109
------------------------------------
epoch 4


100%|██████████| 109/109 [04:40<00:00,  2.57s/it]


[100. 100.  99.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  96.  96.
  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  95.  95.  95.  95.
  95.  95.  95.  95.  95.  94.  94.  94.  94.  94.  94.  93.  93.  93.
  93.  93.  93.  93.  93.  93.  92.  92.  92.  92.  92.  92.  92.  92.
  92.  92.  92.  91.  91.  91.  90.  90.  90.  90.  89.  89.  89.  88.
  88.  88.  87.  87.  86.  86.  85.  84.  83.  83.  81.]
-
[100. 100.  99.  99.  99.  99.  99.  99.  99.  99.]
before: 84
n_childrens: 15
n_mutants: 15
after: 114
------------------------------------
epoch 5


100%|██████████| 114/114 [04:26<00:00,  2.34s/it]


[100. 100. 100.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  96.  96.  96.
  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.
  96.  96.  96.  96.  96.  95.  95.  95.  95.  95.  95.  95.  95.  95.
  94.  94.  94.  94.  94.  94.  94.  94.  94.  93.  93.  93.  93.  92.
  92.  92.  92.  92.  91.  91.  90.  90.  89.  89.  87.  85.  83.  83.
  79.  76.]
-
[100. 100. 100.  99.  99.  99.  99.  99.  99.  99.]
before: 84
n_childrens: 15
n_mutants: 15
after: 114
------------------------------------
epoch 6


100%|██████████| 114/114 [04:21<00:00,  2.30s/it]


[100. 100. 100. 100. 100. 100.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  97.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  97.  97.  96.  96.  96.  96.  96.  96.  96.  96.
  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  95.  95.  95.  95.
  95.  95.  95.  95.  95.  95.  95.  94.  94.  94.  94.  94.  94.  94.
  94.  94.  93.  92.  91.  90.  90.  89.  88.  88.  87.  87.  87.  85.
  84.  82.]
-
[100. 100. 100. 100. 100. 100.  99.  99.  99.  99.]
before: 88
n_childrens: 15
n_mutants: 15
after: 118
------------------------------------
epoch 7


100%|██████████| 118/118 [04:20<00:00,  2.20s/it]


[100. 100. 100.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.
  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  95.  95.
  95.  95.  95.  95.  95.  95.  95.  95.  95.  95.  95.  95.  95.  94.
  94.  94.  94.  94.  94.  93.  93.  93.  93.  92.  92.  91.  89.  89.
  87.  87.  83.  83.  78.  74.]
-
[100. 100. 100.  99.  99.  99.  99.  99.  99.  99.]
before: 86
n_childrens: 15
n_mutants: 15
after: 116
------------------------------------
epoch 8


100%|██████████| 116/116 [04:08<00:00,  2.15s/it]


[100. 100. 100. 100. 100. 100. 100.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  96.  96.
  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.
  96.  95.  95.  95.  95.  95.  95.  95.  95.  95.  95.  95.  95.  94.
  94.  94.  94.  93.  93.  93.  93.  93.  93.  92.  91.  89.  88.  87.
  83.  82.  80.  78.]
-
[100. 100. 100. 100. 100. 100. 100.  99.  99.  99.]
before: 85
n_childrens: 15
n_mutants: 15
after: 115
------------------------------------
epoch 9


100%|██████████| 115/115 [04:09<00:00,  2.17s/it]

[100. 100. 100. 100.  99.  99.  99.  99.  99.  99.  99.  99.  99.  99.
  99.  99.  99.  99.  99.  99.  99.  99.  99.  98.  98.  98.  98.  98.
  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.  98.
  98.  98.  98.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.
  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  97.  96.  96.  96.
  96.  96.  96.  96.  96.  96.  96.  96.  96.  96.  95.  95.  95.  95.
  95.  95.  95.  95.  95.  95.  95.  95.  94.  94.  94.  94.  94.  94.
  94.  93.  93.  93.  93.  93.  93.  91.  91.  91.  88.  84.  83.  82.
  80.  80.  80.]
-
[100. 100. 100. 100.  99.  99.  99.  99.  99.  99.]
before: 87
n_childrens: 15
n_mutants: 15
after: 117





In [13]:
ThePlayer = population[0]
print(ThePlayer.get_weights()[1])

[[0.85521739 0.05322313 0.31760296 0.16237783 0.05228784]
 [0.45405633 0.20165095 0.02995066 0.75621525 0.52156041]
 [0.83660336 0.88591001 0.40894744 0.1898644  0.1897356 ]
 [0.96934346 0.47504379 0.15206596 0.54738668 0.37318642]
 [0.57614054 0.13228065 0.56568559 0.23696391 0.65760855]]


In [14]:
wins_first = 0
wins_second = 0
n_trials = 1000

player1 = ThePlayer
player2 = RandomPlayer()

for _ in tqdm(range(n_trials)):
    g = Game()

    winner = g.play(player1, player2)

    if winner == 0: wins_first += 1

    g = Game()

    winner = g.play(player2, player1)

    if winner == 1: wins_second += 1

print(f"Player won {wins_first} / {n_trials} as first")
print(f"Player won {wins_second} / {n_trials} as second")

100%|██████████| 1000/1000 [00:41<00:00, 23.84it/s]

Player won 972 / 1000 as first
Player won 960 / 1000 as second



