In [None]:
import logging
from pprint import pformat
from collections import namedtuple
import random
from copy import deepcopy
from dataclasses import dataclass
from tqdm.notebook import tqdm
import numpy as np

In [None]:
# A way to represent a state of the game
Nimply = namedtuple("Nimply", "row, num_objects")

In [None]:
# A way to represent a game itself
class Nim:
    # The constructor: it builds the rows and defines the upper bound for the number of objects that can be removed
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    # ToString method
    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    # Returns the rows as a tuple
    def rows(self) -> tuple:
        return tuple(self._rows)

    # Checks if the move is valid and applies it
    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [628]:
NIM_SIZE = 10
MAX_OBJECTS = NIM_SIZE * 2 + 1

In [None]:
# Generates a "score" for the state of the game based on the transformation of the rows into binary strings and the xor of the rows
def nim_sum(state: Nim) -> int:
    # Transforms the rows into binary strings of length 32
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    # Xor of the rows
    xor = tmp.sum(axis=0) % 2
    # Return the value of the xor as an integer (0 means that the game is lost)
    return int("".join(str(_) for _ in xor), base=2)

# Generates a dictionary with the nim_sum (score) associated to all possible moves
def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    # For each possible move
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked

# Selects a move from the possible moves
def optimal(state: Nim) -> Nimply:
    # Generates the possible moves and their score
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    # Filters for moves that don't make the game lost (score = 0)
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    # Selects a random move from the possible moves
    ply = random.choice(spicy_moves)
    return ply

In [None]:
# corresponding index of move_scores in the hash_table of strategies
MOVES_DICTIONARY = {
    0: 'AA',
    1: 'BA',
    2: 'BB',
    3: 'CA',
    4: 'CB',
    5: 'CC',
    6: 'DA',
    7: 'DB',
    8: 'DC',
    9: 'DD',
}

In [None]:
SET_OF_ALLOWED_MOVES = set()
for value in MOVES_DICTIONARY.values():
    SET_OF_ALLOWED_MOVES.add(value)

print(SET_OF_ALLOWED_MOVES)

In [None]:
class CustomHashTable:
    def __init__(self):
        self.hash_table = {}

    def hash_function(self, key):
        hash_value = ord(key[0]) * 10000 + ord(key[1]) * 1000 + ord(key[2]) * 100 + ord(key[3]) * 10 + ord(key[4])
        return hash_value

    def insert(self, key, value):
        # index = self.hash_function(key)
        index = key
        if index not in self.hash_table:
            scores = []
            for _ in range(10):
                scores.append(random.randint(1, 10))
            self.hash_table[index] = scores
        self.hash_table[index] = value

    # if index not yet discovered, create it and initialize it with a random value between 1 and 10
    def get(self, key):
        # index = self.hash_function(key)
        index = key
        if index not in self.hash_table:
            scores = []
            for _ in range(10):
                scores.append(random.randint(1, 10))
            self.hash_table[index] = scores
        return self.hash_table[index]
    
    def get_keys(self):
        # here it should reconstruct the keys from numbers to strings
        return list(self.hash_table.keys())

In [None]:
hash_table = CustomHashTable()
hash_table.get("DDZDD")
hash_table.get("DDZDP")
hash_table.get("DDZDQ")
hash_table.get("DDDDD")

keys = hash_table.get_keys()
print(keys)

In [None]:
def generate_gaussian_value(mean = 0, std_dev = 1):
    # Genera un valore casuale basato sulla distribuzione normale (gaussiana)
    value = random.gauss(mean, std_dev)

    # Normalizza il valore nel range [0, 1] utilizzando la funzione di distribuzione cumulativa
    # normalized_value = (value - mean) / std_dev

    # Assicurati che il valore sia compreso tra 0 e 1
    value = max(0, min(1, value))

    return value

generated_value = generate_gaussian_value()

print(generated_value)

In [None]:
# in version 3 removed randomness
# @dataclass
# class Strategy:
#     # randomness: float
#     hash_table: CustomHashTable = CustomHashTable()
@dataclass
class Strategy:
    def __init__(self):
        self.hash_table = CustomHashTable()

In [None]:
strat = Strategy()
print(strat.hash_table.get("AAAAA"))

strat2 = Strategy()
print(strat2.hash_table.get("AAAAA"))

In [609]:
# Definition of parameters
NUMBER_OF_FITNESS_GAMES = 50
INITIAL_POPULATION = 100 * 2 # always divisible by 2
NUMBER_OF_GENERATIONS = 50
best_strategy = None

In [None]:
# Generates the initial population of strategies
def generate_initial_population(number_of_individuals: int) -> list[Strategy]:
    population = []
    for _ in range(number_of_individuals):
        population.append(
            Strategy())
    return population

In [None]:
# generates a list with all the possible moves (of type Nimply) at the current state of the game
def possible_moves(state: Nim) -> list:
    possible_moves = []
    for ply in (Nimply(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)):
        try:
            tmp = deepcopy(state)
            tmp.nimming(ply)
            possible_moves.append(ply)
            # print("assertion ok")
        except AssertionError:
            print("assertion error")
    return possible_moves

#### How I describe the state:
There are 4 possible types of actions: a, b, c, d.
They are described with the number of elements left after applying the move:
* a: 0
* b: 1
* c: >1 && even
* d: >1 && odd

There are 4 possible types of state: A, B, C, D.
They are described with the number of possible actions applicable:
* A: 1 (a)
* B: 2 (a, b)
* C: 3 (a, b, c)
* D: 4 (a, b, c, d)

There are 2 possible types of number of rows: even, odd.
* even
* odd

The flow of game is D --> C --> B --> A

It is not possible to go back so that this can be used to understand the distance from the end of a game

In [None]:
def describe_game(actual_state: Nim):
    state = deepcopy(actual_state)
    max_value = max(state.rows)

    if max_value > 3:
        return 4
    else:
        return max_value

In [None]:
nim = Nim(3)
print(describe_game(nim))
nim.nimming(Nimply(2, 3))
print(describe_game(nim))

In [None]:
move = Nimply(2, 3)
print(move.row)

In [None]:
def describe_move(actual_state: Nim, move: Nimply):
    element_before_move = actual_state.rows[move.row]
    state = deepcopy(actual_state)
    state.nimming(move)
    new_elements = state.rows[move.row]

    move_description = ""
    if element_before_move > 3:
        move_description += "D"
    elif element_before_move == 3:
        move_description += "C"
    elif element_before_move == 2:
        move_description += "B"
    else:
        move_description += "A"

    if new_elements > 1:
        if new_elements % 2 == 0:
            move_description += "C"
        else:
            move_description += "D"
    else:
        move_description += chr(64 + 1 + new_elements)

    return move_description

In [None]:
nim = Nim(3)
move = Nimply(1, 1)
print(describe_move(nim, move))

In [None]:
def generate_hash_key(actual_state: Nim):
    state = deepcopy(actual_state)
    key = ""

    # first character = [A, B, C, D] = category of the actual state
    state_category = max(state.rows)
    if state_category > 3:
        key += "D"
    else:
        key += chr(state_category + 64)

    # second character = [P, D, Z] = pari, dispari or zero for rows of category A
    # third character = [P, D, Z] = pari, dispari or zero for rows of category B
    # fourth character = [P, D, Z] = pari, dispari or zero for rows of category C
    # fifth character = [P, D] = pari, dispari for rows of category D
    a_category_rows = 0
    b_category_rows = 0
    c_category_rows = 0
    d_category_rows = 0
    for row in state.rows:
        if row > 3:
            d_category_rows += 1
        elif row == 3:
            c_category_rows += 1
        elif row == 2:
            b_category_rows += 1
        else:
            a_category_rows += 1

    if a_category_rows > 0:
        if a_category_rows % 2 == 0:
            key += "P"
        else:
            key += "D"
    else:
        key += "Z"

    if b_category_rows > 0:
        if b_category_rows % 2 == 0:
            key += "P"
        else:
            key += "D"
    else:
        key += "Z"

    if c_category_rows > 0:
        if c_category_rows % 2 == 0:
            key += "P"
        else:
            key += "D"
    else:
        key += "Z"

    if d_category_rows > 0:
        if d_category_rows % 2 == 0:
            key += "P"
        else:
            key += "D"
    else:
        key += "Z"

    return key  
    

In [None]:
nim = Nim(3)
print(generate_hash_key(nim))

#### How to choose a move
The idea is based on attaching a coefficient that varies during training to the type of row and type of move that are multiplied by type of state and even or odd rows number

type_of_row_chosen = A, B, C, D  (row chosen)
type_of_move_chosen = a, b, c, d (number of elements from row chosen)

type_of_state = A, B, C, D
even_or_odd_rows = even, odd

Strategy parameters (coefficients):
    state_score

La classe CustomHashTable è utilizzata per gestire i geni delle strategie:
ogni gene è una hash table che, per ogni entry, possiede una lista in cui ci sono le mosse possibili in un determinato stato.

L'idea consiste nel codificare nella hash table tutte le possibili situazioni di gioco che, dopo un'analisi possono essere categorizzate per ridurre drasticamente il numero di combinazioni che, altrimenti, crescerebbe in maniera incontrollabile.
L'idea alla base è che, dato uno stato, esso può essere univocamente descritto con una stringa di 5 lettere:
1. A, B, C, D: rappresenta il tipo di stato
2. P,D,Z: rappresenta se le righe di tipo A sono pari, dispari o non presenti (Zero)
3. P,D,Z: rappresenta se le righe di tipo B sono pari, dispari o non presenti (Zero)
4. P,D,Z: rappresenta se le righe di tipo C sono pari, dispari o non presenti (Zero)
5. P,D: rappresenta se le righe di tipo D sono pari o dispari (non possono essere zero, altrimenti lo stato sarebbe C)

Una volta codificato lo stato, la chiave sarà un array di 5 caratteri che punterà ad una lista che al più potrà avere 10 valori: ogni valore rappresenta il punteggio dato ad una certa mossa.
Il valore massimo è 10 in quanto è possibile effettuare un'operazione di categorizzazione anche sulle mosse, le quali vengono descritte dall'effetto che generano:
1. tipo a: rimangono 0 elementi nella riga
2. tipo b: rimane 1 elemento nella riga
3. tipo c: rimane più di un elemento nella riga ma in numero pari
4. tipo d: rimane più di un elemento nella riga ma in numero dispari

esempio: stato = [1, 3, 5, 7, 9] = DDZZP = stato tipo D, Dispari righe di tipo A, Zero righe di tipo B, Zero righe di tipo C, Pari righe di tipo D.

Questa codifica è efficiente in quanto permette di descrivere un numero teoricamente illimitato di stati diversi nello stesso modo:
[0, 0, 5, 7, 1, 3, 3] = DDZZP

Come si può notare, due stati totalmente diversi sono codificati dallo stesso hash.

Allo stesso modo, le mosse possibili in questi due stati sono della stessa tipologia:
[(A, a), (D, a), (D, b), (D, c), (D, d)]
In ordine: data una riga A, posso lasciare zero elementi nella riga, data una riga D posso lasciare 0, 1, pari o dispari elementi.
Ognuna delle codifiche delle mosse viene tradotta da una funzione che seleziona una qualunque delle mosse appartenenti a quella categoria che avranno tutte lo stesso effetto nello stato corrente del gioco.

Il numero di chiavi diverse possibili è di 4 * 3 * 3 * 3 * 2 = 216 che viene portato a 80 in quanto molti dei 216 stati sono illegali.
* Chiavi che codificano gli stati di tipo A: APZZZ e ADZZZ --> TOT: 2
* Chiavi che codificano gli stati di tipo B: BZPZZ, BPPZZ, BDPZZ, BZDZZ, BPDZZ, BDDZZ --> TOT: 6
* Chiavi che codificano gli stati di tipo B: CZZPZ, ..., CDDDZ --> TOT: 18
* Chiavi che codificano gli stati di tipo B: DZZZP, ..., DDDDD --> TOT: 54
Per un totale di 2 + 6 + 18 + 54 = 80

Ognuna di queste codifiche ha al più 10 elementi nella lista a cui punta per un limite massimo di 800 valori che, considerando quelli illegali, è sicuramente un dato gestibile.

In [None]:
# this should let strategies play against each other and assign a score to each of them based on the number of wins
def fitness_function(strategy: Strategy, num_games = NUMBER_OF_FITNESS_GAMES) -> int:
    score = 0
    for _ in range(num_games):
        # everytime the strategy wins it gets a point
        for starting in range(2):
            # plays half of the times firts and half of the times as second
            if nim_match(optimal, strategy, player = starting, fitness = True) == 1:
                score += 1
    return score

In [None]:
def crossover(mother: Strategy, father: Strategy):
    child1 = Strategy()
    child2 = Strategy()

    # set_keys_mother = set(mother.hash_table.hash_table.keys())
    # set_keys_father = set(father.hash_table.hash_table.keys())
    # set_keys_childrend = set_keys_mother.union(set_keys_father)
    set_keys_mother = set(mother.hash_table.get_keys())
    set_keys_father = set(father.hash_table.get_keys())
    set_keys_childrend = set_keys_mother.union(set_keys_father)

    # print("set_keys_mother: " + str(set_keys_mother))
    # print("set_keys_father: " + str(set_keys_father))
    # print("set_keys_childrend: " + str(set_keys_childrend))

    for key in set_keys_childrend:
        # print("key: " + str(key))
        if key in set_keys_mother and key in set_keys_father:
            # print("crossover")
            child_1_scores = []
            child_2_scores = []

            mother_scores = mother.hash_table.get(key)
            # print("mother_scores: " + str(mother_scores))
            father_scores = father.hash_table.get(key)
            # print("father_scores: " + str(father_scores))

            i_love_mum = 1
            for i in range(10):
                if mother_scores[i] == 0 or father_scores[i] == 0:
                    # print("zero")
                    child_1_scores.append(0)
                    child_2_scores.append(0)
                else:
                    if i_love_mum == 1:
                        child_1_scores.append(mother_scores[i])
                        child_2_scores.append(father_scores[i])
                        i_love_mum = 0
                    else:
                        child_1_scores.append(father_scores[i])
                        child_2_scores.append(mother_scores[i])
                        i_love_mum = 1
            child1.hash_table.insert(key, child_1_scores)
            child2.hash_table.insert(key, child_2_scores)
        elif key in set_keys_mother:
            child1.hash_table.insert(key, mother.hash_table.get(key))
            child2.hash_table.insert(key, mother.hash_table.get(key))
        else:
            child1.hash_table.insert(key, father.hash_table.get(key))
            child2.hash_table.insert(key, father.hash_table.get(key))

    return child1, child2

In [None]:
population = generate_initial_population(2)
mother = population[0]
mother.hash_table.insert("DDDDD", [1, 2, 3, 4, 5, 0, 7, 8, 9, 10])
print("mother DDDDD: ", mother.hash_table.get("DDDDD"))
# mother.hash_table.insert("DDDDP", [1, 2, 3, 4, 5, 0, 7, 8, 9, 10])
print("mother DDDDP: ", mother.hash_table.get("DDDDP"))
father = population[1]
father.hash_table.insert("DDDDD", [10, 9, 0, 7, 6, 5, 4, 3, 2, 1])
print("father DDDDD: ", father.hash_table.get("DDDDD"))
# father.hash_table.insert("DPZPD", [10, 9, 0, 7, 6, 5, 4, 3, 2, 1])
print("father DDDPP: ", father.hash_table.get("DDDPP"))
child1, child2 = crossover(mother, father)
print("child DDDDD: ", child1.hash_table.get("DDDDD"))
print("child DDDDP: ", child1.hash_table.get("DDDDP"))
print("child DDDDD: ", child2.hash_table.get("DDDDD"))
print("child DDDDP: ", child2.hash_table.get("DDDDP"))

In [None]:
def calculate_scores(population: list[Strategy]) -> list:
    scores = []
    for strategy in tqdm(population):
        score = fitness_function(strategy)
        scores.append((strategy, score))
    
    # sort scores by fitnes descending
    scores.sort(key=lambda x: x[1], reverse=True)
    return scores

In [604]:
# gaussian mutation
standard_deviation = 0.6
MUTATION_RATE = 0.3

def mutate(strategy: Strategy) -> Strategy:
    mutated_strategy = Strategy()
    set_keys = set(strategy.hash_table.get_keys())
    for key in set_keys:
        scores = strategy.hash_table.get(key)
        for i in range(10):
            if scores[i] != 0:
                if random.random() < MUTATION_RATE:
                    mutation = random.gauss(0, standard_deviation)
                    # print("mutation: " + str(mutation))
                    mutated_score = scores[i] + mutation
                    mutated_score = max(0.01, min(10, mutated_score))
                    scores[i] = mutated_score
        mutated_strategy.hash_table.insert(key, scores)
    return mutated_strategy
    

In [None]:
strat = Strategy()
print("strat DDDDD: ", strat.hash_table.get("DDDDD"))
print("strat DDDDP: ", strat.hash_table.get("DDDDP"))
mutated_strat = mutate(strat)
print("mutated_strat DDDDD: ", mutated_strat.hash_table.get("DDDDD"))
print("mutated_strat DDDDP: ", mutated_strat.hash_table.get("DDDDP"))

In [None]:
# INITIAL_POPULATION = 200
# NUMBER_OF_FITNESS_GAMES = 50 * 2 = 100
# NUMBER_OF_GENERATIONS = 50

In [None]:
population = generate_initial_population(INITIAL_POPULATION)
scores = calculate_scores(population)

In [None]:
for score in scores[:5]:
    print(score[1])

print("...")

for score in scores[-5:]:
    print(score[1])

print("best score:", scores[0][1])


In [620]:
# this returns the best strategy in a population after the application of the genetic algorithm
def evolve() -> Strategy:
    # generate random strategies (initial population)
    population = generate_initial_population(INITIAL_POPULATION)

    # calculate the fitness of each strategy
    scores = []
    scores = calculate_scores(population)

    best_score = scores[0][1]
    print("best score:", best_score)

    # start the genetic algorithm
    for _ in tqdm(range(NUMBER_OF_GENERATIONS)):
        new_generation = []

        # select the best strategies
        # best_half = scores[:len(scores)//2]
        best_quarter = scores[:len(scores)//4]
        # best_ten = scores[:len(scores)//10]

        # while len(best_quarter) > 0:
        for _ in range((INITIAL_POPULATION - len(best_quarter)) // 2):
            # crossover
            # we pick randomly the mother and the father from the best quarter
            mother = random.choice(best_quarter)
            # best_quarter.pop(best_quarter.index(mother))
            father = random.choice(best_quarter)
            # best_quarter.pop(best_quarter.index(father))

            # generate two children
            child1, child2 = crossover(mother[0], father[0])

            # mutation

            child1 = mutate(child1)
            child2 = mutate(child2)

            new_generation.append(child1)
            new_generation.append(child2)
            # new_generation.append(mother[0])
            # new_generation.append(father[0])

        # calculate the fitness of each strategy
        scores = []
        scores = calculate_scores(new_generation)

        # to not recompute again the fitness of the best quarter
        scores += best_quarter
        scores.sort(key=lambda x: x[1], reverse=True)
        
        print("best score:", scores[0][1])

    # return the best strategy
    print("scores after training: ", scores)
    
    return scores[0][0]

In [None]:
best_strategy = evolve()

In [None]:
def choose_based_on_scores(rows_scores: list[float]) -> int:
    # calculates a weighted probability for each row based on the scores
    total = sum(rows_scores)
    probabilities = [score / total for score in rows_scores]
    indices = list(range(len(rows_scores)))
    chosen_index = random.choices(indices, probabilities)[0]
    return chosen_index

In [632]:
# the function that returns the move to do 
def evolution_strategy(state: Nim, strategy: Strategy) -> Nimply:
    # generate the possible moves
    allowed_moves = possible_moves(state)

    # transform the list of allowed_moves into a list of move_types
    allowed_moves_types = []
    for move in allowed_moves:
        allowed_moves_types.append(describe_move(state, move))

    set_of_allowed_moves = set()
    for value in allowed_moves_types:
        set_of_allowed_moves.add(value)

    set_of_not_allowed_moves = SET_OF_ALLOWED_MOVES - set_of_allowed_moves

    # print("set_of_allowed_moves: ", set_of_allowed_moves)
    # print("set_of_not_allowed_moves: ", set_of_not_allowed_moves)

    # generate the hash_key of the state
    hash_key = generate_hash_key(state)
    # print("hash_key: ", hash_key)

    # retrieve the scores of the moves from the hash_table
    moves_scores = strategy.hash_table.get(hash_key)

    # for each index retrieved from calling MOVES_DICTIONARY with the index of set_of_not_allowed_moves we set the score to 0 of moves_scores
    for index in set_of_not_allowed_moves:
        for key, value in MOVES_DICTIONARY.items():
            if value == index:
                moves_scores[key] = 0

    # print("moves_scores: ", moves_scores)


    # order moves_scores as a tuple of type (score, move_type) in descending order
    moves_scores = list(zip(moves_scores, allowed_moves_types))
    moves_scores.sort(key=lambda x: x[0], reverse=True)
    # print("moves_scores: ", moves_scores)

    # starting from the first move, check if it is in the list of allowed moves, if yes, return it
    for move in moves_scores:
        if move[1] in allowed_moves_types:
            return allowed_moves[allowed_moves_types.index(move[1])]
        
    print("random move")
    return random.choice(allowed_moves)

In [None]:
nim = Nim(6)
nim.nimming(Nimply(5, 9))
strategy = Strategy()
evolution_strategy(nim, strategy)

todo: 
per modificare gli scores bisogna tenere traccia delle mosse scelte e, una volta calcolato il verdetto della partita attribuire un punteggio positivo o negativo a tutte le mosse effettuate in quella partita

In [None]:
def nim_match(strategy1, strategy2, nim_size = NIM_SIZE, player = 0, fitness = False, debug = False, winner = False):
    agents = (strategy1, strategy2)
    if debug:
        print("agent1: ", agents[0])
        print("agent2: ", agents[1])
    nim = Nim(nim_size)
    logging.info(f"init : {nim}")
    if debug:
        print(f"init : {nim}")
    while nim:
        if player == 0:
            selected_move = agents[player](nim)
        else:
            selected_move = evolution_strategy(nim, agents[player])
        logging.info(f"ply: player {player} plays {selected_move}")
        if debug:
            print(f"ply: player {player} plays {selected_move}")
        nim.nimming(selected_move)
        logging.info(f"status: {nim}")
        if debug:
            print(f"status: {nim}")
        player = 1 - player
    logging.info(f"status: Player {player} won!")
    if debug or winner:
        print(f"status: Player {player} won!")
    if fitness:
        return player

In [636]:
keys = best_strategy.hash_table.get_keys()
# print("keys: ", keys)

for key in keys:
    print("key: ", key)
    print("scores: ", best_strategy.hash_table.get(key))

key:  DDPZP
scores:  [0, 1.1001276346455608, 3.5130084365119645, 0, 0, 0, 9.719813848021348, 5.558197890011561, 6.483797931951061, 3.3728103592704026]
key:  DDZZP
scores:  [0, 0, 0, 0, 0, 0, 5.688408083909847, 7.922165965921437, 3.642160588451596, 6.032511721038891]
key:  DDDDP
scores:  [0, 0.9652008333051281, 0.9957407046257971, 8.237292828852489, 3.882519024759666, 3.2338459505271944, 4.744312366161563, 8.647956995776532, 9.44799068338264, 9.343366994613286]
key:  CPDPZ
scores:  [0, 10, 2.633346377664345, 0.01, 3.5036900140295315, 5.742104137179045, 0, 0, 0, 0]
key:  DPZZD
scores:  [0, 0, 0, 0, 0, 0, 1.6438053108392987, 10, 7.847282089242049, 10]
key:  DDDPD
scores:  [0, 4.6015661417862415, 2.112114014469018, 5.58487715988138, 7.92155977993975, 6.756143775813544, 8.962694511540263, 10, 6.229421124197666, 0.891189815144697]
key:  ADZZZ
scores:  [3, 0, 0, 0, 0, 0, 0, 0, 0, 0]
key:  CPPDZ
scores:  [0, 4.639061251067488, 7.645784228664764, 2.0636973704957664, 3.435560565935416, 10, 0, 0,

In [650]:
matches = 1000
wins_first = 0
wins_second = 0

for _ in tqdm(range(matches)):
    wins_second += nim_match(optimal, best_strategy, nim_size = 4, debug = False, fitness = True)
    wins_first += nim_match(optimal, best_strategy, nim_size = 4, debug = False, player = 1, fitness = True)

print("matches played: ", matches)
print("---------------------------------")
print("wins playing first: ", wins_first)
print("percentage of wins playing fist: ", wins_first/matches * 100, "%")
print("---------------------------------")
print("wins playing second: ", wins_second)
print("percentage of wins playing second: ", wins_second/matches * 100, "%")
print("---------------------------------")

  0%|          | 0/1000 [00:00<?, ?it/s]

matches played:  1000
---------------------------------
wins playing first:  250
percentage of wins playing fist:  25.0 %
---------------------------------
wins playing second:  356
percentage of wins playing second:  35.6 %
---------------------------------


In [648]:
nim_match(optimal, best_strategy, nim_size = 3, debug = True)

agent1:  <function optimal at 0x10856b910>
agent2:  Strategy()
init : <1 3 5>
ply: player 0 plays Nimply(row=0, num_objects=1)
status: <0 3 5>
ply: player 1 plays Nimply(row=2, num_objects=1)
status: <0 3 4>
ply: player 0 plays Nimply(row=1, num_objects=3)
status: <0 0 4>
ply: player 1 plays Nimply(row=2, num_objects=1)
status: <0 0 3>
ply: player 0 plays Nimply(row=2, num_objects=2)
status: <0 0 1>
ply: player 1 plays Nimply(row=2, num_objects=1)
status: <0 0 0>
status: Player 0 won!
