In [2]:
import logging
from pprint import pformat
from collections import namedtuple
import random
from copy import deepcopy
from dataclasses import dataclass
import math
from tqdm.notebook import tqdm
import numpy as np

In [3]:
# A single move (ply): remove `num_objects` objects from the row at index `row`
Nimply = namedtuple("Nimply", "row, num_objects")

In [4]:
# A way to represent a game itself
class Nim:
    """A game of Nim whose rows start with 1, 3, 5, ... objects.

    The board is mutated in place by `nimming`. An instance is truthy while
    at least one object is still on the board.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Build the board.

        Args:
            num_rows: number of rows; row `i` starts with `2 * i + 1` objects.
            k: optional upper bound on the number of objects that can be
                removed in a single move (`None` means no bound).
        """
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while there are objects left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Return the board formatted as `<r0 r1 ...>`."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Snapshot of the current row sizes as a tuple."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Check that a move is valid and apply it to the board.

        Args:
            ply: a `(row, num_objects)` pair.

        Raises:
            AssertionError: if the move takes fewer than one object, more
                objects than the row holds, or more than `k` objects.
        """
        row, num_objects = ply
        # A move has to take something: zero would be a "pass" and a negative
        # amount would put objects back on the board.
        assert num_objects >= 1, "a move must remove at least one object"
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [5]:
# Number of rows of the games played in this notebook (default board size of
# nim_match_2 and length of each Strategy's rows_scores)
NIM_SIZE = 5
# NOTE(review): not referenced by any cell shown here. With rows of 1, 3, 5, ...
# the largest row of a NIM_SIZE-row board holds NIM_SIZE * 2 - 1 objects, so this
# value looks one row too large — confirm the intended bound.
MAX_OBJECTS = NIM_SIZE * 2 + 1

In [6]:
def nim_sum(state: Nim) -> int:
    """Return the nim-sum of a position: the bitwise XOR of all row sizes.

    The value is used as the "score" of a position by the move-selection
    functions. XOR-ing the integers directly gives the same result as
    comparing fixed-width binary strings, but also works for an empty board
    (result 0) and for rows of any size.

    Args:
        state: any object exposing the row sizes through a `rows` attribute.

    Returns:
        The XOR of every value in `state.rows` as a plain `int`.
    """
    total = 0
    for count in state.rows:
        total ^= count
    return int(total)

def analize(raw: Nim) -> dict:
    """Score every legal move of a position by the nim-sum it leads to.

    Each candidate move is tried on a deep copy, so `raw` is never modified.
    Moves rejected by `Nim.nimming` (for instance those exceeding the game's
    `k` limit) are skipped instead of aborting the whole analysis.

    Args:
        raw: the position to analyse.

    Returns:
        A dict with a single key, "possible_moves", mapping each legal
        `Nimply` to the nim-sum of the position reached by playing it.
    """
    scored_moves = dict()
    candidates = (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1))
    for ply in candidates:
        tmp = deepcopy(raw)
        try:
            tmp.nimming(ply)
        except AssertionError:
            # Not a legal move under this game's rules: leave it out
            continue
        scored_moves[ply] = nim_sum(tmp)
    return {"possible_moves": scored_moves}

def optimal(state: Nim) -> Nimply:
    """Pick a random move among those that leave a non-zero nim-sum.

    When every available move leaves a nim-sum of zero, the choice is made
    among all the moves instead.

    NOTE(review): a textbook nim-sum strategy usually aims for positions whose
    nim-sum is zero, while this function avoids them — confirm that this is
    the intended baseline opponent.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    scored = analysis["possible_moves"]
    # Moves whose resulting position has a non-zero score
    candidates = [move for move, score in scored.items() if score != 0]
    # Nothing survived the filter: fall back to every move
    return random.choice(candidates or list(scored.keys()))

In [7]:
# Genome evolved by the genetic algorithm (in version 3 the randomness gene was removed)
@dataclass
class Strategy:
    """Parameters describing one individual of the population."""
    # randomness: float  (removed in version 3)
    # One score per row; the initial population creates NIM_SIZE of them
    rows_scores: list[float]
    # NOTE(review): presumably how many objects to remove, as a fraction — it is
    # not read by any function shown here; confirm the intended meaning
    objects_to_take: float
    # NOTE(review): not read by any function shown here; confirm the intended meaning
    aggressive: bool

In [42]:
# Definition of parameters
# Matches played against `optimal` to score one strategy (default of fitness_function_3)
NUMBER_OF_FITNESS_GAMES = 50
# Size of the first generation created by evolve_3
INITIAL_POPULATION = 10 * 2 # keep it a multiple of 4: the best half is split into mother/father pairs
# Number of generations evolve_3 iterates over
NUMBER_OF_GENERATIONS = 30
# NOTE(review): never reassigned in the cells shown here — possibly a leftover
best_strategy = None

In [9]:
def generate_initial_population(number_of_individuals: int) -> list[Strategy]:
    """Create `number_of_individuals` strategies with random parameters.

    Every strategy gets NIM_SIZE row scores and an `objects_to_take` value
    drawn with `random.random()`, plus a randomly chosen `aggressive` flag.
    """
    def random_strategy():
        # Draw order (row scores, objects_to_take, aggressive) is kept stable
        row_weights = [random.random() for _ in range(NIM_SIZE)]
        take_value = random.random()
        is_aggressive = random.choice([True, False])
        return Strategy(row_weights, take_value, is_aggressive)

    return [random_strategy() for _ in range(number_of_individuals)]

In [10]:
def possible_moves(state: Nim) -> list:
    """Return every legal move (as `Nimply`) available in the given position.

    Each candidate is validated by playing it on a deep copy of `state`, so
    the position passed in is never modified. Candidates rejected by
    `Nim.nimming` are left out of the result.

    Args:
        state: the current position.

    Returns:
        The legal moves, ordered by row index and then by number of objects.
    """
    legal_moves = []
    for row, count in enumerate(state.rows):
        for num_objects in range(1, count + 1):
            ply = Nimply(row, num_objects)
            trial = deepcopy(state)
            try:
                trial.nimming(ply)
            except AssertionError:
                # Rejected by the game rules; logged instead of printed so the
                # notebook output is not flooded while matches are running
                logging.debug(f"illegal move skipped: {ply}")
                continue
            legal_moves.append(ply)
    return legal_moves

In [104]:
def describe_game(actual_state: Nim):
    """Summarise a position by counting its non-empty rows.

    The counts are read straight from `rows`: a row is "residual" as long as
    it still holds at least one object. The position is not modified.

    Args:
        actual_state: any object exposing the row sizes through `rows`.

    Returns:
        A tuple `(number_of_residual_rows, number_of_even_rows,
        number_of_odd_rows)`, where even/odd refers to the number of objects
        left in each non-empty row.
    """
    non_empty_rows = [count for count in actual_state.rows if count > 0]
    number_of_residual_rows = len(non_empty_rows)
    number_of_even_rows = sum(1 for count in non_empty_rows if count % 2 == 0)
    # Every residual row that is not even is odd
    number_of_odd_rows = number_of_residual_rows - number_of_even_rows
    return number_of_residual_rows, number_of_even_rows, number_of_odd_rows

In [102]:
def describe_move(state: Nim, move: Nimply):
    """Classify the effect of playing `move` on `state`.

    The move is applied to a deep copy, so `state` is left untouched.

    Returns:
        1 if the move reduces the number of residual rows (it removes a row),
        2 if it leaves exactly one object in a row that did not hold exactly
        one before, 0 otherwise (neutral move).
    """
    target_row = move.row

    before = deepcopy(state)
    rows_before = describe_game(before)[0]

    after = deepcopy(state)
    after.nimming(move)
    rows_after = describe_game(after)[0]

    # 1 = remove_row_move
    if rows_before > rows_after:
        return 1
    # 2 = leave_one_move
    if before.rows[target_row] != 1 and after.rows[target_row] == 1:
        return 2
    # 0 = neutral_move
    return 0

In [105]:
# Quick check of the two descriptors on a fresh 5-row game
game = Nim(5)
describe_game(game)
# Row 1 starts with 3 objects: taking 2 leaves one, i.e. a "leave one" move (2)
describe_move(game, Nimply(1, 2))

2

In [11]:
# Fitness of a strategy, measured by playing it against `optimal`
# version 3: a lost match costs a point
def fitness_function_3(strategy: Strategy, num_games = NUMBER_OF_FITNESS_GAMES) -> int:
    """Score `strategy` over `num_games` matches against `optimal`.

    The strategy plays as player 1: every match for which `nim_match_2`
    returns 1 adds a point, every other match subtracts one.

    Returns:
        The net score, between `-num_games` and `num_games`.
    """
    outcomes = (
        1 if nim_match_2(optimal, strategy, fitness = True) == 1 else -1
        for _ in range(num_games)
    )
    return sum(outcomes)

In [12]:
def crossover(mother: Strategy, father: Strategy):
    """Combine two parents into two children.

    The row scores are recombined around a fixed cut after the second score:
    the first child takes the father's head and the mother's tail, the second
    child the opposite. `objects_to_take` is copied from the father for the
    first child and from the mother for the second; `aggressive` is picked at
    random between the two parents for each child.

    Returns:
        The pair `(child1, child2)`.
    """
    cut = 2
    parent_flags = [mother.aggressive, father.aggressive]

    first_child = Strategy(
        rows_scores = father.rows_scores[:cut] + mother.rows_scores[cut:],
        objects_to_take = father.objects_to_take,
        aggressive = random.choice(parent_flags),
    )
    second_child = Strategy(
        rows_scores = mother.rows_scores[:cut] + father.rows_scores[cut:],
        objects_to_take = mother.objects_to_take,
        aggressive = random.choice(parent_flags),
    )
    return first_child, second_child

In [13]:
def calculate_scores(population: list[Strategy]) -> list:
    """Evaluate every strategy and rank the population.

    Returns:
        A list of `(strategy, score)` tuples sorted by score, best first.
    """
    evaluated = [(individual, fitness_function_3(individual)) for individual in population]
    # Highest fitness first
    return sorted(evaluated, key=lambda pair: pair[1], reverse=True)

In [14]:
def mutate(strategy: Strategy) -> Strategy:
    """Return a copy of `strategy` with one row score nudged by 10%.

    A random row score is selected and, with equal probability, decreased or
    increased by a tenth of its current value. The input is not modified.
    """
    mutant = deepcopy(strategy)
    genes = mutant.rows_scores
    position = random.choice(list(range(len(genes))))
    step = genes[position] * 0.1
    # Coin flip between shrinking and growing the selected score
    if random.random() < 0.5:
        genes[position] = genes[position] - step
    else:
        genes[position] = genes[position] + step
    return mutant
    

In [15]:
# this returns the best strategy in a population after the application of the genetic algorithm
# version 3.0: cleaned code, more function calls, removed randomness
def evolve_3() -> Strategy:
    """Run the genetic algorithm and return the best strategy found.

    Starting from INITIAL_POPULATION random strategies, each of the
    NUMBER_OF_GENERATIONS iterations keeps the best half of the ranked
    population, splits it into random mother/father pairs and builds the next
    generation from both parents plus two mutated children per pair.

    When the best half has an odd size, the strategy left without a partner
    is carried over unchanged instead of making the pairing fail.

    Returns:
        The highest-scoring strategy of the last generation.

    Raises:
        ValueError: if INITIAL_POPULATION is smaller than 4, since the best
            half could then never contain a full pair.
    """
    if INITIAL_POPULATION < 4:
        raise ValueError("INITIAL_POPULATION must be at least 4 to form a mother/father pair")

    # generate random strategies (initial population) and rank them
    population = generate_initial_population(INITIAL_POPULATION)
    scores = calculate_scores(population)

    # start the genetic algorithm
    for _ in tqdm(range(NUMBER_OF_GENERATIONS)):
        new_generation = []

        # selection: the ranked list is sorted best first, keep the top half
        best_half = scores[:len(scores)//2]

        while len(best_half) >= 2:
            # crossover: mother and father are drawn at random, without
            # replacement, from the best half
            mother = random.choice(best_half)
            best_half.remove(mother)
            father = random.choice(best_half)
            best_half.remove(father)

            child1, child2 = crossover(mother[0], father[0])

            # mutation
            # parameters of the mutation to try:
            # number of parameters: 1, 2, 3, 4
            # type of mutation: random, gaussian
            # mutation probability: 0.1, 0.2, 0.3, 0.4, 0.5
            # mutation amplitude: 0.1, 0.2, 0.3, 0.4, 0.5
            child1 = mutate(child1)
            child2 = mutate(child2)

            # both parents survive next to their children
            new_generation.extend([child1, child2, mother[0], father[0]])

        # a strategy left without a partner (odd-sized best half) survives as is
        new_generation.extend(entry[0] for entry in best_half)

        # rank the new generation
        scores = calculate_scores(new_generation)

    # the list is sorted best first
    return scores[0][0]

In [16]:
def choose_based_on_scores(rows_scores: list[float]) -> int:
    """Pick a row index at random, weighted by the given scores.

    Each index is drawn with probability `score / sum(scores)`. When the
    scores sum to zero (for instance when they are all zero) there is no
    preference to apply, so the index is drawn uniformly instead of failing
    with a division by zero.

    Args:
        rows_scores: one score per row.

    Returns:
        The chosen index into `rows_scores`.

    Raises:
        IndexError: if `rows_scores` is empty.
    """
    indices = list(range(len(rows_scores)))
    total = sum(rows_scores)
    if total == 0:
        # No usable weights: every row is equally likely
        return random.choice(indices)
    probabilities = [score / total for score in rows_scores]
    return random.choices(indices, probabilities)[0]

In [37]:
# I think the bottleneck is here: the evolution strategy does not work as expected
def evolution_strategy_3(state: Nim, strategy: Strategy) -> Nimply:
    """Pick the move whose resulting position has the highest nim-sum.

    `strategy` is accepted so that the function can be called with an evolved
    individual, but none of its parameters is consulted: the chosen move
    depends only on `state` (and on a random draw when no move reaches a
    positive nim-sum).

    Args:
        state: the current position; it is not modified.
        strategy: the individual playing the move (currently unused).

    Returns:
        The first legal move reaching the highest nim-sum, or a random legal
        move if every move leaves a nim-sum of zero.
    """
    # generate the possible moves
    allowed_moves = possible_moves(state)

    # random fallback, kept only when no move scores above zero
    selected_move = random.choice(allowed_moves)
    best_score = 0
    for move in allowed_moves:
        tmp = deepcopy(state)
        tmp.nimming(move)
        # evaluate the position once and reuse the value
        move_score = nim_sum(tmp)
        if move_score > best_score:
            best_score = move_score
            selected_move = move

    return selected_move

In [20]:
def nim_match_2(strategy1, strategy2, nim_size = NIM_SIZE, player = 0, fitness = False, debug = False, winner = False):
    """Play one match of Nim between two agents and report the winner.

    Player 0's agent is called directly with the board and must return a
    move; for any other player the move comes from `evolution_strategy_3`,
    which receives the board and that player's agent.

    The player index is flipped after every move, so the index reported once
    the board is empty belongs to the opponent of whoever moved last: taking
    the last object loses the match.

    Args:
        strategy1: agent of player 0, a callable taking the board.
        strategy2: agent of player 1, passed on to `evolution_strategy_3`.
        nim_size: number of rows of the board.
        player: index of the player moving first.
        fitness: when True the winner's index is returned.
        debug: when True every step is printed in addition to being logged.
        winner: when True the final result is printed even without `debug`.

    Returns:
        The index of the winning player if `fitness` is True, otherwise None.
    """
    def report(message, echo):
        # Every message is logged; it is also printed when `echo` is set
        logging.info(message)
        if echo:
            print(message)

    agents = (strategy1, strategy2)
    if debug:
        print("agent1: ", agents[0])
        print("agent2: ", agents[1])

    nim = Nim(nim_size)
    report(f"init : {nim}", debug)

    while nim:
        if player == 0:
            selected_move = agents[player](nim)
        else:
            selected_move = evolution_strategy_3(nim, agents[player])
        report(f"ply: player {player} plays {selected_move}", debug)
        nim.nimming(selected_move)
        report(f"status: {nim}", debug)
        # hand the turn to the other player
        player = 1 - player

    report(f"status: Player {player} won!", debug or winner)
    if fitness:
        return player

In [43]:
# Run the genetic algorithm and keep the best strategy of the last generation
best_strategy_3 = evolve_3()

# NOTE(review): the label says "best_strategy_2" but the value printed is best_strategy_3
print("best_strategy_2:", best_strategy_3)

  0%|          | 0/30 [00:00<?, ?it/s]

best_strategy_2: Strategy(rows_scores=[0.6170313268390217, 0.4308766989466982, 0.26649903789765034, 0.08152594142113684, 0.617636341856293], objects_to_take=0.9319648833982943, aggressive=False)


In [44]:
# Evaluate the evolved strategy (player 1) against `optimal` (player 0)
matches = 1000
wins = 0

# With fitness = True nim_match_2 returns the winner's index (0 or 1), so the
# running sum counts the matches won by player 1
for _ in tqdm(range(matches)):
    wins += nim_match_2(optimal, best_strategy_3, debug = False, fitness = True)

print("matches played: ", matches)
print("---------------------------------")
print("wins: ", wins)
# Share of matches won by player 1, as a percentage
print("percentage of wins_1: ", wins/matches * 100, "%")
print("---------------------------------")

  0%|          | 0/1000 [00:00<?, ?it/s]

matches played:  1000
---------------------------------
wins:  293
percentage of wins_1:  29.299999999999997 %
---------------------------------
