In [2]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from math import floor, ceil, inf
from random import randint, random, choice
import pprint 
from tqdm.auto import tqdm
import sys


  from .autonotebook import tqdm as notebook_tqdm


## The *Nim* and *Nimply* classes

In [3]:
Nimply = namedtuple("Nimply", "row, num_objects")

In [4]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [5]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [6]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [7]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [8]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = choice(spicy_moves)
    return ply


#### Evolutionary strategies FIRST ALGORITHM

In [75]:
def mutation(indiv: list, state :Nim) -> list: 
    index = randint(0, len(indiv)-1)
    ind = indiv[index].copy()
    max_int = state.rows[ind[0]]
    new_value = randint(1, max_int)
    ind[1] = new_value
    tmp = deepcopy(state)
    tmp.nimming(Nimply(ind[0], ind[1]))
    ind[2]=nim_sum(tmp)
    return ind

def mutation2(indiv: list, state :Nim) -> list:
    index = randint(0, len(indiv)-1)
    ind = indiv[index].copy()
    max_int = state.rows[ind[0]]
    new_value = max_int - 1 if max_int-1 != 0 else 1 #we try to delete the maximum number of elements - 1 (to leave at least one element in the line)
    ind[1] = new_value
    tmp = deepcopy(state)
    tmp.nimming(Nimply(ind[0], ind[1]))
    ind[2]=nim_sum(tmp)
    return ind

def mutation3(indiv: list, state :Nim) -> list:
    current_nim_sum = nim_sum(state)
    index = randint(0, len(indiv)-1) if len(indiv) > 1 else 0
    
    ind = indiv[index].copy()
    elements_in_line = state.rows[ind[0]]
    rule = current_nim_sum ^ elements_in_line
    
    if rule < elements_in_line:
        #print(f"sub in the line {ind[0]}, the value {elements_in_line - rule} before it has {elements_in_line}")
        new_value = elements_in_line - rule   
    else :
        new_value = randint(1, elements_in_line) if elements_in_line > 1 else 1
    tmp = deepcopy(state)
    tmp.nimming(Nimply(ind[0], new_value))
    ind[2] = nim_sum(tmp)
    ind[1] = new_value
    return ind



def crossover(ind_one : list, ind_two : list, state : Nim) -> list :
    #The idea is to take the row of the first individual and the value of the second individual 
    # the opposite operation if the first one is not possible
    # and if both are not possible, we keep the first individual
    #calcualte the max value of the row of the first individual
    max_ind_one = state.rows[ind_one[0]]

    if ind_two[1] <= max_ind_one:
        #print("change 1")
        #print(f"setting in the line {ind_one[0]}, the value {ind_two[1]}")
        new_ind = [ind_one[0], ind_two[1]]
        tmp = deepcopy(state)
        tmp.nimming(new_ind)
        new_ind.append(nim_sum(tmp))
        return new_ind
    else :
        max_ind_two = state.rows[ind_two[0]]
        if ind_one[1] <= max_ind_two:
            #print("change 2")
            #print(f"setting in the line {ind_two[0]}, the value {ind_one[1]}")
            new_ind = [ind_two[0], ind_one[1]]
            tmp = deepcopy(state)
            tmp.nimming(new_ind)
            new_ind.append(nim_sum(tmp))
            return new_ind
        else :
            return ind_one.copy()

def tournament_selection(population : list, k :int = 2, sorted_population : int =2 ) -> list :
    selected = list()
    for _ in range(k):
        selected.append(population[randint(0, len(population)-1)])
    selected = np.array(selected)
    sorted_index = np.argsort(selected[:,sorted_population])
    return (selected[sorted_index])[0]


def evolutionary_strategies(state : Nim, generation : int = 3, percentage_population : float = 0.8, mutation_rate : float = 0.10, sorted_population : int = 2) : 
#sorted population = 1 -> sort by min number of objects taken
#sorted population = 2 -> sort by nim sum
    analysis = analize(state)['possible_moves'] ##retrieves all possible moves and their nim sum
    population_size=ceil(len(analysis.keys())  * percentage_population) ##select random possible moves

    offspring_size = ceil(population_size/2)
    ##select random possible moves
    #p = list(set([ randint(0, len(analysis.keys())-1) for _ in range(population_size) ]))
    p = list()
    count = 0
    while count < population_size:
        tmp = randint(0, len(analysis.keys())-1)
        if tmp not in p:
            p.append(tmp)
            count += 1

    population  = []

    for key, value in analysis.items():
        population.append(list(key) + [value if value != 0 else sys.maxsize])
    population = np.array(population)[p]
    sorted_index = np.argsort(population[:,sorted_population])
    population = list(population[sorted_index])
    offspring = list()
    for _ in range(generation) :
        for index in range(offspring_size):
            if random() < mutation_rate : 
                new_ind = mutation(population, state=state)
            else :
                new_ind_one = tournament_selection(population , k=3, sorted_population= sorted_population)
                new_ind_two = tournament_selection(population, k=3, sorted_population= sorted_population)
                new_ind = crossover(new_ind_one, new_ind_two ,state=state) 
            offspring.append(new_ind)
        population = np.vstack([population, offspring])
        sorted_index = np.argsort(population[:,sorted_population])
        population = list(population[sorted_index])[:population_size]
    #print("deleting",population[0][0], population[0][1])
    return Nimply(int(population[0][0]), int(population[0][1]))

    

### ES second algorithm 
using a comma strategy, that means the offspring completely replaces the parent population

In [72]:

def evolutionary_strategies_comma(state : Nim, generation : int = 3, percentage_population : float = 0.8, mutation_rate : float = 0.10, sorted_population : int = 2) : 
#sorted population = 1 -> sort by min number of objects taken
#sorted population = 2 -> sort by nim sum
    analysis = analize(state)['possible_moves'] ##retrieves all possible moves and their nim sum
    population_size=ceil(len(analysis.keys())  * percentage_population) ##select random possible moves

    offspring_size = ceil(population_size)
    ##select random possible moves
    #p = list(set([ randint(0, len(analysis.keys())-1) for _ in range(population_size) ]))
    p = list()
    count = 0
    while count < population_size:
        tmp = randint(0, len(analysis.keys())-1)
        if tmp not in p:
            p.append(tmp)
            count += 1

    population  = []

    for key, value in analysis.items():
        population.append(list(key) + [value if value != 0 else sys.maxsize])
    population = np.array(population)[p]
    sorted_index = np.argsort(population[:,sorted_population])
    population = list(population[sorted_index])
    offspring = list()
    for _ in range(generation) :
        for index in range(offspring_size):
            if random() < mutation_rate : 
                new_ind = mutation(population, state=state)
            else :
                new_ind_one = tournament_selection(population , k=3, sorted_population= sorted_population)
                new_ind_two = tournament_selection(population, k=3, sorted_population= sorted_population)
                new_ind = crossover(new_ind_one, new_ind_two ,state=state) 
            offspring.append(new_ind)
        population = np.array(offspring)
        sorted_index = np.argsort(population[:,sorted_population])
        population = list(population[sorted_index])
    #print("deleting",population[0][0], population[0][1])
    return Nimply(int(population[0][0]), int(population[0][1]))

### Matches 

Select the number of matches to play between 2 players.

In [76]:
NUM_MATCHES = 500
wins = [0, 0]
sequence = []
K = 20
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, evolutionary_strategies)
#logging.info(f"init : {nim}")
player = 0
generation = [3, 5, 10, 15, 20,40,80,100]
for gen in range(len(generation)):
    with tqdm(total=NUM_MATCHES) as pbar:
        for i in range(NUM_MATCHES):
            nim = Nim(K) 
            while nim:
                if strategy == evolutionary_strategies:
                    ply = strategy[player](nim, generation=generation[gen], percentage_population=0.8, mutation_rate=0.10, sorted_population=2)                   
                else: 
                    ply = strategy[player](nim)
                #logging.info(f"ply: player {player} plays {ply}")
                #print(f"ply: player {player} plays {ply}")
                nim.nimming(ply)
                #logging.info(f"status: {nim}")
                player = 1 - player
            #print(f"winner: player {player}")
            wins[player] += 1
            sequence.append(player)
            pbar.update(1)
        
        print(f"wins : {wins} with generation {generation[gen]} ")
        print(f"Percentage : player 0 {wins[0]/NUM_MATCHES}, player 1 {wins[1]/NUM_MATCHES}")
        wins = [0, 0]
# for i in range(NUM_MATCHES):
#     print('player : ', sequence[i]) 


100%|██████████| 500/500 [14:34<00:00,  1.75s/it]


wins : [373, 127] with generation 3 
Percentage : player 0 0.746, player 1 0.254


 12%|█▏        | 62/500 [01:46<12:39,  1.74s/it]