In [23]:
import math
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
import numpy as np
from dataclasses import dataclass
from tqdm.auto import tqdm

## The *Nim* and *Nimply* classes

In [24]:
Nimply = namedtuple("Nimply", "row, num_objects")
#Rule = namedtuple("Rule","description, p")


In [25]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [26]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [27]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


# Optimal Strategies

In [28]:
def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


# ES Strategies

In [29]:
N_DIM = 4
TOT_CHALLENGERS = 3
TOT_GAMES_PER_CHALLENGER = 30

In [30]:
@dataclass
class Rules:
    rules : np.ndarray
    n : float
    def __init__(self,p1: float,p2: float,p3:float,p4:float) -> None:
        #rule1(p1) = "pick even matches that belong to a random row"
        #rule2(p2) = "pick all matches except 1 from the row with lowest matches"
        #rule3(p3) = "pick a half of object from the row with highest matches"
        #rule4(p4) = "Try to maintain an even number of piles with an even number of objects and an odd number of piles with an odd number of objects"
        self.rules = np.array([p1,p2,p3,p4])
        self.n = p1+p2+p3+p4

    def select_rule(self) -> int:
        num = random.uniform(0,self.n)
        prob = 0
        for i,r in enumerate(self.rules):
            prob += r
            if num < prob:
                return i
            
    def modify(self,probs: np.ndarray):
        self.rules += probs
        self.rules =  np.maximum(self.rules, 0)
        self.n = np.sum(self.rules)

@dataclass
class Individual:
    fitness: float
    genotype: Rules





def rule1(state: Nim) -> Nimply:
    #"pick even matches that belong to a random row"
    rows = [r for r, c in enumerate(state.rows) if c > 1]
    
    if not rows:
        row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
        num_objects = 1
        return Nimply(row, num_objects)
    row = random.choice(rows)
    num_objects = random.choice([n for n in range(state.rows[row] + 1) if n%2 == 0])
    return Nimply(row, num_objects)
        
def rule2(state: Nim) -> Nimply:
    # "pick all matches except 1 from the row with lowest matches"
    rows = [(r,c-1) for r, c in enumerate(state.rows) if c > 1]
    
    if not rows:
        row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
        num_objects = random.randint(1, state.rows[row])
        return Nimply(row, num_objects)
    
    play = min(rows,key= lambda x: x[1])
    return Nimply(play[0],play[1])


def rule3(state: Nim) -> Nimply:
    #"pick a half of object from the row with highest matches"
    rows = [(r,math.ceil(c / 2)) for r, c in enumerate(state.rows) if c > 0]
    
    play = max(rows,key= lambda x: x[1])
    return Nimply(play[0],play[1])

def rule4(state: Nim) -> Nimply:
    #Try to maintain an even number of piles with an even number of objects and an odd number of piles with an odd number of objects
    rows = [c%2==0 for c in state.rows]
    n_even = sum(rows)
    num_matches = 2 if n_even%2==0 else 1
    possible_moves = [Nimply(r, o) for r, c in enumerate(state.rows) for o in range(num_matches, c + 1,2)]
    if not possible_moves:
        possible_moves = [Nimply(r,o) for r, c in enumerate(state.rows) for o in range(1, c+1)]
    ply = random.choice(possible_moves)
    return ply
    
def select_play(state: Nim,genotype: Rules) -> Nimply:
    rule = genotype.select_rule()
    #print(f"choose rule {rule}")
    if rule == 0:
        ply = rule1(state)
    elif rule == 1:
        ply = rule2(state)
    elif rule == 2:
        ply = rule3(state)
    else:
        ply = rule4(state)

    return ply

In [31]:


def play_game(strategy,player) -> bool:
    nim = Nim(5)
    while nim:
        ply = strategy[player](nim)
        nim.nimming(ply)
        player = 1 - player
    return player == 0

def fitness(genotype: Rules) -> float:
    tot = 0
    wins = 0
    weight = 1
    challengers = [gabriele,pure_random,optimal]
    eval_player = lambda nim: select_play(nim,genotype)
    for i in range(TOT_CHALLENGERS):
        #change weight cause win against optimal is more relevant that win against pure_random
        for j in range(TOT_GAMES_PER_CHALLENGER):
            if j%2 == 0:
                wins += weight * play_game((eval_player,challengers[i]),0)
            else:
                wins += weight * play_game((eval_player,challengers[i]),1)
        tot += TOT_GAMES_PER_CHALLENGER * weight
        weight += 0.25
    return wins/tot

def mutation(ind: Individual,sigma: float) -> Individual:
    mut = np.random.normal(loc = 0, scale = sigma, size=(N_DIM))
    gen = deepcopy(ind.genotype)
    gen.modify(mut)
    fit = fitness(gen)  #already evaluated
    return Individual(fit,gen)


## (μ,λ)-es

In [32]:

# (μ,λ)-es
μ = 5
λ = 50
mutation_per_parent = λ // μ
σ = 0.5
scale_sigma = 0.1
population =  np.array([
    Individual(0,Rules(random.random(),random.random(),random.random(),random.random()))
    for _ in range(μ)
])
pop_sigma = np.ones((μ,)) * σ

for p in population:
    fit = fitness(p.genotype)
    p.fitness = fit
for n in tqdm(range(5000 // λ)):
    indeces = np.random.randint(0, μ, size=(λ,))
    tmp_pop = np.array([deepcopy(ind) for ind in population[indeces]])
    tmp_pop_s = pop_sigma[indeces]
    offspring = np.array([])
    # mutate all sigma
    tmp_pop_s = np.random.normal(loc = tmp_pop_s, scale = scale_sigma)
    tmp_pop_s[tmp_pop_s < 1e-5] = 1e-5
    offspring_sigma = tmp_pop_s

    # parent's mutation
    for i,t in enumerate(tmp_pop):
        offspring = np.append(offspring,mutation(t,offspring_sigma[i]))
    total_solutions = offspring       #np.append(population, offspring) to have μ+λ
    total_sol_sigma = offspring_sigma #np.append(pop_sigma, offspring_sigma) to have μ+λ

    #select new generation from total_solution
    ind_sort = np.argsort(np.array([-x.fitness for x in total_solutions]))

    population = total_solutions[ind_sort[:μ]]
    pop_sigma = total_sol_sigma[ind_sort[:μ]]
    #print(f"sigma = {pop_sigma[0]}   fitness = {population[0].fitness}")


100%|██████████| 100/100 [12:30<00:00,  7.51s/it]


In [33]:
pprint(population)

print("-"*12 + "Optimal" + "-"*12)
challengers = [pure_random,gabriele,optimal]
players = ["Pure random", "Gabriele", "Optimal"]
eval_player = optimal
totwins = 0
for i in range(3):
    wins = 0
    for j in range(100):    
        if j%2 == 0:
            wins += play_game((eval_player,challengers[i]),0)
        else:
            wins += play_game((eval_player,challengers[i]),1)
        
    print(f"against {players[i]} {wins}/100 wins")
    totwins += wins
print(totwins / 300)


print("-"*10 + "My player" + "-"*10)
challengers = [pure_random,gabriele,optimal]
players = ["Pure random", "Gabriele", "Optimal"]
eval_player = lambda nim: select_play(nim,population[0].genotype)
totwins = 0
for i in range(3):
    wins = 0
    for j in range(100):
        if j%2 == 0:
            wins += play_game((eval_player,challengers[i]),0)
        else:
            wins += play_game((eval_player,challengers[i]),1)
    print(f"against {players[i]} {wins}/100 wins")
    totwins += wins
print(totwins / 300)



array([Individual(fitness=0.8688888888888889, genotype=Rules(rules=array([ 0.        ,  1.05182712,  1.27379698, 44.28798357]), n=46.613607667980155)),
       Individual(fitness=0.8511111111111112, genotype=Rules(rules=array([ 0.        ,  0.        ,  8.29296175, 42.06660686]), n=50.359568613533796)),
       Individual(fitness=0.8222222222222222, genotype=Rules(rules=array([ 3.07957428,  0.        ,  0.62973075, 43.87007758]), n=47.57938261640372)),
       Individual(fitness=0.82, genotype=Rules(rules=array([ 1.79785354,  0.        ,  2.34795249, 45.44327245]), n=49.589078479776944)),
       Individual(fitness=0.8111111111111111, genotype=Rules(rules=array([ 0.68604728,  0.        ,  2.06408079, 46.95173312]), n=49.70186119087419))],
      dtype=object)
------------Optimal------------
against Pure random 74/100 wins
against Gabriele 90/100 wins
against Optimal 51/100 wins
0.7166666666666667
----------My player----------
against Pure random 80/100 wins
against Gabriele 89/100 wins
agai