Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


In [50]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
from random import random, choices, choice, randint
from copy import deepcopy
from dataclasses import dataclass, field
from numpy.random import normal

## The *Nim* and *Nimply* classes

In [51]:
# A move: take `num_objects` objects from the row with index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [52]:
class Nim:
    """Board of a Nim game.

    Row ``i`` starts with ``2 * i + 1`` objects. ``k`` is the maximum number
    of objects that may be removed in a single move; when omitted it defaults
    to the size of the largest initial row.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        if k is None:
            # Default limit: the size of the last (largest) initial row.
            k = (num_rows - 1) * 2 + 1
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the number of objects in each row."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Maximum number of objects removable in one move."""
        return self._k

    def nimming(self, ply: "Nimply") -> None:
        """Apply the move ``ply = (row, num_objects)`` in place.

        Raises:
            AssertionError: if the move removes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A move must remove at least one object: zero would silently pass
        # the turn and a negative amount would add objects to the row.
        assert num_objects >= 1, "a move must remove at least one object"
        assert self._rows[row] >= num_objects, "not enough objects in the row"
        assert self._k is None or num_objects <= self._k, "move exceeds the limit k"
        self._rows[row] -= num_objects

    def copy(self) -> 'Nim':
        """Return an independent copy of the board (same rows, same ``k``)."""
        new_nim = Nim(num_rows=len(self._rows), k=self._k)
        new_nim._rows = list(self._rows)
        return new_nim


## Sample (and silly) strategies

In [53]:
def pure_random(state: Nim) -> Nimply:
    """A completely random legal move.

    Picks a random non-empty row, then a random number of objects between 1
    and the row size, capped by the per-move limit ``state.k``.

    Raises:
        IndexError: if every row is empty (``choice`` on an empty list).
    """
    row = choice([r for r, c in enumerate(state.rows) if c > 0])
    available = state.rows[row]
    # Never ask for more than k objects: Nim.nimming rejects such moves.
    limit = available if state.k is None else min(available, state.k)
    num_objects = randint(1, limit)
    return Nimply(row, num_objects)


In [54]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row.

    "Lowest" means the non-empty row with the smallest index; the amount
    taken is the whole row, capped by the per-move limit ``state.k``.

    Raises:
        ValueError: if every row is empty, so no move exists.
    """
    for row, count in enumerate(state.rows):
        if count > 0:
            # Respect k: Nim.nimming rejects moves larger than the limit.
            take = count if state.k is None else min(count, state.k)
            return Nimply(row, take)
    raise ValueError("no objects left: no move is possible")


In [55]:
import numpy as np


def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of the board: the bitwise XOR of all row sizes.

    An empty board has nim-sum 0. Row sizes of any magnitude are supported.
    """
    result = 0
    for count in state.rows:
        # int() keeps the result a plain Python int even for NumPy integers.
        result ^= int(count)
    return result


def analyze(raw: Nim) -> dict:
    """Map every legal move on ``raw`` to the nim-sum of the resulting board.

    Returns:
        ``{"possible_moves": {Nimply: nim_sum_after_move, ...}}``. Only moves
        that remove between 1 and ``min(row size, raw.k)`` objects are listed.
    """
    cooked = {"possible_moves": {}}
    for row, count in enumerate(raw.rows):
        # Cap by k: larger moves would trip the assertion in Nim.nimming.
        limit = count if raw.k is None else min(count, raw.k)
        for num_objects in range(1, limit + 1):
            ply = Nimply(row, num_objects)
            tmp = deepcopy(raw)  # try the move on a copy, never on `raw`
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    """Return a winning move when one exists, otherwise a random legal move.

    The game loops in this file count the player who did NOT take the last
    object as the winner (misère play). A move is therefore winning when the
    board it leaves to the opponent is either:

    * only rows of size 0 or 1, with an odd number of objects left; or
    * at least one row of size >= 2, with nim-sum equal to 0.

    Raises:
        IndexError: if the board is empty (no legal move to choose from).
    """
    analysis = analyze(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    rows = state.rows
    winning_moves = []
    for ply, ns in analysis["possible_moves"].items():
        row, num_objects = ply
        remaining = [c - num_objects if r == row else c for r, c in enumerate(rows)]
        if max(remaining) <= 1:
            # Endgame: the opponent must face an odd number of single objects
            # so that they are the one forced to take the last.
            if sum(remaining) % 2 == 1:
                winning_moves.append(ply)
        elif ns == 0:
            # Regular phase: leave the opponent a zero nim-sum.
            winning_moves.append(ply)
    if not winning_moves:
        # Losing position: every move is equally bad, pick any.
        winning_moves = list(analysis["possible_moves"])
    return choice(winning_moves)




In [56]:
def even_odd(state: Nim) -> Nimply:
    """Play on the fullest row; the amount depends on the row index parity.

    The first row holding the maximum number of objects is selected. If its
    index is even, half of the row (rounded down, but at least one object)
    is taken; if odd, the whole row is taken. Both amounts are capped by
    ``state.k``.

    Raises:
        ValueError: if the board has no rows or every row is empty.
    """
    row, count = max(enumerate(state.rows), key=lambda rc: rc[1])
    if count == 0:
        raise ValueError("no objects left: no move is possible")
    limit = count if state.k is None else min(state.k, count)
    if row % 2 == 0:
        # max(1, ...) avoids the illegal zero-object move on a 1-object row.
        n_obj = max(1, min(limit, count // 2))
    else:
        n_obj = max(1, limit)
    return Nimply(row, n_obj)

def one_from_min(state:Nim,weights=None)->Nimply:
    """Take a single object from the smallest non-empty row.

    Ties are resolved in favour of the row with the lowest index.
    ``weights`` is accepted for call compatibility and is not used.

    Raises:
        ValueError: if every row is empty, so no move exists.
    """
    # Compare rows by their size; comparing by the amount to remove would
    # always select the first non-empty row, since every row allows taking 1.
    non_empty = [(r, c) for r, c in enumerate(state.rows) if c > 0]
    row, _ = min(non_empty, key=lambda rc: rc[1])
    return Nimply(row, 1)

def one_from_max(state:Nim)->Nimply:
    """Take a single object from the fullest row (lowest index on ties).

    Raises ValueError when every row is empty.
    """
    occupied = [(idx, count) for idx, count in enumerate(state.rows) if count > 0]
    fullest_row, _ = max(occupied, key=lambda pair: pair[1])
    return Nimply(fullest_row, 1)


In [57]:
def match(strategy_0, strategy_1, num_sets) -> int:
    """Play one game on a fresh ``Nim(num_sets)`` board; player 0 moves first.

    Args:
        strategy_0, strategy_1: objects whose ``get_strategy()`` returns a
            callable that maps the board to a move.
        num_sets: number of rows of the board.

    Returns:
        The index (0 or 1) of the player who did NOT make the last move,
        which the rest of this file counts as the winner.
    """
    players = (strategy_0, strategy_1)
    nim = Nim(num_sets)
    player = 0
    while nim:
        ply = players[player].get_strategy()(nim)
        nim.nimming(ply)
        player = 1 - player
    # `player` was flipped after the final move, so it names the other player.
    return player

In [58]:
class Individual:
    """A weighted mix of strategies together with its fitness value."""

    def __init__(self, weights: tuple[float], strategies, fitness):
        self.weights = weights
        self.strategies = strategies
        self.fitness = fitness

    def get_strategy(self):
        """Draw one strategy at random, with probability given by the weights.

        When the smallest weight is zero or negative, all weights are shifted
        so that the smallest becomes 1.0 before drawing.
        """
        lowest = min(self.weights)
        effective = (
            [w - lowest + 1.0 for w in self.weights]
            if lowest <= 0
            else self.weights
        )
        picked = choices(self.strategies, weights=effective, k=1)
        return picked[0]

In [59]:
# Elementary strategies an evolved individual draws from.
strategies = [gabriele, pure_random, one_from_min, one_from_max]

# Reference opponent: an individual whose only strategy is `optimal`.
optimal_player = Individual((1.0,), [optimal], 1)

# Number of board rows and number of games per fitness evaluation.
num_sets, num_games = 5, 100

def fitness(individual: Individual) -> float:
    """Fraction of ``num_games`` games against ``optimal_player`` for which
    ``match`` returned 0; ``individual`` always plays as player 0.
    """
    returned_ones = 0
    for _ in range(num_games):
        # match() yields 0 or 1, so the running sum counts the games that
        # ended with 1.
        returned_ones += match(individual, optimal_player, num_sets)
    return (num_games - returned_ones) / num_games

In [60]:
import copy

# Number of individuals drawn (with replacement) for each tournament in select_parent.
TOURNAMENT_SIZE = 3

def create_population(size):
    """Build ``size`` individuals with uniform random weights in [0, 1).

    Each individual gets one weight per entry of ``strategies`` and a random
    placeholder fitness.
    """
    pop = []
    for _ in range(size):
        genome = tuple(random() for _ in range(len(strategies)))
        pop.append(Individual(genome, strategies, random()))
    return pop

def select_parent(pop):
    """Tournament selection: sample ``TOURNAMENT_SIZE`` individuals from
    ``pop`` (with replacement) and return the one with the highest fitness.
    """
    contenders = (choice(pop) for _ in range(TOURNAMENT_SIZE))
    return max(contenders, key=lambda contender: contender.fitness)

def random_mutation() ->Individual:
    """Return a brand-new individual with uniform random weights in [0, 1)
    (one per entry of ``strategies``) and a random placeholder fitness.
    """
    genome = tuple(random() for _ in range(len(strategies)))
    placeholder_fitness = random()
    return Individual(genome, strategies, placeholder_fitness)

def mutate(ind: Individual, σ) -> Individual:
    """Return a copy of ``ind`` with Gaussian noise added to every weight.

    The noise has mean 0 and standard deviation ``σ``. ``ind`` is left
    untouched and the offspring's fitness is initialised to 0.
    """
    genome = np.array(ind.weights)  # fresh array: the parent's tuple is not modified
    for idx, _ in enumerate(genome):
        genome[idx] += normal(0.0, σ)
    return Individual(tuple(genome), strategies, 0)

## ( &mu; + &lambda; )-ES

In [None]:
from tqdm import tqdm

POPULATION_SIZE = 70
OFFSPRING_SIZE = 15
GENERATIONS = 200
# Probability that an offspring is a brand-new random individual instead of
# a Gaussian mutation of a tournament-selected parent.
MUTATION_PROBABILITY = 0.2

σ = 0.075
population = create_population(POPULATION_SIZE)
for individual in population:
    individual.fitness = fitness(individual)

population.sort(key=lambda i: i.fitness, reverse=True)
# Snapshot as a list: a lazy `map` object can be consumed only once, so it
# would be empty from the second comparison on and no success would ever be
# counted.
prev_evals = [individual.fitness for individual in population]
initial_fitness = population[0].fitness
stats = [0, 0]  # [successes, comparisons] since the last adjustment of σ
for step in range(GENERATIONS):
    offspring = list()
    for counter in range(OFFSPRING_SIZE):
        if random() > MUTATION_PROBABILITY:
            p = select_parent(population)
            o = mutate(p, σ)
        else:
            o = random_mutation()
        offspring.append(o)

    for individual in offspring:
        individual.fitness = fitness(individual)

    # Parents and offspring compete together; the best POPULATION_SIZE survive.
    population.extend(offspring)
    population.sort(key=lambda i: i.fitness, reverse=True)
    population = population[:POPULATION_SIZE]

    print(f"fitness: {population[0].fitness} weights: {population[0].weights} sigma: {σ}")

    # A success is a rank whose fitness improved over the previous generation.
    curr_evals = [individual.fitness for individual in population]
    succs = sum(1 for c, p in zip(curr_evals, prev_evals) if c > p)
    stats[0] += succs
    stats[1] += POPULATION_SIZE
    prev_evals = curr_evals
    if (step + 1) % 10 == 0:  # adjust mutation
        # One-fifth rule: shrink σ when fewer than 1/5 of the comparisons
        # were successes, enlarge it when more than 1/5 were.
        success_ratio = stats[0] / stats[1]
        if success_ratio < 1 / 5:
            σ /= 1.1
        elif success_ratio > 1 / 5:
            σ *= 1.1
        stats = [0, 0]

## Oversimplified match

In [None]:
# Head-to-head evaluation: best evolved individual vs. the optimal player.
num_iteration = 1000
# NOTE: this rebinds the module-level `num_sets` that fitness() also reads.
num_sets = 10
count_0 = count_1 = 0

logging.getLogger().setLevel(logging.INFO)

best_evolved = population[0]

# Player 0 moves first; swap the two entries to let the optimal player start:
#strategy = (optimal_player, best_evolved)
strategy = (best_evolved, optimal_player)


for _ in tqdm(range(num_iteration)):
    nim = Nim(num_sets)

    player = 0
    while nim:
        ply = strategy[player].get_strategy()(nim)
        nim.nimming(ply)
        player = 1 - player

    # After the loop `player` is the one who did not make the last move,
    # and that player is credited with the win.
    if player != 0:
        count_1 += 1
    else:
        count_0 += 1

print(f"Number games won player_0: {count_0}, Percentage: {round(count_0 / num_iteration * 100)}%")
print(f"Number games won player_1: {count_1}, Percentage: {round(count_1 / num_iteration * 100)}%")
