Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [39]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [40]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [41]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [42]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [43]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [44]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [45]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analyze(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analyze(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


## Oversimplified match

In [46]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=8)
INFO:root:status: <1 3 5 7 1>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=3)
INFO:root:status: <1 3 2 7 1>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=1)
INFO:root:status: <1 3 2 7 0>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=3)
INFO:root:status: <1 0 2 7 0>
INFO:root:ply: player 0 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 0 2 7 0>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=5)
INFO:root:status: <0 0 2 2 0>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=2)
INFO:root:status: <0 0 2 0 0>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 1 0 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 0 0 0>
INFO:root:status: Player 1 won!


## Genetic Algorithm

We will now attempt to find a good strategy using a genetic algorithm, we must first find a good fitness function over which we will evaluate the various individuals of each generation.<br>
We define the fitness as the win-rate in a series of matches against all the other members of the population.

In [47]:
class Individual:
    def __init__(self, genome):
        self.genome = genome
        self.fitness = None
    
    def assign(self, fitness):
        self.fitness = fitness

    def play(self, nim):
        chosen_moves = []
        probability = self.genome[0]
        ignoreZero = self.genome[1]
        cooked = dict()
        cooked["possible_moves"] = dict()
        full_rows = [i for i in range(len(nim.rows)) if nim.rows[i] != 0]   
        if len(full_rows) == 1:
            return Nimply(full_rows[0], max(1, nim.rows[full_rows[0]]-1))   #if only one row remains, take all but one match
        for ply in (Nimply(r, o) for r, c in enumerate(nim.rows) for o in range(1, c + 1)):
            tmp = deepcopy(nim)
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = nim_sum(tmp)
        cooked["chosen_moves"] = cooked["possible_moves"].copy()
        if ignoreZero:
            cooked["chosen_moves"] = {ply:ns for ply, ns in cooked["chosen_moves"].items() if ns != 0}
            if not cooked["chosen_moves"]:    #if all moves have nimsum 0 restore the previous dict
                cooked["chosen_moves"] = cooked["possible_moves"].copy()
        if random.random() <= probability:
            min_value = min(cooked["chosen_moves"].values())
            chosen_moves = [ply for ply, ns in cooked["chosen_moves"].items() if ns==min_value]
        if not chosen_moves:
            chosen_moves = list(cooked["chosen_moves"].keys())
        return random.choice(chosen_moves)

def duel(player1, player2=pure_random): #returns the index of a winner in a match
    nim = Nim(5)
    players =[player1, player2]
    current = random.choice([0, 1]) #random starting player
    while nim:
        ply = players[current](nim)
        nim.nimming(ply)
        current = 1 - current
    return current


def fit(individual:Individual, population=None, matches=3):
    wr = 0
    if population is not None:
        size = len(population)
        for p in population:
            for n in range(matches):
                wr += 1-duel(individual.play, p.play)
    else:
        size = 1
        for _ in range(matches):
            wr+=1-duel(individual.play)
    individual.assign(wr/(matches * size))
        


## Choosing the genes
Now that we defined a fitness function we have to find the genes that encode the behavour of the individuals. <br>
<ul>
<li>Gene 1: A float that represents the probability of choosing the lowest possible nimsum</li>
<li>Gene 2: A bool that indicates whether or not the individual should ignore the moves with a nimsum of zero when possible</li>
</ul>

In [48]:
GENERATIONS = 100
POPULATION_SIZE = 15
OFFSPRINGS_SIZE = 10
MUTATION_RATE = 0.15

def select(population) -> Individual:
    pool = [random.choice(population) for _ in range(2)]
    return pool[duel(population[0].play, population[1].play)]

def clamp(number, lower_bound, upper_bound):
    if number < lower_bound:
        number = lower_bound
    if number > upper_bound:
        number = upper_bound
    return number

def crossover(parent1:Individual, parent2:Individual) -> Individual:
    genome = []
    dominant = random.choice(["p1","p2","avg","avg"])
    if dominant == "p1":
        genome.append(parent1.genome[0])
    elif dominant == "p2":
        genome.append(parent2.genome[0])
    else:
        genome.append((parent1.genome[0]+parent2.genome[0])/2)
    genome.append(random.choice([parent1.genome[1], parent2.genome[1]]))
    return Individual(genome)

def mutate(mutant:Individual, delta=0.2) -> Individual:
    gene = random.choice([0, 1])
    if gene == 0:
        mutation = random.uniform(-delta, delta)
        genome=[clamp(mutant.genome[0] + mutation, 0, 1), mutant.genome[1]]
    else:
        genome=[mutant.genome[0], not mutant.genome[1]]
    return Individual(genome)

#starting population
population = []
for _ in range(POPULATION_SIZE):
    gene1 = random.gauss(0.5, 0.2)
    while gene1>1 or gene1<0:  #ensure that the gene is within boundaries
        gene1 = random.gauss(0.5, 0.2)
    gene2 = random.choice([True, False])
    population.append(Individual(genome=[gene1, gene2]))

for gen in range(GENERATIONS):
    offsprings = []
    for off in range(OFFSPRINGS_SIZE):
        if random.random() < MUTATION_RATE:
            o = mutate(select(population))
        else:
            parent1 = select(population)
            parent2 = select(population)
            o = crossover(parent1, parent2)
        offsprings.append(o)
    
    
    population.extend(offsprings)
    for p in population:
        fit(p, population=population)
    population.sort(key = lambda i: i.fitness, reverse=True)
    population = population[:POPULATION_SIZE]
    print(population[0].fitness, population[0].genome)





0.6666666666666666 [0.5330724379126296, True]
0.6133333333333333 [0.6040297584908363, True]
0.6 [0.7291925603648068, True]
0.5733333333333334 [0.7291925603648068, True]
0.6 [0.7571995772549457, False]
0.6666666666666666 [0.7571995772549457, False]
0.6133333333333333 [0.7571995772549457, False]
0.64 [0.7291925603648068, False]
0.64 [0.7501978230324109, False]
0.5866666666666667 [0.6666111594278216, False]
0.64 [0.7501978230324109, False]
0.6933333333333334 [0.6666111594278216, False]
0.6 [0.7336229551371194, False]
0.5733333333333334 [0.7571995772549457, False]
0.64 [0.9403635493673811, False]
0.6933333333333334 [0.8645962801824034, False]
0.7333333333333333 [1, False]
0.6266666666666667 [1, False]
0.64 [0.3933390814411213, False]
0.64 [0.3933390814411213, True]
0.6266666666666667 [1, False]
0.6533333333333333 [1, False]
0.64 [1, False]
0.6533333333333333 [1, False]
0.6266666666666667 [0.3933390814411213, True]
0.64 [1, False]
0.6266666666666667 [0.24401909084402484, True]
0.65333333333