# Lab 2: ES

In this lab we will try to implement an evolutionary strategy in order to find the best strategy to win a Nim match (misère variation).

In [101]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [102]:
# A single move: take `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")


In [103]:
class Nim:
    """Board of a Nim game.

    The board has ``num_rows`` rows; row ``i`` starts with ``2 * i + 1``
    objects. When ``k`` is not ``None`` it is the maximum number of objects
    a single move is allowed to remove.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is still on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Render the board as ``<r0 r1 ... rn>``."""
        return "<" + " ".join(map(str, self._rows)) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the number of objects in each row."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply the move ``ply`` (a ``(row, num_objects)`` pair) in place.

        Raises:
            AssertionError: if the move removes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A move has to remove something: zero would skip the turn and a
        # negative amount would add objects to the row.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


### Pure random 

In [104]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row and remove a random number of its objects."""
    non_empty = [index for index, count in enumerate(state.rows) if count > 0]
    row = random.choice(non_empty)
    return Nimply(row, random.randint(1, state.rows[row]))


### Optimal

In [105]:
import numpy as np


def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of ``state``: the bitwise XOR of all row sizes.

    Works for any number of rows (an empty board yields 0) and for row
    sizes of any magnitude.
    """
    total = 0
    for count in state.rows:
        total ^= count
    return total


def analyze(raw: Nim) -> dict:
    """Map every legal move on ``raw`` to the nim-sum of the resulting board.

    Returns a dict with the single key ``"possible_moves"`` whose value maps
    each ``Nimply`` to the nim-sum obtained after playing it on a copy of
    ``raw``; ``raw`` itself is left untouched.
    """
    outcomes = {}
    for row, count in enumerate(raw.rows):
        for taken in range(1, count + 1):
            move = Nimply(row, taken)
            board = deepcopy(raw)
            board.nimming(move)
            outcomes[move] = nim_sum(board)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Return a random move among those that leave a non-zero nim-sum.

    When every legal move leaves a nim-sum of zero, the choice is made among
    all legal moves instead.
    """
    analysis = analyze(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    candidates = [move for move in outcomes if outcomes[move] != 0]
    if len(candidates) == 0:
        candidates = list(outcomes)
    return random.choice(candidates)


## Oversimplified match

In [106]:
# Show INFO-level records so every move of the demo match gets logged.
logging.getLogger().setLevel(logging.INFO)

# strategy[i] is the move function used by player i.
strategy = (optimal, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    # Hand the turn to the other player.
    player = 1 - player
# The turn was flipped after the last move, so `player` is the one who did
# NOT take the last object.
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=2)
INFO:root:status: <1 3 5 7 7>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <1 3 5 6 7>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=3)
INFO:root:status: <1 0 5 6 7>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=2)
INFO:root:status: <1 0 3 6 7>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=5)
INFO:root:status: <1 0 3 6 2>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 0 3 6 2>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=3)
INFO:root:status: <0 0 3 3 2>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 2 3 2>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=2)
INFO:root:status: <0 0 2 1 2>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 2 0 2>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0

## Genetic Algorithm

We will now attempt to find a good strategy using a genetic algorithm. First, we need a fitness function with which to evaluate the individuals of each generation.<br>
We define the fitness as the win rate in a series of 7 matches against the `optimal` strategy.

In [107]:
class Individual:
    """A candidate strategy encoded by a two-gene genome.

    ``genome[0]`` is the probability of restricting the choice to the moves
    with the lowest resulting nim-sum; ``genome[1]`` is a flag that, when
    truthy, discards the moves whose resulting nim-sum is zero whenever at
    least one other move exists.
    """

    def __init__(self, genome):
        self.fitness = None  # set later through assign()
        self.genome = genome

    def assign(self, fitness):
        """Store the fitness computed for this individual."""
        self.fitness = fitness

    def play(self, nim):
        """Choose a move for the board ``nim`` according to the genome."""
        greedy_probability, avoid_zero = self.genome[0], self.genome[1]

        non_empty = [index for index, count in enumerate(nim.rows) if count != 0]
        if len(non_empty) == 1:
            # Only one row left: take all but one object (or the last object
            # when the row holds just one).
            last = non_empty[0]
            return Nimply(last, max(1, nim.rows[last] - 1))

        # Nim-sum reached by every legal move, in row-major order.
        outcomes = {}
        for row, count in enumerate(nim.rows):
            for taken in range(1, count + 1):
                move = Nimply(row, taken)
                board = deepcopy(nim)
                board.nimming(move)
                outcomes[move] = nim_sum(board)

        candidates = outcomes
        if avoid_zero:
            non_zero = {move: ns for move, ns in outcomes.items() if ns != 0}
            if non_zero:  # otherwise every move gives 0: keep them all
                candidates = non_zero

        if random.random() <= greedy_probability:
            lowest = min(candidates.values())
            picks = [move for move, ns in candidates.items() if ns == lowest]
        else:
            picks = list(candidates)
        return random.choice(picks)

def duel(player1, player2=pure_random, first=None):
    """Play one match on a 5-row board and return the winner's index.

    ``player1`` and ``player2`` are callables mapping a board to a move.
    ``first`` is the index (0 or 1) of the player who moves first; when it is
    ``None`` the starting player is drawn at random. The returned index
    (0 for ``player1``, 1 for ``player2``) is that of the player who did not
    make the last move.
    """
    board = Nim(5)
    contenders = (player1, player2)
    turn = random.choice([0, 1]) if first is None else first
    while board:
        board.nimming(contenders[turn](board))
        turn = 1 - turn
    return turn


def fit(individual: Individual, matches=7):
    """Set ``individual``'s fitness to its win rate over ``matches`` duels against ``optimal``."""
    victories = sum(1 - duel(individual.play, optimal) for _ in range(matches))
    individual.assign(victories / matches)
        


## Choosing the genes
Now that we have defined a fitness function, we have to choose the genes that encode the behaviour of the individuals.
- `gene1` : A float that represents the probability of choosing a move that results in the lowest possible nimsum
- `gene2` : A bool that indicates whether or not the individual should ignore the moves with a nimsum of zero when possible


In [108]:


def select(population) -> Individual:
    """Tournament selection of size two.

    Two individuals are drawn at random (with replacement) from
    ``population``, they play one match against each other, and the winner
    is returned.
    """
    pool = [random.choice(population) for _ in range(2)]
    # The duel must be played by the two sampled individuals: its result is
    # used as an index into `pool`.
    return pool[duel(pool[0].play, pool[1].play)]

def clamp(number, lower_bound, upper_bound):
    """Return ``number`` limited to the range [``lower_bound``, ``upper_bound``]."""
    # Raise to the lower bound first, then cap at the upper bound.
    return min(max(number, lower_bound), upper_bound)

def crossover(parent1: Individual, parent2: Individual) -> Individual:
    """Create a child genome by recombining the genes of two parents.

    The first gene is copied from ``parent1``, copied from ``parent2`` or
    taken as the average of the two (the average is twice as likely as
    either copy). The second gene is copied from a randomly chosen parent.
    """
    mode = random.choice(["p1", "p2", "avg", "avg"])
    gene_a, gene_b = parent1.genome[0], parent2.genome[0]
    if mode == "p1":
        first_gene = gene_a
    elif mode == "p2":
        first_gene = gene_b
    else:
        first_gene = (gene_a + gene_b) / 2
    second_gene = random.choice([parent1.genome[1], parent2.genome[1]])
    return Individual([first_gene, second_gene])

def mutate(mutant: Individual, delta=0.2) -> Individual:
    """Return a new individual that differs from ``mutant`` in one gene.

    Either the first gene is shifted by a random amount in
    [-``delta``, ``delta``] and clamped to [0, 1], or the second gene is
    negated; the gene to change is picked at random.
    """
    target = random.choice([0, 1])
    if target != 0:
        return Individual([mutant.genome[0], not mutant.genome[1]])
    shift = random.uniform(-delta, delta)
    return Individual([clamp(mutant.genome[0] + shift, 0, 1), mutant.genome[1]])






### $\mu + \lambda$
In each generation the population consists of the fittest individuals among both the previous population and its offspring.

In [109]:
GENERATIONS = 100
POPULATION_SIZE = 15
OFFSPRINGS_SIZE = 10
MUTATION_RATE = 0.15

# Starting population: gene1 is drawn from a Gaussian (mean 0.5, sigma 0.2)
# and redrawn until it falls inside [0, 1]; gene2 is a coin flip.
population = []
for _ in range(POPULATION_SIZE):
    while True:
        gene1 = random.gauss(0.5, 0.2)
        if 0 <= gene1 <= 1:
            break
    gene2 = random.choice([True, False])
    population.append(Individual(genome=[gene1, gene2]))

for gen in range(GENERATIONS):
    offsprings = []
    for off in range(OFFSPRINGS_SIZE):
        if random.random() >= MUTATION_RATE:
            # Recombination of two independently selected parents.
            parent1 = select(population)
            parent2 = select(population)
            o = crossover(parent1, parent2)
        else:
            o = mutate(select(population))
        offsprings.append(o)

    # (mu + lambda): parents and offspring are evaluated together and the
    # best POPULATION_SIZE individuals survive.
    population = population + offsprings
    for p in population:
        fit(p)
    population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
    print(population[0].fitness, population[0].genome)


1.0 [0.33570998508139804, True]
1.0 [0.45062353628598584, True]
1.0 [0.6488226729534488, True]
1.0 [0.6488226729534488, True]
1.0 [0.6488226729534488, True]
1.0 [0.6488226729534488, True]
1.0 [0.6488226729534488, True]
1.0 [0.6641403407332406, True]
1.0 [0.6641403407332406, True]
1.0 [0.5836613215577324, True]
1.0 [0.6641403407332406, True]
1.0 [0.6641403407332406, True]
1.0 [0.6641403407332406, True]
1.0 [0.6641403407332406, True]
1.0 [0.7026840970799334, True]
1.0 [0.7174705176437712, True]
1.0 [0.6641403407332406, True]
1.0 [0.6709433567379801, True]
1.0 [0.6929153461892716, True]
1.0 [0.6929153461892716, True]
1.0 [0.6738466178088427, True]
1.0 [0.7026840970799334, True]
1.0 [0.7026840970799334, True]
1.0 [0.7026840970799334, True]
1.0 [0.6834161702662107, True]
1.0 [0.7026840970799334, True]
1.0 [0.6953575339119371, True]
1.0 [0.8731524095177017, True]
1.0 [0.8163453108760728, True]
1.0 [0.8163453108760728, True]
1.0 [0.8163453108760728, True]
1.0 [0.8163453108760728, True]
1.0 [0

### $\mu , \lambda$
In each generation the population consists of the fittest individuals among the offspring of the previous population.

In [114]:
GENERATIONS = 100
POPULATION_SIZE = 15
OFFSPRINGS_SIZE = 30
MUTATION_RATE = 0.15


# Starting population: gene1 is drawn from a Gaussian (mean 0.5, sigma 0.2)
# and redrawn until it falls inside [0, 1]; gene2 is a coin flip.
population = []
for _ in range(POPULATION_SIZE):
    while True:
        gene1 = random.gauss(0.5, 0.2)
        if 0 <= gene1 <= 1:
            break
    gene2 = random.choice([True, False])
    population.append(Individual(genome=[gene1, gene2]))

for gen in range(GENERATIONS):
    offsprings = []
    for off in range(OFFSPRINGS_SIZE):
        if random.random() >= MUTATION_RATE:
            # Recombination of two independently selected parents.
            parent1 = select(population)
            parent2 = select(population)
            o = crossover(parent1, parent2)
        else:
            o = mutate(select(population))
        offsprings.append(o)

    # (mu , lambda): the parents are discarded; only the offspring are
    # evaluated and the best POPULATION_SIZE of them survive.
    population = list(offsprings)
    for p in population:
        fit(p)
    population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
    print(population[0].fitness, population[0].genome)





1.0 [0.3395520653330143, True]
1.0 [0.6820254931597185, True]
1.0 [0.7678112650196436, True]
1.0 [0.7704342224928793, True]
1.0 [0.6662566960354164, True]
1.0 [0.6615476617333484, True]
1.0 [0.7684634718456793, True]
1.0 [0.8229067522254447, True]
1.0 [0.7684634718456793, True]
1.0 [0.7684634718456793, True]
1.0 [0.7407678888947925, True]
1.0 [0.850115792065791, True]
1.0 [0.850115792065791, True]
1.0 [0.8801262181952505, True]
1.0 [0.8665488591948124, True]
1.0 [0.8241752482904787, True]
1.0 [0.7617669002863571, True]
1.0 [0.8634884166473913, True]
1.0 [0.8713949469150857, True]
1.0 [0.8801740574770336, True]
1.0 [0.8801740574770336, True]
1.0 [0.8335219345029631, True]
1.0 [0.9014055687731088, True]
1.0 [0.9312463919039824, True]
1.0 [0.9180257511825141, True]
1.0 [0.9668967813589042, True]
1.0 [0.9291226042191914, True]
1.0 [0.9970644276426046, True]
1.0 [0.9949447571302759, True]
1.0 [0.971187692262657, True]
1.0 [1, True]
1.0 [0.9834483906794521, True]
1.0 [0.9587694823553645, Tru

### Observations
Both types of survival selection seem able to produce individuals capable of consistently outperforming `optimal`, even in early generations.
At a glance we can see that the value of `gene1` doesn't seem to matter as long as `gene2` is **True**; despite this, over the course of multiple generations higher values of `gene1` are preferred.<br>
Now let's see what happens if we define a new fitness function that ranks individuals according to their win rate against the members of the same population, disregarding their performance against `optimal`.

In [116]:
def competitive_fit(individual: Individual, population=None, matches=3):
    """Set ``individual``'s fitness to its win rate against a population.

    With a ``population``, the individual plays ``matches`` duels against
    each of its members and the fitness is the fraction of duels won.
    Without one, it plays ``matches`` duels against the default opponent of
    ``duel`` instead.
    """
    if population is None:
        size = 1
        wins = sum(1 - duel(individual.play) for _ in range(matches))
    else:
        size = len(population)
        wins = sum(
            1 - duel(individual.play, rival.play)
            for rival in population
            for _ in range(matches)
        )
    individual.assign(wins / (matches * size))

For the sake of simplicity we will only try the $(\mu + \lambda)$ paradigm, but we expect both selection methods to give us similar results.

In [118]:
GENERATIONS = 100
POPULATION_SIZE = 15
OFFSPRINGS_SIZE = 10
MUTATION_RATE = 0.15


# Starting population: gene1 is drawn from a Gaussian (mean 0.5, sigma 0.2)
# and redrawn until it falls inside [0, 1]; gene2 is a coin flip.
population = []
for _ in range(POPULATION_SIZE):
    while True:
        gene1 = random.gauss(0.5, 0.2)
        if 0 <= gene1 <= 1:
            break
    gene2 = random.choice([True, False])
    population.append(Individual(genome=[gene1, gene2]))

for gen in range(GENERATIONS):
    offsprings = []
    for off in range(OFFSPRINGS_SIZE):
        if random.random() >= MUTATION_RATE:
            # Recombination of two independently selected parents.
            parent1 = select(population)
            parent2 = select(population)
            o = crossover(parent1, parent2)
        else:
            o = mutate(select(population))
        offsprings.append(o)

    # (mu + lambda) with competitive fitness: every individual is scored by
    # its win rate against the whole merged population.
    population = population + offsprings
    for p in population:
        competitive_fit(p, population=population)
    population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
    print(population[0].fitness, population[0].genome)

0.68 [0.607939095369748, True]
0.7333333333333333 [0.8693021299112054, True]
0.6666666666666666 [0.8693021299112054, True]
0.7333333333333333 [0.8693021299112054, True]
0.68 [0.7900591458731716, False]
0.7866666666666666 [0.8693021299112054, False]
0.7466666666666667 [0.8693021299112054, False]
0.6133333333333333 [0.8387671541672437, False]
0.6 [0.8693021299112054, False]
0.6 [0.8693021299112054, False]
0.6133333333333333 [0.7900591458731716, False]
0.6 [0.8540346420392245, False]
0.6666666666666666 [0.8502177700712293, False]
0.6666666666666666 [0.8502177700712293, False]
0.6 [0.8597599499912174, False]
0.7733333333333333 [1, False]
0.7066666666666667 [1, False]
0.68 [1, False]
0.64 [1, False]
0.68 [1, False]
0.6933333333333334 [1, False]
0.5866666666666667 [1, False]
0.6533333333333333 [1, False]
0.68 [1.0, False]
0.6 [1, False]
0.6266666666666667 [1.0, False]
0.6133333333333333 [1.0, False]
0.6 [1.0, False]
0.6133333333333333 [1.0, False]
0.64 [1.0, False]
0.6133333333333333 [1.0, F

Quite surprisingly, the optimal individual is very different from the one obtained while fitting over the win rate against `optimal`.<br>
A high value of `gene1` is preferred and the optimal value of `gene2` as given by this fitness is **False**.<br>
Another interesting thing to note is that, in earlier generations, the optimal value of `gene2` is **True**; this is probably to offset the random nature of the moves produced by a low `gene1`, but, as the probability of choosing a low nim-sum increases, the optimal strategy to win against similar opponents is to also consider moves whose nim-sum is zero.

## References
[Giovanni Squillero for the original code](https://github.com/squillero/computational-intelligence)<br>