# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Work
This code was designed, programmed and tested by
* Giacomo Fantino
* Farisan Fekri
* Lorenzo Bonannella 
* Giacomo Cauda



In [3]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from dataclasses import dataclass
from random import randint,choice
from copy import copy

## The *Nim* and *Nimply* classes

In [4]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [5]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [6]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [7]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [9]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


## Oversimplified match

In [13]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <1 3 5 6 9>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <1 3 5 5 9>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=8)
INFO:root:status: <1 3 5 5 1>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=4)
INFO:root:status: <1 3 5 1 1>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <1 3 4 1 1>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <1 3 4 0 1>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 2 4 0 1>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=1)
INFO:root:status: <1 2 4 0 0>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <1 0 4 0 0>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=3)
INFO:root:status: <1 0 1 0 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <1 0

In [50]:
POPULATION_SIZE = 20
OFFSPRING_SIZE = 20
TOURNAMENT_SIZE = 3
MUTATION_PROBABILITY = .15
NUM_ROWS = 5
NUM_MOVES = sum([i * 2 + 1 for i in range(NUM_ROWS)])//2
UPPERBOUND_K = 3

def fitness(genotype):
    strategy = (pure_random, gabriele, optimal)
    score = 0

    for i in range(18): #number of rounds
        nim = Nim(5)
        index = 0
        player = 0
        num_moves = 0
        
        while nim:
            if player == 0:
                ply = genotype[index]
                index += 1
                
                while nim._rows[ply[0]] < ply[1]:
                    #spare move
                    ply = (randint(0, NUM_ROWS-1), 1) #spare move
                
                num_moves += 1
            else:
                ply = strategy[i%3](nim) #pick which strategy we are fighting this time
            
            nim.nimming(ply)
            player = 1 - player
            
        if player == 0: #we won
            score = score + 10
        else:
            #some points ==> if we played a lot of moves is better than losing after a couple of moves
            score = score + num_moves*0.01 #between 0 and 10
    return score

In [51]:
@dataclass
class Individual:
    fitness: int
    genotype: list[(int, int)]

population = [
    Individual(
        genotype=[],
        fitness=None,
    )
    for _ in range(POPULATION_SIZE)
]

for i in population:
    for _ in range(NUM_MOVES):
        row = randint(0, NUM_ROWS-1)
        i.genotype.extend(
            [
                (row, min(randint(1, row * 2 + 1), UPPERBOUND_K))
            ]) #what we are doing is avoiding situations like picking 5 elements from row 1 which has at most 1 element
    
    i.fitness = fitness(i.genotype)


In [52]:
def select_parent(pop): #seleziono il genitore da cui mutare
    pool = [choice(pop) for _ in range(TOURNAMENT_SIZE)]  
    champion = max(pool, key=lambda i: i.fitness)
    return champion

def mutate(ind: Individual) -> Individual:
    offspring = copy(ind)
    pos = randint(0, len(offspring.genotype)-1)  #seleziono quale flippare
    
    temp = 0
    while temp <= 0 or temp > UPPERBOUND_K: #new value MUST BE greater than 0
        temp = offspring.genotype[pos][1] + choice([-1, 1]) #increase or decrease the number of picked element from the row
    offspring.genotype[pos] = (offspring.genotype[pos][0], temp)
    offspring.fitness = None
    return offspring


def one_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    cut_point = randint(0, len(ind1.genotype))  #seleziono un punto di taglio dal set [GENITORE1/cut_point/GENITORE2]
    offspring = Individual(fitness=None,
                           genotype=ind1.genotype[:cut_point] + ind2.genotype[cut_point:])
    return offspring

In [53]:
for generation in range(100): 
    offspring = list() 
    for counter in range(OFFSPRING_SIZE):
        if random.random() < MUTATION_PROBABILITY:  # self-adapt mutation probability
            # mutation  # add more clever mutations
            p = select_parent(population)
            o = mutate(p)
        else:
            # xover # add more xovers
            p1 = select_parent(population)
            p2 = select_parent(population)
            o = one_cut_xover(p1, p2)
        offspring.append(o) 

    for i in offspring:
        i.fitness = fitness(i.genotype)
    population.extend(offspring) 
    population.sort(key=lambda i: i.fitness, reverse=True) 
    population = population[:POPULATION_SIZE] 
    print(population[0].fitness) 

150.2
150.2
150.2
150.2
150.2
150.2
150.21999999999997
150.21999999999997
150.21999999999997
150.21999999999997
150.21999999999997
150.21999999999997
160.14
160.14
160.14
160.14
160.14
160.14
160.14
160.14
160.14
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
160.17000000000002
170.07
170.07


KeyboardInterrupt: 