Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal repository for the course
* Put a `README.md` and your solution (all the files, code, and auxiliary data if needed) inside it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [171]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
import math
import numpy as np


## The *Nim* and *Nimply* classes

In [172]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [173]:
class Nim:
    """State of a Nim board.

    The board is a list of rows; row ``i`` starts with ``2 * i + 1`` objects.
    When ``k`` is not ``None`` a single move may remove at most ``k`` objects.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Create a board with ``num_rows`` rows and an optional per-move cap ``k``."""
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Return the row sizes formatted as ``<1 3 5 ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Immutable snapshot of the number of objects in each row."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply a move in place.

        ``ply`` is any ``(row, num_objects)`` pair. An ``AssertionError`` is
        raised when the move is illegal: negative row index, fewer than one
        object, more objects than the row holds, or more than ``k`` objects.
        A row index past the end of the board raises ``IndexError``.
        """
        row, num_objects = ply
        # A negative index would silently wrap around to another row.
        assert row >= 0, f"invalid row index {row}"
        available = self._rows[row]
        # Zero is not a move and a negative amount would *add* objects.
        assert num_objects >= 1, f"must remove at least one object, got {num_objects}"
        assert available >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) strategies

In [174]:
def pure_random(state: Nim, genome=None) -> Nimply:
    """Return a uniformly random legal move.

    A non-empty row is picked at random, then a random number of objects
    between 1 and the row size, capped by the board's per-move limit when one
    is set. ``genome`` is ignored; it exists so every strategy shares the same
    call signature. Raises ``IndexError`` when the board is empty.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    upper = state.rows[row]
    # Nim exposes no public accessor for the cap, so read the private field;
    # without this a random draw above k would be rejected by Nim.nimming.
    k = getattr(state, "_k", None)
    if k is not None:
        upper = min(upper, k)
    num_objects = random.randint(1, upper)
    return Nimply(row, num_objects)


In [175]:
def gabriele(state: Nim, genome=None) -> Nimply:
    """Pick always the maximum possible number of the lowest row.

    "Lowest" means the non-empty row with the smallest index. The amount taken
    is the whole row, capped by the board's per-move limit when one is set.
    ``genome`` is ignored; it exists so every strategy shares the same call
    signature. Raises ``ValueError`` when the board is empty.
    """
    # Nim exposes no public accessor for the cap, so read the private field.
    k = getattr(state, "_k", None)
    for row, count in enumerate(state.rows):
        if count > 0:
            take = count if k is None else min(count, k)
            return Nimply(row, take)
    raise ValueError("no objects left to take")


In [176]:
def adaptive(state: Nim, genome) -> Nimply:
    """A strategy that can adapt its parameters.

    ``genome`` is a mapping with two fractions:

    * ``"row"``: which of the non-empty rows to play, as a fraction of their
      count (0 selects the first non-empty row);
    * ``"elements"``: which fraction of the chosen row to remove (rounded
      down, but always at least one object).

    Fractions outside ``[0, 1)`` are clamped to the nearest legal choice
    instead of producing an illegal move, and the amount is capped by the
    board's per-move limit when one is set. Raises ``ValueError`` when the
    board is empty.
    """
    rows = state.rows
    playable = [index for index, count in enumerate(rows) if count > 0]
    if not playable:
        raise ValueError("no objects left to take")

    # Map the fraction onto a position among the non-empty rows, then clamp
    # so that e.g. genome["row"] == 1.0 still selects the last playable row.
    position = math.floor(len(playable) * genome["row"])
    position = min(max(position, 0), len(playable) - 1)
    row = playable[position]

    num_objects = max(math.floor(rows[row] * genome["elements"]), 1)
    num_objects = min(num_objects, rows[row])
    # Nim exposes no public accessor for the cap, so read the private field.
    k = getattr(state, "_k", None)
    if k is not None:
        num_objects = min(num_objects, k)
    return Nimply(row, num_objects)

In [177]:

def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of the board: the bitwise XOR of all row sizes.

    Only ``state.rows`` is read. An empty board yields 0 and row sizes of any
    magnitude are supported.
    """
    total = 0
    for count in state.rows:
        total ^= count
    return int(total)


def analize(raw: Nim) -> dict:
    """Map every legal move on ``raw`` to the nim-sum of the resulting board.

    Returns ``{"possible_moves": {Nimply(row, num_objects): nim_sum, ...}}``,
    with moves listed by ascending row and then ascending amount. Moves larger
    than the board's per-move limit are not listed, because ``Nim.nimming``
    would reject them. ``raw`` itself is never modified.
    """
    # Nim exposes no public accessor for the cap, so read the private field.
    k = getattr(raw, "_k", None)
    current = nim_sum(raw)
    possible_moves = dict()
    for r, c in enumerate(raw.rows):
        largest = c if k is None else min(c, k)
        for o in range(1, largest + 1):
            # XOR is its own inverse: drop the old row size, add the new one.
            possible_moves[Nimply(r, o)] = current ^ c ^ (c - o)
    return {"possible_moves": possible_moves}


def optimal(state: Nim, genome=None) -> Nimply:
    """Play the nim-sum strategy for the misère game (taking the last object loses).

    A move is kept when it leaves the opponent in a losing position:

    * if every row would hold at most one object, an odd number of objects
      must remain, so the opponent is forced to take the last one;
    * otherwise the resulting nim-sum must be zero.

    One such move is chosen at random; when none exists, any legal move is
    chosen at random. ``genome`` is ignored; it exists so every strategy
    shares the same call signature.

    NOTE(review): the rule above is the classic one for boards without a
    per-move limit; it has not been adapted to k-limited boards.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    rows = state.rows

    def leaves_losing_position(ply, resulting_nim_sum) -> bool:
        row, num_objects = ply
        after = list(rows)
        after[row] -= num_objects
        if max(after) <= 1:
            # Only single objects remain: parity decides who takes the last.
            return sum(after) % 2 == 1
        return resulting_nim_sum == 0

    spicy_moves = [
        ply
        for ply, ns in analysis["possible_moves"].items()
        if leaves_losing_position(ply, ns)
    ]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    return random.choice(spicy_moves)


## Oversimplified match

In [178]:
# Disabled demo match: the code is kept inside a bare string literal, so
# running this cell plays no game and only echoes the string.
# NOTE(review): as written, `strategy[player](nim)` would call `adaptive`
# without its required `genome` argument; pass a genome before re-enabling.
'''logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, adaptive)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")'''


'logging.getLogger().setLevel(logging.INFO)\n\nstrategy = (optimal, adaptive)\n\nnim = Nim(5)\nlogging.info(f"init : {nim}")\nplayer = 0\nwhile nim:\n    ply = strategy[player](nim)\n    logging.info(f"ply: player {player} plays {ply}")\n    nim.nimming(ply)\n    logging.info(f"status: {nim}")\n    player = 1 - player\nlogging.info(f"status: Player {player} won!")'

## Trial

In [190]:
def fitness(current_genome, strategy, games=1000, num_rows=5):
    """Estimate the win rate of ``strategy[1]`` against ``strategy[0]``.

    Plays ``games`` matches on a fresh ``Nim(num_rows)`` board. ``strategy[0]``
    always moves first and both strategies are called as
    ``strategy[i](board, current_genome)``. The player who removes the last
    object loses.

    Args:
        current_genome: parameters forwarded to both strategies.
        strategy: pair of callables ``(first_player, second_player)``.
        games: number of matches to play; must be positive.
        num_rows: number of rows of the board used for every match.

    Returns:
        Fraction of matches won by ``strategy[1]``, as a float in ``[0, 1]``.
    """
    wins = 0
    for _ in range(games):
        nim = Nim(num_rows)
        player = 0
        while nim:
            ply = strategy[player](nim, current_genome)
            nim.nimming(ply)
            player = 1 - player
        # After the loop `player` is the one who did NOT make the last move.
        if player == 1:
            wins += 1
    return wins / games


def tweak(genome, mu=0, sigma=0.1):
    """Return a mutated copy of ``genome``; the input mapping is left untouched.

    Independent Gaussian noise drawn from ``N(mu, sigma)`` is added to the
    ``"row"`` and ``"elements"`` genes. A mutated gene that reaches 1 or more
    is reset to 0.999, and one that drops below 0 is reset to 0.
    """
    noise = np.random.normal(mu, sigma, 2)
    mutated = dict(genome)

    for gene, delta in zip(("row", "elements"), noise):
        value = mutated[gene] + delta
        if value >= 1:
            value = 0.999
        elif value < 0:
            value = 0
        mutated[gene] = value

    return mutated


def train(strategy, generations=100, population_size=100, mu=0, sigma=0.1, sigma2=0.01, plus=True, self_adaptive=False):
    """Evolve the ``adaptive`` genome with a single-parent evolution strategy.

    Each generation creates ``population_size`` children by mutating the
    current parent with ``tweak`` and scores them with ``fitness``.

    Args:
        strategy: pair of strategies forwarded to ``fitness``.
        generations: number of generations to run.
        population_size: number of children created per generation.
        mu: mean of the Gaussian mutation.
        sigma: standard deviation of the Gaussian mutation. Ignored when
            ``self_adaptive`` is True, where the starting value is drawn
            uniformly from ``[0, 1)``.
        sigma2: standard deviation used to perturb sigma for every child when
            ``self_adaptive`` is True.
        plus: when True the parent is replaced only by a strictly better
            child; when False the best child always replaces the parent.
        self_adaptive: when True each child mutates with its own perturbed
            sigma, and the sigma of the winning child is inherited.

    Returns:
        The final parent as a ``(genome, fitness)`` tuple. The current parent
        is also printed after every generation.
    """
    row = np.random.rand()
    elements = np.random.rand()
    if self_adaptive:
        sigma = np.random.rand()
    start = {"row": row, "elements": elements}
    parent_genome = (start, fitness(start, strategy))

    for _ in range(generations):
        best_genome = None
        best_sigma = sigma
        for _ in range(population_size):
            if self_adaptive:
                # A Gaussian scale must be non-negative: fold a negative draw
                # back onto the positive axis instead of crashing in tweak().
                child_sigma = abs(float(np.random.normal(sigma, sigma2)))
            else:
                child_sigma = sigma
            child_genome = tweak(parent_genome[0], mu, child_sigma)
            fit = fitness(child_genome, strategy)
            if best_genome is None or fit > best_genome[1]:
                best_genome = (child_genome, fit)
                best_sigma = child_sigma

        # With no children at all the parent simply survives.
        if best_genome is not None and (not plus or best_genome[1] > parent_genome[1]):
            parent_genome = best_genome
            if self_adaptive:
                sigma = best_sigma

        print(parent_genome)

    return parent_genome

In [193]:
# Player 0 plays at random; player 1 is the `adaptive` agent, whose win rate
# is what `fitness` reports.
strategy = (pure_random, adaptive)

# Fixed mutation sigma, "plus" replacement: the parent is only replaced by a
# child with a strictly higher fitness.
train(strategy, generations=100, population_size=100, mu=0, sigma=0.1, plus=True)

({'row': 0.480251996416799, 'elements': 0.43333527460696064}, 0.633)
({'row': 0.4473880952814858, 'elements': 0.4979504663470467}, 0.635)
({'row': 0.36781634639477273, 'elements': 0.669697082317617}, 0.65)
({'row': 0.4067528852485436, 'elements': 0.9329120559164225}, 0.791)
({'row': 0.3716545502703079, 'elements': 0.999}, 0.795)
({'row': 0.3716545502703079, 'elements': 0.999}, 0.795)
({'row': 0.37947649462632477, 'elements': 0.999}, 0.796)
({'row': 0.4425248054928218, 'elements': 0.999}, 0.804)
({'row': 0.4425248054928218, 'elements': 0.999}, 0.804)
({'row': 0.4425248054928218, 'elements': 0.999}, 0.804)
({'row': 0.4209677500025346, 'elements': 0.999}, 0.805)
({'row': 0.4209677500025346, 'elements': 0.999}, 0.805)
({'row': 0.4209677500025346, 'elements': 0.999}, 0.805)
({'row': 0.4209677500025346, 'elements': 0.999}, 0.805)
({'row': 0.4209677500025346, 'elements': 0.999}, 0.805)
({'row': 0.4209677500025346, 'elements': 0.999}, 0.805)
({'row': 0.3486922925832481, 'elements': 0.999}, 0.8

## Self-adaptive

In [192]:
# Shared sigma: a single self-adapted step size mutates both genes
# ("row" and "elements"); `strategy` is the pair defined in an earlier cell.

train(strategy, generations=100, population_size=100, mu=0, sigma=0.1, sigma2=0.01, plus=True, self_adaptive=True)

({'row': 0.1125061926529804, 'elements': 0.9553842722246824}, 0.786)
({'row': 0.2559480185136406, 'elements': 0.9443181584168007}, 0.792)
({'row': 0.3068371997513636, 'elements': 0.999}, 0.796)
({'row': 0.3068371997513636, 'elements': 0.999}, 0.796)
({'row': 0.3068371997513636, 'elements': 0.999}, 0.796)
({'row': 0.34802294047925353, 'elements': 0.999}, 0.802)
({'row': 0.34802294047925353, 'elements': 0.999}, 0.802)
({'row': 0.34802294047925353, 'elements': 0.999}, 0.802)
({'row': 0.34802294047925353, 'elements': 0.999}, 0.802)
({'row': 0.34802294047925353, 'elements': 0.999}, 0.802)
({'row': 0.34802294047925353, 'elements': 0.999}, 0.802)
({'row': 0.34802294047925353, 'elements': 0.999}, 0.802)
({'row': 0.36596260663442254, 'elements': 0.999}, 0.81)
({'row': 0.36596260663442254, 'elements': 0.999}, 0.81)
({'row': 0.36596260663442254, 'elements': 0.999}, 0.81)
({'row': 0.36596260663442254, 'elements': 0.999}, 0.81)
({'row': 0.36596260663442254, 'elements': 0.999}, 0.81)
({'row': 0.3659