Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [1]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
from random import random, choice, randint
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [2]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [3]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [4]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [5]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [6]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [7]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = choice(spicy_moves)
    return ply


## Oversimplified match

In [8]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=3)
INFO:root:status: <1 3 5 4 9>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=2)
INFO:root:status: <1 3 5 2 9>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=3)
INFO:root:status: <1 0 5 2 9>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=5)
INFO:root:status: <1 0 5 2 4>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=2)
INFO:root:status: <1 0 5 2 2>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 0 5 2 2>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=1)
INFO:root:status: <0 0 5 2 1>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=1)
INFO:root:status: <0 0 5 2 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=5)
INFO:root:status: <0 0 0 2 0>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 1 0>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0

Copyright **`(c)`** 2023 Gabriele Ferro `<gabrieleferro.00@gmail.com>`  
[`https://github.com/Gabbo62/ComputationalIntelligence`](https://github.com/Gabbo62/ComputationalIntelligence) ~ 
[`LICENSE`](https://github.com/Gabbo62/ComputationalIntelligence/blob/master/LICENSE)

# Fixed Rule

In [9]:
def remain_sum(state: Nim) -> int:
    return sum(state.rows)

def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = remain_sum(tmp)
    return cooked

def fixed_rule(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if np.log2(ns+1) % 1 == 0 and ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    # print(spicy_moves)
    ply = choice(spicy_moves)
    return ply

In [10]:
def play(num_rows: int, strategy: tuple, print_log: bool = False, start_player: int = 0):
    nim = Nim(num_rows)
    if print_log: logging.info(f"init : {nim}")
    player = start_player
    while nim:
        ply = strategy[player](nim)
        if print_log: logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        if print_log: logging.info(f"status: {nim}")
        player = 1 - player
    if print_log: logging.info(f"status: Player {player} won!")
    return player

### Match

In [11]:
strategy = (optimal, fixed_rule)

winner = []
num_play = 1000
nim_rows = 5
start_player = 0
for _ in range(0, num_play):
    winner.append(play(nim_rows, strategy, print_log=False, start_player=start_player))
    start_player = 1 - start_player
print(f'{sum(winner)/num_play*100}%')


81.2%


# Evolved Rule

In [37]:
import copy
from dataclasses import dataclass
from random import gauss
from tqdm import tqdm

@dataclass
class Nim_move:
    fitness: int
    move: Nimply
    
    def __init__(self, fitness:int, move:Nimply=None) -> None:
        self.fitness = fitness
        self.move = move

OFFSPRING_SIZE = 100
MUTATION_PROBABILITY = 1.0
NGENERATION = 10

def mutate(starting_state: Nim_move, state: Nim) -> Nim_move:
    if starting_state.move is None:
        row = choice([i for i in range(len(state.rows)) if state.rows[i] != 0])
        take = round(state.rows[row]/2)
    elif state.rows[starting_state.move.row] == 0:
        row = choice([i for i in range(len(state.rows)) if state.rows[i] != 0])
        take = round(state.rows[row]/2)
    else:
        row = starting_state.move.row
        take = starting_state.move.num_objects

    take = round(gauss(take, tweak_var))
    take = min(state.rows[row], max(1, take))
    move = Nimply(row, take)
    state = deepcopy(state)
    
    state.nimming(move)
    fitness = nim_sum(state)
        
    return Nim_move(fitness, move)

def evolved_rule(state: Nim) -> Nimply:    
    population = Nim_move(nim_sum(state))
    
    for _ in range(NGENERATION):
        offspring = list([population] if population.move is not None else [])
        for _ in range(OFFSPRING_SIZE):
            if random() < MUTATION_PROBABILITY:
                offspring.append(mutate(population, state))

        population = min(offspring, key=lambda i: i.fitness)
    return population.move

### Training

In [53]:
strategy = (optimal, evolved_rule)

winner = []
num_play = 100
nim_rows = 5
print_log = num_play < 10
start_player = 0

tweak_var = 0.0001

for i in tqdm(range(0, num_play)):
    current_winner = play(nim_rows, strategy, print_log=print_log, start_player=start_player)
    winner.append(current_winner)
        
    if i%5 == 0 and i != 0 and sum(winner[i-5:i]) > 1:
        tweak_var *= np.e**(1/3)
    elif i%5 == 0 and i != 0:
        tweak_var /= np.e**(1/12)
    
print(f'Training obtained variance = {tweak_var}')
print(f'Training winning percentage: {sum(winner)/num_play*100}%')

100%|██████████| 100/100 [00:34<00:00,  2.90it/s]

Training obtained variance = 0.016131086363082886
Training winning percentage: 51.0%





### Match

In [54]:
strategy = (optimal, evolved_rule)

winner = []
num_play = 100
nim_rows = 5
print_log = num_play < 10
start_player = 0

print(f'Playing with variance = {tweak_var}')

for _ in tqdm(range(0, num_play)):
    current_winner = play(nim_rows, strategy, print_log=print_log, start_player=start_player)
    winner.append(current_winner)
    start_player = 1 - start_player
    
print(f'Winning percentage: {sum(winner)/num_play*100}%')

Playing with variance = 0.016131086363082886


100%|██████████| 100/100 [00:36<00:00,  2.74it/s]

Winning percentage: 43.0%



