Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [141]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from math import ceil
import numpy as np

## The *Nim* and *Nimply* classes

In [142]:
Nimply = namedtuple("Nimply", "row, num_objects")
N_MATCHES = 100
GAME_SIZE = 5
N_CYCLES = 50

In [143]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [144]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [145]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [146]:
def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


In [147]:
def evolutionary_strategy_1(state: Nim, choice_parameters: list):
    '''makes a choice based on parameters that are modified every evolutionary cycle'''
    remaining_rows =  [r for r, c in enumerate(state.rows) if c > 0]
    row_index = ceil(choice_parameters[0] / 100 * len(remaining_rows)) - 1
    row = remaining_rows[row_index]
    
    num_objects = ceil(choice_parameters[0] / 100 * state.rows[row])
    return Nimply(row, num_objects)


def evolve_1(choice_parameters):
    '''mutates the parameters used by the evolutionary strategy using a gaussian distribution'''
    sigma = 10
    row_parameter = ceil(np.random.normal(choice_parameters[0], sigma, 1)[0])
    if row_parameter > 100:
        correction = row_parameter - 100
        row_parameter = 100 - correction
    if row_parameter < 0:
        row_parameter = - row_parameter

    num_parameter = ceil(np.random.normal(choice_parameters[1], sigma, 1)[0])
    if num_parameter > 100:
        correction = row_parameter - 100
        num_parameter = 100 - correction
    if num_parameter < 0:
        num_parameter = - row_parameter
    return [row_parameter, num_parameter, 0]

In [148]:
def evolutionary_strategy_2(state: Nim, ideal_nim_sum: list):
    '''Generates some random moves and picks the best one based on parameters that are modified every evolutionary cycle'''
    '''choice parameter represents the ideal nim_sum the move wants to achireve'''
    N_MOVES = 100
    
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    chosen_move = [row, num_objects]
    possible_next_state = deepcopy(state)
    possible_next_state.nimming([row, num_objects])
    best_fitness = abs( nim_sum(possible_next_state) - ideal_nim_sum )
    
    for _ in range(N_MOVES):
        row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
        num_objects = random.randint(1, state.rows[row])
        possible_move = [row, num_objects]

        possible_next_state = deepcopy(state)
        possible_next_state.nimming([row, num_objects])
        fitness = abs( nim_sum(possible_next_state) - ideal_nim_sum )
        if fitness < best_fitness:
            chosen_move = possible_move

    return Nimply(chosen_move[0], chosen_move[1])


def evolve_2(choice_parameters):
    '''mutates the parameters used by the evolutionary strategy using a gaussian distribution'''
    sigma = 5
    row_parameter = ceil(np.random.normal(choice_parameters, sigma, 1)[0])
    if row_parameter > GAME_SIZE:
        row_parameter = 2 *GAME_SIZE - row_parameter
    if row_parameter < -GAME_SIZE:
        row_parameter = -(row_parameter - 2 *GAME_SIZE)

    return row_parameter

## Oversimplified match

In [149]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, evolutionary_strategy_1)
strategy_name = ['optimal', 'evolutionary_strategy']


ev_parameters = [random.choice(range(1, 100)), random.choice(range(1, 100)), 0] # [row choice parameter, num choice parameter, victories]
best_ev_parameters = ev_parameters
ev_cycles = 0
while ev_cycles < N_CYCLES:
    win_rates = [0, 0]
    matches_played = 0
    while matches_played < N_MATCHES:
        nim = Nim(GAME_SIZE)
        #logging.info(f"init : {nim}")
        player = 0
        while nim:
            if player == 0:
                ply = strategy[0](nim)
            if player == 1:
                ply = strategy[1](nim, ev_parameters)
            #logging.info(f"ply: player {player} plays {ply}")
            nim.nimming(ply)
            #logging.info(f"status: {nim}")
            player = 1 - player
        #logging.info(f"status: Player {player} won!")
        matches_played += 1
        win_rates[player] += 1
        if player == 1:
            ev_parameters[2] += 1
    logging.info(f'Cycle: {ev_cycles + 1}  Scores: {strategy_name[0]} {win_rates[0]} - {strategy_name[1]} {win_rates[1]}')
    ev_cycles += 1
    if ev_parameters[2] > best_ev_parameters[2]:
        best_ev_parameters = ev_parameters
        ev_parameters = evolve_1(ev_parameters)
    else:
        ev_parameters = evolve_1(best_ev_parameters)

logging.info('Final match')
win_rates = [0, 0]
matches_played = 0
while matches_played < N_MATCHES:
    nim = Nim(GAME_SIZE)
    player = 0
    while nim:
        if player == 0:
            ply = strategy[0](nim)
        if player == 1:
            ply = strategy[1](nim, best_ev_parameters)
        nim.nimming(ply)
        player = 1 - player
    matches_played += 1
    win_rates[player] += 1
logging.info(f'\tScores: {strategy_name[0]} {win_rates[0]} - {strategy_name[1]} {win_rates[1]}')

INFO:root:Cycle: 1  Scores: optimal 70 - evolutionary_strategy 30
INFO:root:Cycle: 2  Scores: optimal 66 - evolutionary_strategy 34
INFO:root:Cycle: 3  Scores: optimal 71 - evolutionary_strategy 29
INFO:root:Cycle: 4  Scores: optimal 70 - evolutionary_strategy 30
INFO:root:Cycle: 5  Scores: optimal 76 - evolutionary_strategy 24
INFO:root:Cycle: 6  Scores: optimal 74 - evolutionary_strategy 26
INFO:root:Cycle: 7  Scores: optimal 66 - evolutionary_strategy 34
INFO:root:Cycle: 8  Scores: optimal 66 - evolutionary_strategy 34
INFO:root:Cycle: 9  Scores: optimal 74 - evolutionary_strategy 26
INFO:root:Cycle: 10  Scores: optimal 65 - evolutionary_strategy 35
INFO:root:Cycle: 11  Scores: optimal 72 - evolutionary_strategy 28
INFO:root:Cycle: 12  Scores: optimal 69 - evolutionary_strategy 31
INFO:root:Cycle: 13  Scores: optimal 77 - evolutionary_strategy 23
INFO:root:Cycle: 14  Scores: optimal 81 - evolutionary_strategy 19
INFO:root:Cycle: 15  Scores: optimal 70 - evolutionary_strategy 30
INFO

In [151]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, evolutionary_strategy_2)
strategy_name = ['optimal', 'evolutionary_strategy']


ev_parameter = random.choice(range(-GAME_SIZE, GAME_SIZE)) # [row choice parameter, num choice parameter, victories]
ev_score = 0
best_ev_parameter = ev_parameter
best_ev_score = 0
ev_cycles = 0
while ev_cycles < N_CYCLES:
    ev_score = 0
    win_rates = [0, 0]
    matches_played = 0
    while matches_played < N_MATCHES:
        nim = Nim(GAME_SIZE)
        # logging.info(f"init : {nim}")
        player = 0
        while nim:
            if player == 0:
                ply = strategy[0](nim)
            if player == 1:
                ply = strategy[1](nim, ev_parameter)
            # logging.info(f"ply: player {player} plays {ply}")
            nim.nimming(ply)
            #logging.info(f"status: {nim}")
            player = 1 - player
        # logging.info(f"status: Player {player} won!")
        matches_played += 1
        win_rates[player] += 1
    ev_score = win_rates[1]
    logging.info(f'Cycle: {ev_cycles + 1}  Scores: {strategy_name[0]} {win_rates[0]} - {strategy_name[1]} {win_rates[1]}') 
    logging.info(f'\tCurrent ev_p: {ev_parameter} \t Best ev_p: {best_ev_parameter}')
    ev_cycles += 1
    if ev_score > best_ev_score:
        best_ev_score = ev_score
        best_ev_parameter = ev_parameter
        logging.info(f'\tNew best ev_p found: {best_ev_parameter}')
        ev_parameter = evolve_2(ev_parameter)
    else:
        ev_parameter = evolve_2(best_ev_parameter)


logging.info(f'Final match using ev_p: {best_ev_parameter}')
win_rates = [0, 0]
matches_played = 0
while matches_played < N_MATCHES:
    nim = Nim(GAME_SIZE)
    player = 0
    while nim:
        if player == 0:
            ply = strategy[0](nim)
        if player == 1:
            ply = strategy[1](nim, best_ev_parameter)
        nim.nimming(ply)
        player = 1 - player
    matches_played += 1
    win_rates[player] += 1
logging.info(f'\tScores: {strategy_name[0]} {win_rates[0]} - {strategy_name[1]} {win_rates[1]}')


INFO:root:Cycle: 1  Scores: optimal 74 - evolutionary_strategy 26
INFO:root:	Current ev_p: -5 	 Best ev_p: -5
INFO:root:	New best ev_p found: -5
INFO:root:Cycle: 2  Scores: optimal 75 - evolutionary_strategy 25
INFO:root:	Current ev_p: 20 	 Best ev_p: -5
INFO:root:Cycle: 3  Scores: optimal 75 - evolutionary_strategy 25
INFO:root:	Current ev_p: -3 	 Best ev_p: -5
INFO:root:Cycle: 4  Scores: optimal 59 - evolutionary_strategy 41
INFO:root:	Current ev_p: 4 	 Best ev_p: -5
INFO:root:	New best ev_p found: 4
INFO:root:Cycle: 5  Scores: optimal 69 - evolutionary_strategy 31
INFO:root:	Current ev_p: 4 	 Best ev_p: 4
INFO:root:Cycle: 6  Scores: optimal 72 - evolutionary_strategy 28
INFO:root:	Current ev_p: 5 	 Best ev_p: 4
INFO:root:Cycle: 7  Scores: optimal 67 - evolutionary_strategy 33
INFO:root:	Current ev_p: -2 	 Best ev_p: 4
INFO:root:Cycle: 8  Scores: optimal 74 - evolutionary_strategy 26
INFO:root:	Current ev_p: 0 	 Best ev_p: 4
INFO:root:Cycle: 9  Scores: optimal 77 - evolutionary_strat