Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using rules evolved with an Evolution Strategy (ES)

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) inside it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [41]:
import logging
import numpy as np
import random

from pprint import pprint, pformat
from collections import namedtuple
from copy import deepcopy

## The *Nim* and *Nimply* classes

In [42]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])

# Number of games played to score one (alfa, beta) parameter pair.
N_MATCHES = 30
# Number of iterations of the training loop.
N_TRAINS = 50
# Upper bound used when drawing alfa/beta/sigma, and the divisor that
# rescales alfa and beta inside `weighted_random`.
MAX_PROB = 100

In [43]:
class Nim:
    """Board of a Nim game with an optional cap on objects removed per move.

    Row ``i`` starts with ``2 * i + 1`` objects (1, 3, 5, ...).  ``k`` is the
    largest number of objects a single move may remove; ``None`` means the
    only limit is the size of the chosen row.  An instance is truthy while at
    least one object is still on the board.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Create a board with ``num_rows`` rows and per-move cap ``k``."""
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while the game is not over (some object remains)."""
        return sum(self._rows) > 0

    def __str__(self):
        """Render the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply the move ``ply`` (``(row, num_objects)``) in place.

        Raises:
            AssertionError: if the move removes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A move must remove at least one object: without this lower bound a
        # zero ply is a silent no-op and a negative ply *adds* objects.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) strategies 

In [44]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row and a random amount, clamped to ``k``.

    The amount is drawn uniformly from ``1..row size`` and then clamped down
    to ``state._k``, so when the row is larger than ``k`` the value ``k``
    itself is chosen more often than the smaller amounts.  A board without a
    cap (``state._k is None``) is supported: the draw is used as is.

    Raises:
        IndexError: if every row is empty (there is no row to choose from).
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])

    # Enforce the k variant of the game; skip the clamp when there is no cap
    # (comparing an int with None would raise TypeError).
    if state._k is not None:
        num_objects = min(num_objects, state._k)

    return Nimply(row, num_objects)


In [45]:
def pure_random_2(state: Nim) -> Nimply:
    """Pick a random non-empty row and a uniformly random legal amount.

    The amount is drawn uniformly from ``1..min(row size, k)`` (or
    ``1..row size`` when the board has no cap).  Drawing directly from the
    legal range replaces the previous redraw-until-below-``k`` loop, which
    never terminated for ``k == 1``, could never play exactly ``k`` objects,
    and failed when ``k`` was ``None``.

    Raises:
        IndexError: if every row is empty (there is no row to choose from).
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])

    upper = state.rows[row]
    if state._k is not None:
        upper = min(upper, state._k)

    return Nimply(row, random.randint(1, upper))

In [46]:
def weighted_random(state: Nim, alfa, beta) -> Nimply:
    """Pick a move steered by the two evolvable parameters ``alfa`` and ``beta``.

    Args:
        state: current board.
        alfa: selects the row: ``abs(int(alfa / MAX_PROB))`` is used as an
            index into the list of non-empty rows (the last non-empty row is
            used when the index is too large).
        beta: scales a random draw in ``1..row size + 1`` by
            ``beta / MAX_PROB`` to obtain the number of objects to remove.

    Returns:
        A legal move: at least one object, at most the row size, and at most
        ``state._k`` when the board has a cap.

    Raises:
        IndexError: if every row is empty.
    """
    # NOTE(review): for |alfa| < MAX_PROB this index is always 0, so the first
    # non-empty row is chosen; presumably a wider spread over the rows was
    # intended - confirm before changing, the trained values depend on it.
    idx = abs(int(alfa / MAX_PROB))
    candidates = [r for r, c in enumerate(state.rows) if c > 0]

    # Avoid running past the end of the candidate list.
    row = candidates[min(idx, len(candidates) - 1)]

    available = state.rows[row]
    num_objects = int(np.round((beta / MAX_PROB) * random.randint(1, available + 1)))

    # Force a legal amount: never more than the row holds nor more than k
    # (when a cap exists - k may be None), and never fewer than one object.
    upper = available if state._k is None else min(available, state._k)
    num_objects = max(1, min(num_objects, upper))

    return Nimply(row, num_objects)

In [47]:
def gabriele(state: Nim) -> Nimply:
    """Take as many objects as allowed from the first non-empty row.

    "As many as allowed" is the row size, limited to ``state._k`` when the
    board has a cap (``None`` means no cap).

    Raises:
        ValueError: if no move is possible (every row is empty).
    """
    for row, count in enumerate(state.rows):
        limit = count if state._k is None else min(count, state._k)
        if limit >= 1:
            return Nimply(row, limit)
    raise ValueError("no possible move: every row is empty")


In [48]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    # NOTE(review): unfinished stub - the genome is created but never used and
    # no move is returned, so the function implicitly returns None despite the
    # `Nimply` return annotation.
    genome = {"love_small": 0.5}


In [49]:
def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of the board: the bitwise XOR of all row sizes.

    Folding with ``^`` on plain integers gives the same value as adding the
    binary digits column by column modulo 2, but it has no fixed bit width
    (row sizes of 2**32 or more work) and an empty board yields 0.
    """
    total = 0
    for count in state.rows:
        total ^= count
    return int(total)


def analize(raw: Nim) -> dict:
    """Evaluate every legal move on ``raw`` without modifying it.

    Returns:
        A dict with a single key ``"possible_moves"`` mapping each legal
        ``Nimply`` to the nim-sum of the board reached by playing it.  Moves
        are listed row by row, smallest amount first.
    """
    cooked = dict()
    cooked["possible_moves"] = dict()
    for r, c in enumerate(raw.rows):
        # Largest amount that may be taken from this row; k may be None,
        # meaning the row size is the only limit.
        max_take = c if raw._k is None else min(c, raw._k)
        for o in range(1, max_take + 1):
            ply = Nimply(r, o)
            # Play the move on a copy so the caller's board is untouched.
            tmp = deepcopy(raw)
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    """Choose at random among the moves that leave a non-zero nim-sum.

    When no such move exists, the choice is made among all legal moves.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")

    outcomes = analysis["possible_moves"]
    # NOTE(review): keeping moves whose resulting nim-sum is != 0 looks like
    # the opposite of the usual nim-sum rule (leave 0 to the opponent) -
    # confirm whether this is intentional.
    candidates = [move for move in outcomes if outcomes[move] != 0]
    if len(candidates) == 0:
        candidates = list(outcomes)
    return random.choice(candidates)


## Oversimplified match

In [50]:
logging.getLogger().setLevel(logging.INFO)
strategy = (optimal, weighted_random)


def _play_matches(alfa, beta, verbose=False):
    """Play N_MATCHES games of `strategy[0]` vs `strategy[1]`; return win counts.

    Player 0 always moves first on a fresh ``Nim(5, 4)`` board; player 1 is
    called with the ``alfa``/``beta`` parameters.  With ``verbose`` every
    move and board is logged at INFO level.  The result is ``[wins of
    player 0, wins of player 1]``.
    """
    p_wins = [0, 0]
    for _ in range(N_MATCHES):
        nim = Nim(5, 4)
        if verbose:
            logging.info(f"init : {nim}")
        player = 0
        while nim:
            if player == 0:
                ply = strategy[player](nim)
            else:
                ply = strategy[player](nim, alfa, beta)
            if verbose:
                logging.info(f"ply: player {player} plays {ply}")
            nim.nimming(ply)
            if verbose:
                logging.info(f"status: {nim}")
            player = 1 - player
        # `player` was flipped after the last move, so it now names the
        # player who did NOT take the last object, i.e. the winner.
        if verbose:
            logging.info(f"----------------------------     status: Player {player} won!      ----------------------------")
        p_wins[player] += 1
    return p_wins


alfa = random.randint(0, MAX_PROB)
beta = random.randint(0, MAX_PROB)
# sigma chosen randomly; at least 1, because a standard deviation of 0 would
# make every mutation a copy of its parent and the search could not move.
sigma = random.randint(1, MAX_PROB)

# Best parameters evaluated so far and the wins (for player 1) they achieved.
alfa_best = alfa
beta_best = beta
wins_best = 0

# Candidate evaluated in the current iteration; the first one is the
# initial random pair.
alfa_try, beta_try = alfa, beta

for _ in range(N_TRAINS):
    # The values printed here are the candidate about to be evaluated.
    print(f"alfa_b: {alfa_try}, beta_best: {beta_try}")
    # Execute N_MATCHES for the candidate alfa and beta parameters.
    p_wins = _play_matches(alfa_try, beta_try)

    print(f"BEST WINS: {wins_best} \tACTUAL WINS: {p_wins[1]}")

    #  --- PLUS STRATEGY ---
    # Keep the candidate only if it scored better than the best so far, then
    # derive the next candidate by mutating the best parameters.  Previously
    # the winning candidate was never stored: it was overwritten by a
    # mutation of the initial alfa/beta, so the final evaluation ran on an
    # arbitrary, never-evaluated pair.
    if wins_best < p_wins[1]:
        wins_best = p_wins[1]
        alfa_best, beta_best = alfa_try, beta_try
    alfa_try = round(np.random.normal(alfa_best, sigma))
    beta_try = round(np.random.normal(beta_best, sigma))

    #  --- COMMA STRATEGY ---
    # Alternative: always mutate from the candidate just evaluated,
    # regardless of its score.

# Play the last N_MATCHES with the best parameters found during training.
p_wins = _play_matches(alfa_best, beta_best, verbose=True)
print(p_wins)

alfa_b: 97, beta_best: 44
BEST WINS: 0 	ACTUAL WINS: 12
alfa_b: 54, beta_best: 49


BEST WINS: 12 	ACTUAL WINS: 13
alfa_b: 91, beta_best: 28
BEST WINS: 13 	ACTUAL WINS: 10
alfa_b: 79, beta_best: 19
BEST WINS: 13 	ACTUAL WINS: 6
alfa_b: 52, beta_best: 59
BEST WINS: 13 	ACTUAL WINS: 13
alfa_b: 48, beta_best: 61
BEST WINS: 13 	ACTUAL WINS: 9
alfa_b: 42, beta_best: 50
BEST WINS: 13 	ACTUAL WINS: 15
alfa_b: 94, beta_best: 71
BEST WINS: 15 	ACTUAL WINS: 6
alfa_b: 106, beta_best: 46
BEST WINS: 15 	ACTUAL WINS: 11
alfa_b: 90, beta_best: 46
BEST WINS: 15 	ACTUAL WINS: 17
alfa_b: 119, beta_best: 62
BEST WINS: 17 	ACTUAL WINS: 7
alfa_b: 123, beta_best: 33
BEST WINS: 17 	ACTUAL WINS: 5
alfa_b: 112, beta_best: 53
BEST WINS: 17 	ACTUAL WINS: 8
alfa_b: 97, beta_best: 58
BEST WINS: 17 	ACTUAL WINS: 14
alfa_b: 69, beta_best: 27
BEST WINS: 17 	ACTUAL WINS: 4
alfa_b: 45, beta_best: 24
BEST WINS: 17 	ACTUAL WINS: 7
alfa_b: 30, beta_best: 5
BEST WINS: 17 	ACTUAL WINS: 16
alfa_b: 23, beta_best: 30
BEST WINS: 17 	ACTUAL WINS: 4
alfa_b: -4, beta_best: 20
BEST WINS: 17 	ACTUAL WINS: 7
alfa_b:

INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <1 1 5 7 9>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 1 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=2)
INFO:root:status: <0 1 5 5 9>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 0 5 5 9>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=4)
INFO:root:status: <0 0 1 5 9>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 0 5 9>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=1)
INFO:root:status: <0 0 0 5 8>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=4)
INFO:root:status: <0 0 0 1 8>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 0 8>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=4)
INFO:root:status: <0 0 0 0 4>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=2)
INFO:root:status: <0 0

BEST WINS: 17 	ACTUAL WINS: 5
alfa_b: 107, beta_best: 169
BEST WINS: 17 	ACTUAL WINS: 11


INFO:root:ply: player 0 plays Nimply(row=4, num_objects=3)
INFO:root:status: <0 0 1 1 4>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 0 1 4>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=4)
INFO:root:status: <0 0 0 1 0>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 0 0>
INFO:root:----------------------------     status: Player 0 won!      ----------------------------
INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=2)
INFO:root:status: <1 3 5 5 9>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 5 9>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 2 5 5 9>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=2)
INFO:root:status: <0 0 5 5 9>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=4)
INFO:root:status: <0 0 5 1 9>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=4)
INFO:root:st

[23, 7]
