Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [2]:
# Import libraries

import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
import numpy as np

## The *Nim* and *Nimply* classes

In [3]:
Nimply = namedtuple("Nimply", "row, num_objects")

In [4]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        if row < 0 or row >= len(self._rows):
            raise ValueError("Invalid row index.")
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [5]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [6]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [7]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [8]:

def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


## Oversimplified match

In [9]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=6)
INFO:root:status: <1 3 5 1 9>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 2 5 1 9>
INFO:root:ply: player 0 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 2 5 1 9>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=2)
INFO:root:status: <0 0 5 1 9>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=2)
INFO:root:status: <0 0 3 1 9>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 3 0 9>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=2)
INFO:root:status: <0 0 3 0 7>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 2 0 7>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=6)
INFO:root:status: <0 0 2 0 1>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 1 0 1>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=1)
INFO:root:status: <0 0

First let's see what happen when the two players have the same strategy : we will play 50 times and see the winning rates of each player

In [10]:
logging.getLogger().setLevel(logging.INFO)

strategy = (pure_random, pure_random)

score = [0, 0]
for i in range(100):
    nim = Nim(5)
    #logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:score: [46, 54]


In [11]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, optimal)

score = [0, 0]
for i in range(100):
    nim = Nim(5)
    #logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:score: [58, 42]


In [12]:
logging.getLogger().setLevel(logging.INFO)

strategy = (gabriele, gabriele)

score = [0, 0]
for i in range(100):
    nim = Nim(6)
    #logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:score: [100, 0]


We can see that if players choose the same strategies than they are equally likely to win except for gabriele where its is only the second player that win if the number of row is odd and the first one if the number of row is even (Which makes sense because you take all the objects of the smallest row so if both players plays like that the issue of the game is always the same and oonly depend on the number of rows)

1) Write an expert agent using Nim-Sum

In [13]:
def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    # Create binary lines were the last bits are the number of objects in each row(written in binary)
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)

print(nim_sum(Nim(5)))

9


In [22]:
def expert_nim_agent(state: Nim) -> Nimply:
    nim = nim_sum(state)
    non_null = len([r for r in state.rows if r > 0])

    # Case 1: There is only one row with objects, we take all the objects but one --> We win if the number of object >1
    if non_null == 1:
        max_row = max(state.rows)
        row_index = state.rows.index(max_row)
        return row_index, max_row - 1
    
    # Case 2: There is only 2 row with objects and one with only object in it, we take all the objects but one in the row with the maximum of object
    # --> We win
    if non_null == 2 and 1 in state.rows:
        max_row = max(state.rows)
        row_index = state.rows.index(max_row)
        return row_index, max_row
    
    # Case 3: We are on a stable state : we make a little perturbation
    if nim == 0:
        max_row = max(state.rows)
        row_index = state.rows.index(max_row)
        return row_index, 1
    
    # Case 4: We are not in the stable state nut we can make it stable by removing some object
    for i, row in enumerate(state.rows):
        if row & nim ^ nim == 0:
            return i, nim
        
    # Case 5: We are in non of the above cases : we are in loosing position. We make a random move
    return pure_random(state)

In [23]:
logging.getLogger().setLevel(logging.INFO)
strategy = (expert_nim_agent, pure_random)
nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")

INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays (4, 9)
INFO:root:status: <1 3 5 7 0>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=3)
INFO:root:status: <1 3 2 7 0>
INFO:root:ply: player 0 plays (3, 7)
INFO:root:status: <1 3 2 0 0>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=2)
INFO:root:status: <1 1 2 0 0>
INFO:root:ply: player 0 plays (2, 2)
INFO:root:status: <1 1 0 0 0>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 1 0 0 0>
INFO:root:ply: player 0 plays (1, 0)
INFO:root:status: <0 1 0 0 0>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 0 0 0 0>
INFO:root:status: Player 0 won!


In [24]:
logging.getLogger().setLevel(logging.INFO)
strategy = (expert_nim_agent, pure_random)
score = [0, 0]
for i in range(100):
    alea = random.randint(5, 20)
    nim = Nim(alea)
    #logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:score: [100, 0]


In [25]:
logging.getLogger().setLevel(logging.INFO)
strategy = (expert_nim_agent, optimal)
score = [0, 0]
for i in range(100):
    alea = random.randint(5, 20)
    nim = Nim(alea)
    #logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:score: [100, 0]


As we can see the results are reaaly satisfying with this expert agent as we are always winning against optimal strategies.