Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [3]:
# Import libraries

import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
import numpy as np

## The *Nim* and *Nimply* classes

In [4]:
Nimply = namedtuple("Nimply", "row, num_objects")

In [85]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        if row < 0 or row >= len(self._rows):
            raise ValueError("Invalid row index.")
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [78]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, min(state.rows[row],state._k))
    return Nimply(row, num_objects)


In [10]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [11]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [13]:

def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


## Oversimplified match

In [14]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=2)
INFO:root:status: <1 3 3 7 9>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=6)
INFO:root:status: <1 3 3 7 3>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=2)
INFO:root:status: <1 3 1 7 3>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=6)
INFO:root:status: <1 3 1 1 3>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <1 1 1 1 3>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <1 1 0 1 3>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 0 0 1 3>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 0 0 1 3>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=3)
INFO:root:status: <0 0 0 1 0>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 0 0>
INFO:root:status: Player 0 won!


First let's see what happen when the two players have the same strategy : we will play 50 times and see the winning rates of each player

In [23]:
logging.getLogger().setLevel(logging.INFO)

strategy = (pure_random, pure_random)

score = [0, 0]
for i in range(100):
    nim = Nim(5)
    #logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:score: [50, 50]


In [24]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, optimal)

score = [0, 0]
for i in range(100):
    nim = Nim(5)
    #logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:score: [51, 49]


In [30]:
logging.getLogger().setLevel(logging.INFO)

strategy = (gabriele, gabriele)

score = [0, 0]
for i in range(100):
    nim = Nim(6)
    #logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:score: [100, 0]


We can see that if players choose the same strategies than they are equally likely to win except for gabriele where its is only the second player that win if the number of row is odd and the first one if the number of row is even (Which makes sense because you take all the objects of the smallest row so if both players plays like that the issue of the game is always the same and oonly depend on the number of rows)

1) Write an expert agent using Nim-Sum

In [31]:
def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    # Create binary lines were the last bits are the number of objects in each row(written in binary)
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)

print(nim_sum(Nim(5)))

9


In [39]:
tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in [i * 2 + 1 for i in range(6)]])
print(tmp)
print(tmp.sum(axis=0))
xor=tmp.sum(axis=0) % 2
print(xor)
print(int("".join(str(_) for _ in xor), base=2)
)

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 3 6]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]
2


In [111]:
def expert_nim_agent(state: Nim) -> Nimply:
    """An agent that plays optimally."""
    k = state._k
    nim_sum_value = nim_sum(state)

    if nim_sum_value == 0:
        
        valid_moves = []
        for row, pile in enumerate(state.rows):
            for objects_to_remove in range(1, min(k, pile) + 1):
                valid_moves.append((row, objects_to_remove))
        if not valid_moves:
            # No valid moves within the k limit, so we make a random move.
            random_pile = random.randint(0, len(state.rows) - 1)
            objects_to_remove = random.randint(1, min(k, state.rows[random_pile]))

            return (random_pile, objects_to_remove)

        return random.choice(valid_moves)
    
    else:
        for row, pile in enumerate(state.rows):

            target_xor = nim_sum_value ^ pile
            if target_xor < pile:
                objects_to_remove = pile - target_xor
                if objects_to_remove <= k and state.rows[row] >= objects_to_remove:

                    return (row, objects_to_remove)
                
                

    # If no optimal move is found, make a random move within the k limit.
    valid_moves = []
    for row, pile in enumerate(state.rows):
        for objects_to_remove in range(1, min(k-1, pile) + 1):
            valid_moves.append((row, objects_to_remove))
    if not valid_moves:
        # No valid moves within the k limit, so we make a random move.
        random_pile = random.randint(0, len(state.rows) - 1)
        objects_to_remove = random.randint(1, min(k, state.rows[random_pile]))

        return (random_pile, objects_to_remove)

    return random.choice(valid_moves)

In [117]:
logging.getLogger().setLevel(logging.INFO)

strategy = (pure_random, expert_nim_agent)

nim = Nim(10, k=5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")

INFO:root:init : <1 3 5 7 9 11 13 15 17 19>
INFO:root:ply: player 0 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 7 9 11 13 15 17 19>
INFO:root:ply: player 1 plays (1, 3)
INFO:root:status: <0 0 5 7 9 11 13 15 17 19>
INFO:root:ply: player 0 plays Nimply(row=8, num_objects=1)
INFO:root:status: <0 0 5 7 9 11 13 15 16 19>
INFO:root:ply: player 1 plays (2, 1)
INFO:root:status: <0 0 4 7 9 11 13 15 16 19>
INFO:root:ply: player 0 plays Nimply(row=7, num_objects=4)
INFO:root:status: <0 0 4 7 9 11 13 11 16 19>
INFO:root:ply: player 1 plays (2, 4)
INFO:root:status: <0 0 0 7 9 11 13 11 16 19>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 6 9 11 13 11 16 19>
INFO:root:ply: player 1 plays (4, 1)
INFO:root:status: <0 0 0 6 8 11 13 11 16 19>
INFO:root:ply: player 0 plays Nimply(row=5, num_objects=3)
INFO:root:status: <0 0 0 6 8 8 13 11 16 19>
INFO:root:ply: player 1 plays (3, 1)
INFO:root:status: <0 0 0 5 8 8 13 11 16 19>
INFO:root:ply: player 0 play

In [123]:

logging.getLogger().setLevel(logging.INFO)

strategy = (pure_random, expert_nim_agent)

score = [0, 0]
for i in range(100):
    alea1 = random.randint(5, 20)
    alea2 = random.randint(2, alea1)
    nim = Nim(alea1, k=alea2)
    logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:init : <1 3 5 7 9 11>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21>
INFO:root:init : <1 3 5 7 9 11 13>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25>
INFO:root:init : <1 3 5 7 9 11 13 15>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23>
INFO:root:init : <1 3 5 7 9 11>
INFO:root:init : <1 3 5 7 9 11 13 15 17>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21>
INFO:root:init : <1 3 5 7 9 11 13 15 17>
INFO:root:init : <1 3 5 7 9 11 13>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37>
INFO:root:init : <1 3 5 7 9 1

In [129]:
def avoid_last_object_nim_agent(state: Nim) -> Nimply:
    k = state._k
    nim_sum_value = nim_sum(state)

    if nim_sum_value == 0:
        # If the nim-sum is already 0, we are in a losing position.
        # Make a random move within the limits of k.
        valid_moves = []
        for i, pile in enumerate(state.rows):
            for objects_to_remove in range(1, min(k, pile) + 1):
                valid_moves.append((i, objects_to_remove))
        if not valid_moves:
            # No valid moves within the k limit, so we make a random move.
            random_row = random.randint(0, len(state.rows) - 1)
            objects_to_remove = random.randint(1, min(k, state.rows[random_row]))
            return (random_row, objects_to_remove)
        return random.choice(valid_moves)
    else:
        for i, pile in enumerate(state.rows):
            target_xor = nim_sum_value ^ pile
            if target_xor < pile:
                objects_to_remove = pile - target_xor
                if objects_to_remove <= k and state.rows[i] >= objects_to_remove:
                    return (i, objects_to_remove)

    # If no optimal move is found, make a random move within the k limit.
    valid_moves = []
    for i, pile in enumerate(state.rows):
        for objects_to_remove in range(1, min(k, pile) + 1):
            valid_moves.append((i, objects_to_remove))
    if not valid_moves:
        # No valid moves within the k limit, so we make a random move.
        random_row = random.randint(0, len(state.rows) - 1)
        objects_to_remove = random.randint(1, min(k, state.rows[random_row]))
        return (random_row, objects_to_remove)
    return random.choice(valid_moves)

In [132]:

logging.getLogger().setLevel(logging.INFO)

strategy = (avoid_last_object_nim_agent, pure_random)

score = [0, 0]
for i in range(100):
    alea1 = random.randint(5, 20)
    alea2 = random.randint(2, alea1)
    nim = Nim(alea1, k=alea2)
    logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35>
INFO:root:init : <1 3 5 7 9 11>
INFO:root:init : <1 3 5 7 9>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23>
INFO:root:init : <1 3 5 7 9 11 13 15>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19>
INFO:root:init : <1 3 5 7 9 11 13>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19>
INFO:root:init : 

In [148]:
def minimax_nim_agent(state):
    def nim_sum(piles):
        xor_sum = 0
        for pile in piles:
            xor_sum ^= pile
        return xor_sum

    def minimax(piles, k):
        if not piles or nim_sum(piles) == 0:
            # No piles left or a losing position, make a random move
            return random_random_move(piles, k)

        for i, pile in enumerate(piles):
            for objects_to_remove in range(1, min(k, pile) + 1):
                new_piles = list(piles)  # Convert to a list for modification
                new_piles[i] -= objects_to_remove
                if nim_sum(new_piles) == 0:
                    return (i, objects_to_remove)

        # No optimal move found, make a random move
        return random_random_move(piles, k)

    def random_random_move(piles, k):
        valid_moves = []
        for i, pile in enumerate(piles):
            for objects_to_remove in range(1, min(k, pile) + 1):
                valid_moves.append((i, objects_to_remove))
        if valid_moves:
            return random.choice(valid_moves)
        else:
            return random.choice([(i, 1) for i in range(len(piles)) if piles])

    return minimax(list(state.rows), state._k)


In [149]:

logging.getLogger().setLevel(logging.INFO)

strategy = (minimax_nim_agent, pure_random)

score = [0, 0]
for i in range(100):
    alea1 = random.randint(5, 20)
    alea2 = random.randint(2, alea1)
    nim = Nim(alea1, k=alea2)
    logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21>
INFO:root:init : <1 3 5 7 9 11 13>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27>
INFO:root:init : <1 3 5 7 9 11>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25>
INFO:root:init : <1 3 5 7 9>
INFO:root:init : <1 3 5 7 9 11>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27>
INFO:root:init : <1 3 5 7 9 11>
INFO:root:init : <1 3 5 7 9 11 13 15 17>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25>
INFO:root:init : <1 3 5 7 9 11 13 15 17>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27>
INFO:root:init : <1 3 5 7 9 11 13 15>
INFO:root:init : <1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31>
INFO:root:

In [158]:
def nim_misere_win(state: Nim):
    nim_piles = np.array(state.rows)
    k = state._k
    # Check if the XOR of the pile sizes is already 0
    xor_sum = 0
    for pile in nim_piles:
        xor_sum ^= pile
    
    # If the XOR sum is already 0, you're in a losing position
    if xor_sum == 0:
        # Find a pile with more than 1 element and reduce it to 1
        for i in range(len(nim_piles)):
            if nim_piles[i] > 1:
                nim_piles[i] = 1
                return i, -1
    
    # If the XOR sum is not 0, check if it's possible to make it 0
    # by taking elements from a pile
    for i in range(len(nim_piles)):
        if xor_sum ^ nim_piles[i] < nim_piles[i]:
            # If it's possible, take elements from the pile to make it happen
            new_size = xor_sum ^ nim_piles[i]
            nim_piles[i] = new_size
            return tuple(nim_piles)
    
    # If neither of the above conditions is met, take 1 element from any pile
    for i in range(len(nim_piles)):
        if nim_piles[i] >= 1:
            nim_piles[i] -= 1
            return tuple(nim_piles)


In [159]:

logging.getLogger().setLevel(logging.INFO)

strategy = (nim_misere_win, pure_random)

score = [0, 0]
for i in range(100):
    alea1 = random.randint(5, 20)
    alea2 = random.randint(2, alea1)
    nim = Nim(alea1, k=alea2)
    logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    #logging.info(f"status: Player {player} won!")
    score[player] += 1
logging.info(f"score: {score}")

INFO:root:init : <1 3 5 7 9 11 13>


ValueError: too many values to unpack (expected 2)

In [157]:
ob= Nim(4, k=3)

ob.rows[1] = ob.rows[1] - 1

TypeError: 'tuple' object does not support item assignment