Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using rules evolved with an evolution strategy (ES)

## Instructions

* Create the directory `lab2` inside your personal course repository
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [2]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [3]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [4]:
class Nim:
    """Board of a Nim game.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects. When ``k`` is not
    ``None`` it is the maximum number of objects a single move may remove.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Create a board with *num_rows* rows and an optional move cap *k*."""
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Return the row sizes separated by spaces, wrapped in ``<`` and ``>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply *ply* to the board, removing its objects from its row.

        Raises:
            AssertionError: if the move removes fewer than one object, more
                objects than the row holds, or more than the cap ``k``.
        """
        row, num_objects = ply
        # A move must take at least one object: zero would be a "pass" and a
        # negative amount would add objects to the row.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) strategies

In [5]:
def pure_random(state: Nim) -> Nimply:
    """Choose a random non-empty row, then a random amount to take from it.

    The amount is drawn between 1 and the full size of the chosen row; the
    board's move cap is not consulted here.
    """
    non_empty = [index for index, size in enumerate(state.rows) if size > 0]
    chosen = random.choice(non_empty)
    amount = random.randint(1, state.rows[chosen])
    return Nimply(chosen, amount)


In [6]:
def gabriele(state: Nim) -> Nimply:
    """Take every object from the lowest-index row that is not empty."""

    def preference(move):
        # Smaller row index first, then larger amount.
        row, amount = move
        return (-row, amount)

    candidates = []
    for row, size in enumerate(state.rows):
        candidates.extend((row, amount) for amount in range(1, size + 1))
    best_row, best_amount = max(candidates, key=preference)
    return Nimply(best_row, best_amount)


In [7]:
def adaptive(state: Nim) -> Nimply:
    """Placeholder for a strategy driven by tunable parameters.

    Only the parameter dictionary is defined so far; no move is chosen and
    the function returns ``None``.
    """
    genome = dict(love_small=0.5)
    return None


In [8]:
import numpy as np


def nim_sum(state: Nim) -> int:
    """Return the nim-sum of *state*: the bitwise XOR of all its row sizes.

    An empty board (no rows) has nim-sum 0. Row sizes are combined with the
    integer XOR operator, so they are not limited to a fixed bit width.
    """
    result = 0
    for count in state.rows:
        result ^= count
    return result


def analize(raw: Nim) -> dict:
    """Evaluate every move available on *raw*.

    Returns a dict with a single key, ``"possible_moves"``, mapping each
    ``Nimply`` to the nim-sum of the board obtained by playing it on a copy.
    *raw* itself is left untouched.

    NOTE(review): every amount from 1 up to the row size is tried without
    looking at the board's move cap, while ``Nim.nimming`` asserts on that
    cap -- presumably this is only used on boards created without ``k``;
    verify.
    """
    outcomes = {}
    for row, size in enumerate(raw.rows):
        for amount in range(1, size + 1):
            move = Nimply(row, amount)
            board = deepcopy(raw)  # play the move on a scratch copy
            board.nimming(move)
            outcomes[move] = nim_sum(board)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Pick a random move among those that leave a non-zero nim-sum.

    If no such move exists, a random move among all possible ones is played.

    NOTE(review): the filter keeps moves whose resulting nim-sum is NOT zero,
    whereas the textbook nim-sum strategy tries to leave zero -- confirm that
    this is intended.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    candidates = []
    for move, resulting_sum in outcomes.items():
        if resulting_sum != 0:
            candidates.append(move)
    if len(candidates) == 0:
        # Nothing passed the filter: fall back to every available move.
        candidates = list(outcomes)
    return random.choice(candidates)


## Oversimplified match

In [9]:
# Let INFO-level records through the root logger so the match log is emitted.
logging.getLogger().setLevel(logging.INFO)

# strategy[i] is the move-selection function used by player i.
strategy = (optimal, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
# Players alternate until the board is empty (Nim is falsy when no objects remain).
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
# `player` was switched after the final move, so the player reported here is
# the one who did NOT remove the last object.
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <1 1 5 7 9>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=5)
INFO:root:status: <1 1 0 7 9>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 0 0 7 9>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=5)
INFO:root:status: <1 0 0 7 4>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=6)
INFO:root:status: <1 0 0 1 4>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <1 0 0 0 4>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=4)
INFO:root:status: <1 0 0 0 0>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 0 0 0 0>
INFO:root:status: Player 0 won!


## Task one - Expert agent

In [12]:
def read_state(nim: Nim):
    """Return the current row sizes of *nim* as a new, mutable list of ints.

    The sizes are read from the board's public ``rows`` property rather than
    parsed back out of its string representation, so the result does not
    depend on how the board is printed and a board with no rows yields ``[]``.
    """
    return list(nim.rows)

def show_board(nim: Nim) -> None:
    """Print the board, one text line per row.

    Each object is drawn as ``'O '``; ``'_ '`` cells pad the line on the left
    (fewer for later rows) and on the right.
    """
    piles = read_state(nim)
    total_rows = len(piles)
    for index, count in enumerate(piles):
        left_pad = '_ ' * (total_rows - (index + 1))
        objects = 'O ' * count
        right_pad = '_ ' * (total_rows + index - count)
        print(left_pad + objects + right_pad)

def my_nim_sum(state):
    """Return the bitwise XOR of all entries of *state* (its nim-sum).

    *state* is an indexable sequence of row sizes with at least one entry.
    """
    accumulator = state[0]
    position = 1
    while position < len(state):
        accumulator = np.bitwise_xor(accumulator, state[position])
        position += 1
    return accumulator

def find_move(state):
    """Choose a move for the "last object loses" game from a list of row sizes.

    Order of the rules:

    1. Exactly one row holds more than one object: empty that row when the
       number of single-object rows is odd, otherwise leave one object in
       it. Either way an odd number of single-object rows remains.
    2. No row holds more than one object: take the object of the first
       single-object row.
    3. Otherwise play the first move found (scanning rows, then amounts, in
       increasing order) that makes the nim-sum zero.
    4. If no such move exists, remove one object from a randomly drawn
       non-empty row.

    *state* must be a list with at least one non-empty row. The board's move
    cap is not taken into account.
    """
    piles = np.array(state)
    big_rows = np.where(piles > 1)[0]
    single_rows = np.where(piles == 1)[0]

    # Endgame tweak to the plain "always zero the nim-sum" strategy.
    if len(big_rows) == 1:
        target = big_rows[0]
        if len(single_rows) % 2 != 0:
            return Nimply(target, state[target])
        return Nimply(target, state[target] - 1)
    if len(big_rows) == 0:
        return Nimply(single_rows[0], 1)

    # Look for a move that brings the nim-sum to zero.
    for row, size in enumerate(state):
        for taken in range(1, size + 1):
            trial = state.copy()
            trial[row] -= taken
            if my_nim_sum(trial) == 0:
                return Nimply(row, taken)

    # The nim-sum is already zero, so every move can be countered: remove a
    # single object from a random row, since a longer game gives the opponent
    # more chances to make a mistake.
    while True:
        candidate = random.randint(0, len(state) - 1)
        if state[candidate] != 0:
            return Nimply(candidate, 1)


def nim_sum_move(nim: Nim):
    """Strategy entry point: read the board and delegate to ``find_move``.

    On an empty board an error message is printed and ``None`` is returned.
    """
    piles = read_state(nim)
    if sum(piles) != 0:
        return find_move(piles)

    print('error - game is empty')
    return None

In [680]:
#nim = Nim(6)
#show_board(nim)
#print('-----------------------')
#print('starting the game!\n')
#to_remove = my_nim_sum(read_state(nim))
#print('if played correctly')
#if to_remove == 0: print('player 1 should win')
#else: print('player 0 should win')
#print('-----------------------')
#
#strategies = [nim_sum_move, optimal, nim_sum_move, ]
#player = 0
#while nim:
#    ply = strategies[player](nim)
#    print(f"player {player} removes {ply[1]} from {ply[0]}")
#    nim.nimming(ply)
#    show_board(nim)
#    #print(nim)
#    player = 1 - player
#    print('------------------------------')
#print(f"end of game: Player {player} won!")

In [13]:
# Benchmark: for every board size, play N_GAMES games with each of the two
# players moving first, and report how many games each one won.
# Only strategies[0] and strategies[1] take part: player indices below are
# always 0 or 1.
strategies = [nim_sum_move, optimal, nim_sum_move, gabriele, ]
NIM_ROWS_RANGE = [2, 3, 4, 5, 6, 7, 8, 9, 10]
N_GAMES = 100

for nim_rows in NIM_ROWS_RANGE:

    for starting_player in [0, 1]: #[0, 1]:

        first = strategies[starting_player].__name__
        second = strategies[1 - starting_player].__name__
        # Nim-sum of the untouched starting board for this size.
        to_remove = my_nim_sum(read_state(Nim(nim_rows)))

        print('==================================================================================================================================')
        print('starting games!\n')
        print(f'number of nim rows: {nim_rows}')
        print(f'starting nim-sum: {to_remove}')
        print(f'first to play: {first}')
        if to_remove == 0: print(f"if played correctly <<{second}>> has advantage because the initial nim-sum is {to_remove} and it's the second to play")
        else: print(f"if played correctly <<{first}>> has advantage because the initial nim-sum is {to_remove} and it's the first to play")
        print('-----------------------')

        # wins[i] counts the games won by strategies[i].
        wins = [0, 0]
        for i in range(N_GAMES):
            #print('================================')
            #print('================================')
            #print(f'GAME {i}')
            #print('================================')
            #print('================================')
            nim = Nim(nim_rows)
            player = starting_player
            #show_board(nim)
            #print(my_nim_sum(read_state(nim)))

            while nim:
                #print('---------------------------')
                #print(f'player <<{strategies[player].__name__}>>')
                ply = strategies[player](nim)
                nim.nimming(ply)
                #show_board(nim)
                #print(my_nim_sum(read_state(nim)))
                player = 1 - player
            # `player` was switched after the last move, so it now indexes the
            # player who did NOT remove the last object, i.e. the winner.
            wins[player] += 1

        print(f'<<{strategies[0].__name__}>> won {wins[0]} times')
        print(f'<<{strategies[1].__name__}>> won {wins[1]} times')

starting games!

number of nim rows: 2
starting nim-sum: 2
first to play: nim_sum_move
if played correctly <<nim_sum_move>> has advantage because the initial nim-sum is 2 and it's the first to play
-----------------------
<<nim_sum_move>> won 100 times
<<optimal>> won 0 times
starting games!

number of nim rows: 2
starting nim-sum: 2
first to play: optimal
if played correctly <<optimal>> has advantage because the initial nim-sum is 2 and it's the first to play
-----------------------
<<nim_sum_move>> won 55 times
<<optimal>> won 45 times
starting games!

number of nim rows: 3
starting nim-sum: 7
first to play: nim_sum_move
if played correctly <<nim_sum_move>> has advantage because the initial nim-sum is 7 and it's the first to play
-----------------------
<<nim_sum_move>> won 100 times
<<optimal>> won 0 times
starting games!

number of nim rows: 3
starting nim-sum: 7
first to play: optimal
if played correctly <<optimal>> has advantage because the initial nim-sum is 7 and it's the first

## Task two - evolutionary strategy

In [569]:
## first version




#def read_state(nim: Nim): return [int(n) for n in str(nim)[1:-1] if n != ' ']
#
#def show_board(nim: Nim) -> None:
#    state = read_state(nim)
#    for r in range(len(state)):
#        tmp_str = ''
#        for _ in range(len(state) - (r + 1)): tmp_str += '_ '
#        for _ in range(state[r]): tmp_str += 'O '
#        for _ in range(len(state) + r - state[r]): tmp_str += '_ '
#        print(tmp_str)
#
#def my_nim_sum(state):
#    to_remove = state[0]
#    for r in state[1:]: to_remove = np.bitwise_xor(to_remove, r)
#    return to_remove
#
#def compute_unmatched(state):
#    unmatched = []
#
#    for r in range(len(state)):
#        unmatched.append([])
#        n_r = state[r]
#
#        pow_2 = 1
#        while pow_2 * 2 <= n_r: pow_2 *= 2
#
#        while(n_r > 0):
#            if pow_2 <= n_r:
#                matched = False
#                for um in range(len(unmatched)):
#                    if int(pow_2) in unmatched[um] and not matched:
#                        unmatched[um].remove(int(pow_2))
#                        matched = True
#                if not matched: unmatched[-1].append(int(pow_2))
#                n_r -= pow_2
#            pow_2 /= 2
#
#    return unmatched
#
#def find_move(state):
#    unmatched = compute_unmatched(state)
#
#    lens = np.where([len(um) > 0 for um in unmatched])[0]
#    
#    if len(lens) == 1:
#        sum_lens_0 = sum(unmatched[lens[0]])
#        if sum_lens_0 == 1: return Nimply(lens[0], 1)
#        if sum(state) == sum_lens_0: return Nimply(lens[0], sum_lens_0 - 1)
#        return Nimply(lens[0], sum_lens_0)
#    
#    idx_more_than_one = np.where(np.array(state) > 1)[0]
#    more_than_one = len(idx_more_than_one)
#    n_one = len(np.where(np.array(state) == 1)[0])
#
#    if more_than_one == 1:
#        if n_one % 2 != 0:
#            return Nimply(idx_more_than_one[0], state[idx_more_than_one[0]])
#        return Nimply(idx_more_than_one[0], state[idx_more_than_one[0]] - 1)
#    
#    #if len(lens) == 2:
#    #    if sum(unmatched[lens[0]]) > sum(unmatched[lens[1]]): 
#    #        return Nimply(lens[0], sum(unmatched[lens[0]]) - sum(unmatched[lens[1]]))
#    #    
#    #    return Nimply(lens[1], sum(unmatched[lens[1]]) - sum(unmatched[lens[0]]))
#    
#    for r in range(len(state)):
#        for c in range(1, state[r] + 1):
#            state_copy = state.copy()
#            state_copy[r] -= c
#
#            if my_nim_sum(state_copy) == 0: return Nimply(r, c)
#
#    print('no move found - going random') # just in case, it shouldn't happen
#    remove_from = random.randint(0, len(state) - 1)
#    while state[remove_from] == 0: remove_from = random.randint(0, len(state) - 1)
#    return Nimply(remove_from, 1)
#
#
#def nim_sum_move(nim: Nim):
#    state = read_state(nim)
#
#    if sum(state) == 0: 
#        print('error - game is empty')
#        return None
#
#    to_remove = my_nim_sum(state)
#
#    ## if nim-sum is already zero
#    
#    if to_remove == 0:
#        ## check for a conf that respect the nim_sum == 0 (is it possible?) --> no, it's not possible
#        ## so instead search for the conf with most unmatched row
#
#        candidates = PriorityQueue()
#        for r in range(len(state)):
#            for c in range(1, state[r] + 1):
#                state_copy = state.copy()
#                state_copy[r] -= c
#
#                lens = np.where([len(um) > 0 for um in compute_unmatched(state_copy)])[0]
#
#                candidates.put((-len(lens), Nimply(r, c)))
#
#        return candidates.get()[1]
#
#        ## last possibility is random
#        ## choice to randomly take only 1 pin from a random row, so that the game last longer and the opponent (which in this case has advantage)
#        ## has more possibilities to make a mistake
#        
#        remove_from = random.randint(0, len(state) - 1)
#        while state[remove_from] == 0: remove_from = random.randint(0, len(state) - 1)
#        return Nimply(remove_from, 1)
#
#    ## search move to zero nim-sum
#
#    return find_move(state)