Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: Policy Search

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The player **taking the last object wins**.

* Task3.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task3.2: An agent using evolved rules
* Task3.3: An agent using minmax
* Task3.4: An agent using reinforcement learning

## Instructions

* Create the directory `lab3` inside the course repo 
* Put a `README.md` and your solution there (all the files: code and auxiliary data, if needed)


In [2]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor

## The *Nim* and *Nimply* classes

In [3]:
# A move: take `num_objects` objects from the row with index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")

In [4]:
class Nim:
    """Board of a Nim game.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects. ``k`` is the
    optional upper bound on how many objects one move may remove; ``None``
    means there is no bound.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is still on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Return the row sizes separated by spaces, wrapped in ``<...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Current row sizes, as a tuple so callers cannot mutate the board."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Maximum number of objects removable per move, or None if unbounded."""
        return self._k

    def nimming(self, ply: Nimply) -> None:
        """Apply the move ``ply`` (a ``(row, num_objects)`` pair) to the board.

        Raises AssertionError if the move removes fewer than one object,
        more objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A move must remove something: zero would be a "pass" and a negative
        # amount would add objects back to the row.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

## Sample (and silly) strategies [HARDCODED]

In [38]:
# Global ply counter, kept at module level for simplicity; `fast` reads it.
turn = 0


def fast(state: Nim) -> Nimply:
    """
    Alternate between a greedy move and a minimal one.

    The choice is driven by the module-level counter `turn`: when
    `(turn // 2) % 2` is 0 the move empties the first row holding the most
    objects (capped at `state.k` when a bound is set); otherwise it removes a
    single object from a randomly chosen non-empty row.
    """
    sizes = state.rows

    if (turn // 2) % 2 != 0:
        # Minimal move: one object from any row that still has some.
        non_empty = [idx for idx, count in enumerate(sizes) if count > 0]
        return Nimply(random.choice(non_empty), 1)

    # Greedy move: `index` returns the first row holding the maximum.
    biggest = sizes.index(max(sizes))
    bound = state.k
    take = sizes[biggest] if bound is None else min(sizes[biggest], bound)
    return Nimply(biggest, take)

In [9]:
def nim_sum(state: Nim) -> int:
    """Return the nim-sum of the board: the bitwise XOR of all row sizes.

    A board with no rows has nim-sum 0 (the XOR identity).
    """
    result = 0
    for count in state.rows:
        result ^= count
    return result

def cook_status(state: Nim) -> dict:
    """Collect the features of `state` that the strategies work with.

    Returned keys:
      * "possible_moves": every legal `Nimply` (honouring `state.k`).
      * "active_rows_number": how many rows still hold objects.
      * "shortest_row": index of the non-empty row with the fewest objects.
      * "longest_row": index of the row with the most objects.
      * "nim_sum": nim-sum of the current board.
      * "brute_force": list of `(move, nim_sum_after_move)` pairs, one per
        entry of "possible_moves" and in the same order.

    Raises ValueError when no row holds any object, because the shortest
    non-empty row does not exist.
    """
    rows = state.rows
    k = state.k

    cooked = dict()
    cooked["possible_moves"] = [
        Nimply(r, o) for r, c in enumerate(rows) for o in range(1, (c if k is None else min(c, k)) + 1)
    ]
    cooked["active_rows_number"] = sum(c > 0 for c in rows)
    cooked["shortest_row"] = min((x for x in enumerate(rows) if x[1] > 0), key=lambda y: y[1])[0]
    cooked["longest_row"] = max(enumerate(rows), key=lambda y: y[1])[0]
    current = nim_sum(state)
    cooked["nim_sum"] = current

    # XOR is its own inverse: cancelling the old size of the touched row and
    # adding its new size gives the nim-sum after the move, with no need to
    # copy the board and replay the move.
    cooked["brute_force"] = [
        (m, current ^ rows[m.row] ^ (rows[m.row] - m.num_objects)) for m in cooked["possible_moves"]
    ]

    return cooked

In [10]:
def optimal_startegy(state: Nim) -> Nimply:
    """Return a move leaving a nim-sum of 0 if one exists, else a random legal move."""
    outcomes = cook_status(state)["brute_force"]
    # The fallback is drawn up front, before the search, whether or not it
    # ends up being used.
    fallback = random.choice(outcomes)
    for outcome in outcomes:
        if outcome[1] == 0:
            return outcome[0]
    return fallback[0]

In [11]:
def make_strategy(genome: dict) -> Callable:
    """Build a strategy parameterised by `genome`.

    `genome["p"]` is the probability of playing on the shortest non-empty
    row; otherwise the move is played on the longest row. The number of
    objects removed is drawn uniformly among the legal amounts for that row.
    """

    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)

        if random.random() < genome["p"]:
            row = data["shortest_row"]
        else:
            row = data["longest_row"]

        # Never ask for more than the per-move bound `k` allows, otherwise
        # the board would reject the move.
        available = state.rows[row]
        upper = available if state.k is None else min(available, state.k)
        return Nimply(row, random.randint(1, upper))

    return evolvable

In [34]:
# Number of games played by each call to `evaluate`.
NUM_MATCHES = 10
# Number of rows of every board built by `evaluate`.
NIM_SIZE = 10


def evaluate(strategy: Callable) -> float:
    """Return the fraction of NUM_MATCHES games that `strategy` wins.

    Each game is played on a fresh board of NIM_SIZE rows, with `strategy`
    moving first against `optimal_startegy`.
    """
    contenders = (strategy, optimal_startegy)

    def strategy_wins() -> bool:
        board = Nim(NIM_SIZE)
        to_move = 0
        while board:
            board.nimming(contenders[to_move](board))
            to_move = 1 - to_move
        # The index is flipped after the final move, so 1 here means that
        # contender 0 (the evaluated strategy) took the last object.
        return to_move == 1

    victories = sum(strategy_wins() for _ in range(NUM_MATCHES))
    return victories / NUM_MATCHES

In [35]:
# Win rate of a strategy that almost always (p = 0.9999) plays on the shortest row.
evaluate(make_strategy({"p": 0.9999}))

0.0

## Oversimplified match

In [36]:
# Let DEBUG-level records through the root logger so every ply is traced.
logging.getLogger().setLevel(logging.DEBUG)

# Players as (player 0, player 1). The second assignment replaces the first,
# so this match is `fast` against `optimal_startegy`.
strategy = (make_strategy({"p": 0.5}), optimal_startegy)
strategy = (fast, optimal_startegy)

nim = Nim(11)
logging.debug(f"status: Initial board  -> {nim}")
player = 0

# Module-level ply counter; `fast` reads it to pick which kind of move to make.
turn = 0
while nim:
    ply = strategy[player](nim)
    nim.nimming(ply)
    logging.debug(f"[{turn}] - status: After player {player} -> {nim}")
    player = 1 - player
    turn += 1
# `player` was flipped after the final move, so flip it back to get the winner.
winner = 1 - player
logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9 11 13 15 17 19 21>
DEBUG:root:[0] - status: After player 0 -> <1 3 5 7 9 11 13 15 17 19 0>
DEBUG:root:[1] - status: After player 1 -> <1 1 5 7 9 11 13 15 17 19 0>
DEBUG:root:[2] - status: After player 0 -> <0 1 5 7 9 11 13 15 17 19 0>
DEBUG:root:[3] - status: After player 1 -> <0 0 5 7 9 11 13 15 17 19 0>
DEBUG:root:[4] - status: After player 0 -> <0 0 5 7 9 11 13 15 17 0 0>
DEBUG:root:[5] - status: After player 1 -> <0 0 5 7 9 11 13 15 2 0 0>
DEBUG:root:[6] - status: After player 0 -> <0 0 5 7 9 11 13 14 2 0 0>
DEBUG:root:[7] - status: After player 1 -> <0 0 4 7 9 11 13 14 2 0 0>
DEBUG:root:[8] - status: After player 0 -> <0 0 4 7 9 11 13 0 2 0 0>
DEBUG:root:[9] - status: After player 1 -> <0 0 4 7 7 11 13 0 2 0 0>
DEBUG:root:[10] - status: After player 0 -> <0 0 4 7 7 11 13 0 1 0 0>
DEBUG:root:[11] - status: After player 1 -> <0 0 4 4 7 11 13 0 1 0 0>
DEBUG:root:[12] - status: After player 0 -> <0 0 4 4 7 11 0 0 1 0 0>
DEBUG:root:[13] -

SyntaxError: unmatched ')' (1195981134.py, line 1)