Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [195]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from math import ceil, e
from dataclasses import dataclass, asdict, field
from typing import Literal, TypedDict, Any
import random
from tqdm.notebook import tqdm, trange


## The *Nim* and *Nimply* classes

In [196]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")


In [197]:
class Nim:
    """Mutable state of a Nim game: the object count of every row plus an optional cap ``k``."""

    def __init__(self, num_rows: int, k: int = None) -> None:
        """
        Build the starting position, in which row ``i`` holds ``2 * i + 1`` objects.

        Args:
            num_rows (int): number of piles
            k (int, optional): maximum number of objects nimmable each time. Defaults to None (any amount).
        """
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is still on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Render the rows as ``<1 3 5>``, followed by `` (k) `` when a cap is set."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">" + (f" ({self._k}) " if self._k is not None else "")

    def __repr__(self):
        return self.__str__()

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the object count of every row."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Maximum number of objects removable in one move, or None when unbounded."""
        return self._k

    def nimming(self, ply: "Nimply") -> None:
        """
        Apply a move in place, removing ``ply.num_objects`` objects from row ``ply.row``.

        Args:
            ply: a ``(row, num_objects)`` pair.

        Raises:
            AssertionError: if the move removes fewer than one object, more objects
                than the row holds, or more than ``k`` objects when a cap is set.
        """
        row, num_objects = ply
        # A move must remove something: zero would be a pass, a negative amount
        # would add objects back to the row.
        assert num_objects >= 1, f"{num_objects=}"
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k, f"{num_objects=}, {self._k=}"
        self._rows[row] -= num_objects

## Sample (and silly) strategies

In [198]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row, then a random amount to take from it.

    The amount is drawn between 1 and the size of the chosen row; when the
    game has an upper bound ``k`` the drawn amount is capped to ``k``.
    """
    non_empty_rows = [index for index, count in enumerate(state.rows) if count > 0]
    row = random.choice(non_empty_rows)
    amount = random.randint(1, state.rows[row])
    if state._k is not None:
        amount = min(amount, state._k)
    return Nimply(row, amount)


In [199]:
def gabriele(state: "Nim") -> "Nimply":
    """Take as many objects as allowed from the first (lowest-index) non-empty row.

    Args:
        state: current game; only ``rows`` and ``k`` are read.

    Returns:
        The ``(row, num_objects)`` pair of the chosen move.

    Raises:
        ValueError: if no move is available (every row is empty).
    """
    limit = state.k
    # The largest legal take from a row of size c is min(c, k); the +1 turns it
    # into range()'s exclusive stop so that taking exactly k objects is allowed.
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, (c if limit is None else min(c, limit)) + 1)
    ]
    # Lowest row index first, then the largest amount within that row.
    return max(possible_moves, key=lambda m: (-m[0], m[1]))

In [200]:
import numpy as np


def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of the position, i.e. the bitwise XOR of all row sizes.

    An empty board has nim-sum 0, and row sizes of any magnitude are supported.
    """
    result = 0
    for count in state.rows:
        result ^= int(count)
    return result


def analize(raw: Nim) -> dict:
    """Evaluate every legal move of a position.

    Args:
        raw: the position to analyse; it is never modified (each move is tried
            on a deep copy).

    Returns:
        A dict with a single key ``"possible_moves"`` mapping every legal
        ``Nimply`` to the nim-sum of the position reached by playing it.
    """
    cooked = dict()
    cooked["possible_moves"] = dict()
    for r, c in enumerate(raw.rows):
        # Largest legal take from this row; moves of exactly k objects are legal.
        most = c if raw.k is None else min(c, raw.k)
        for o in range(1, most + 1):
            ply = Nimply(r, o)
            tmp = deepcopy(raw)
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    """Play a random move among those leaving a non-zero nim-sum.

    When no such move exists, any legal move is drawn instead.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    candidates = [move for move, value in outcomes.items() if value != 0] or list(outcomes)
    logging.debug(pformat(f"{analysis['possible_moves']}"))
    return random.choice(candidates)


## Oversimplified match

In [214]:
def match(player_position: int, player_strategy, opponent, *, size: int = 5, k: int = None, lvl = logging.WARN):
    """Play one full game and tell whether ``player_strategy`` won.

    Args:
        player_position: 0 if ``player_strategy`` moves first, anything else if
            it moves second.
        player_strategy: callable mapping a ``Nim`` to the move to play.
        opponent: callable with the same contract, playing the other seat.
        size: number of rows of the board.
        k: optional upper bound on the objects removable per move.
        lvl: level set on the root logger for the duration of the process.

    Returns:
        True when the seat that did NOT remove the last object is
        ``player_position``.
    """
    logging.getLogger().setLevel(lvl)
    if player_position == 0:
        seats = (player_strategy, opponent)
    else:
        seats = (opponent, player_strategy)

    board = Nim(size, k)
    logging.info(f"init : {board} {bool(board)=}")
    turn = 0
    while board:
        board.nimming(seats[turn](board))
        turn = 1 - turn
    # `turn` now designates the seat that would have moved next, i.e. the one
    # that did not take the last object.
    return turn == player_position

In [202]:
def expert_strategy(state: Nim, klimit: bool = False) -> Nimply:
    """
    Rule-based player built on the nim-sum.

    When exactly one row holds more than one object, that row is reduced so
    that (ignoring the cap ``k``) an odd number of single-object rows is left.
    Otherwise a random move reaching a zero nim-sum is played, or any random
    legal move if none exists. ``klimit`` is currently unused.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")

    rows = state.rows
    single_rows = sum(1 for count in rows if count == 1)
    live_rows = sum(1 for count in rows if count != 0)

    if single_rows + 1 == live_rows:
        # Endgame: only one row still has more than one object.
        big_row, big_count = [(index, count) for index, count in enumerate(rows) if count > 1][0]
        take = big_count if single_rows % 2 == 1 else big_count - 1
        if state.k is not None:
            take = min(take, state.k)
        return Nimply(big_row, take)

    outcomes = analysis["possible_moves"]
    candidates = [move for move, value in outcomes.items() if value == 0] or list(outcomes)
    logging.debug(pformat(f"{analysis['possible_moves']}"))
    return random.choice(candidates)

# def super_expert(state: Nim):
#     return expert_strategy(state, True)


In [203]:
# for opponent in [gabriele, pure_random, optimal]:
#     matches = 100
#     accuracy = [match(random.choice([0, 1]), expert_strategy, opponent, k=3, size=5) for _ in range(matches)].count(True)/matches
#     print(f"Accuracy of {accuracy:.2%} vs {opponent.__qualname__}")


# Adaptive Strategy

In [204]:
def remaining_moves(n: "Nim", ratio: bool = False):
    """Count the legal moves available in a position.

    Args:
        n: the position; only ``rows`` and ``k`` are read.
        ratio: when True, divide the count by the number of legal moves of a
            fresh board with the same number of rows and the same ``k``.

    Returns:
        The number of legal moves (int), or that number as a fraction of the
        starting position's moves (float) when ``ratio`` is True.
    """
    k = n.k
    # A row of size c offers min(c, k) distinct moves (c when there is no cap);
    # taking exactly k objects is legal and must be counted.
    mr = sum(max(0, c if k is None else min(c, k)) for c in n.rows)
    if not ratio:
        return mr
    mt = remaining_moves(Nim(len(n.rows), k), False)
    return mr / mt

In [260]:
def softmax(x):
    """Return the softmax of the scores in ``x`` as a plain Python list."""
    # Adapted from
    # https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python
    # The maximum is subtracted before exponentiating.
    shifted = np.asarray(x) - np.max(x)
    exponentials = np.exp(shifted)
    normalised = exponentials / exponentials.sum(axis=0)
    return normalised.tolist()

# Scale (standard deviation) of the Gaussian noise applied by Individual.mutate:
# index 0 perturbs the phase thresholds, index 1 the strategy probabilities.
mutation_rate: tuple[float, float] = (1, 1)

@dataclass(init=False)
class Individual:
    """Evolvable agent that mixes the hand-written strategies according to the game phase.

    The ratio of remaining moves (see ``remaining_moves``) is compared with two
    thresholds to select one of three phases; each phase owns a probability
    distribution over the first ``n_strategy`` strategies listed in ``__call__``.

    Attributes:
        n_strategy: number of strategies the agent can choose from.
        phase_thresholds: two sorted values in [0, 1] separating the phases.
        strategy_probs: one probability row per phase, each of length ``n_strategy``.
    """

    n_strategy: int
    phase_thresholds: list[float]
    strategy_probs: list[list[float]]

    __qualname__ = "individual"

    def __init__(self, n_strategy: int = None, strategy_probs = None, phase_thresholds = None) -> None:
        """
        Args:
            n_strategy: number of selectable strategies. Defaults to 3.
            strategy_probs: raw per-phase scores; every row is normalised with
                softmax. Random rows are generated when omitted.
            phase_thresholds: two raw thresholds; each one is clamped to [0, 1]
                and the pair is sorted. Random thresholds are drawn when omitted.
        """
        if n_strategy is None:
            n_strategy = 3
        if strategy_probs is None:
            strategy_probs = Individual._generate_random_strategy_probs(n_strategy)
        else:
            strategy_probs = Individual._softmax_probs(strategy_probs)
        if phase_thresholds is None:
            phase_thresholds = sorted([random.random(), random.random()])
        else:
            # Clamp BOTH values on both sides: Gaussian mutation can push either
            # threshold below 0 or above 1.
            phase_thresholds = sorted(
                min(1, max(0, threshold))
                for threshold in (phase_thresholds[0], phase_thresholds[1])
            )

        self.n_strategy = n_strategy
        self.strategy_probs = strategy_probs
        self.phase_thresholds = phase_thresholds

    @staticmethod
    def _generate_random_strategy_probs(n_strategy):
        """Return three random probability rows (one per phase) of length ``n_strategy``."""
        return [softmax([random.random() for _ in range(n_strategy)]) for _ in range(3)]

    @staticmethod
    def _softmax_probs(strategy_probs):
        """Return a new matrix whose rows are the softmax of the given rows (input untouched)."""
        return [softmax(row) for row in strategy_probs]

    def mutate(ind: "Individual") -> "Individual":
        """Return a new individual obtained by adding Gaussian noise to the genes.

        The noise scales are read from the module-level ``mutation_rate`` pair at
        call time; ``ind`` itself is left unchanged.
        """
        phase_thresholds = np.random.normal(ind.phase_thresholds, mutation_rate[0]).tolist()
        strategy_probs = np.random.normal(ind.strategy_probs, mutation_rate[1]).tolist()
        return Individual(strategy_probs=strategy_probs, phase_thresholds=phase_thresholds, n_strategy=ind.n_strategy)

    def __call__(self: "Individual", state: Nim) -> Nimply:
        """Choose a strategy at random, using the current phase's probabilities, and play its move."""
        phase_ratio = remaining_moves(state, True)
        low, high = self.phase_thresholds
        if phase_ratio < low:
            phase_index = 0
        elif phase_ratio <= high:
            phase_index = 1
        else:
            phase_index = 2
        probs = self.strategy_probs[phase_index]
        STRATEGIES = [expert_strategy, pure_random, gabriele, optimal]
        strategy = np.random.choice(STRATEGIES[:self.n_strategy], p=probs)
        return strategy(state)
    



# i = Individual()
# pprint((i, i.mutate()))

In [271]:
LAMBDA = 40  # number of offspring generated from the parent at every generation
OPPONENT = optimal  # default adversary used by `streak`
N_MATCHES = 16  # default number of matches played by `streak`

def streak(player_strategy, n: int = N_MATCHES, opponent = OPPONENT) -> float:
    """Return the fraction of ``n`` randomised matches won by ``player_strategy``.

    Every match uses a random board size between 4 and 10 rows, a cap ``k``
    that is either None or a random value, and a random seat for the player.
    """
    victories = 0
    for _ in range(n):
        board_size = random.randint(4, 10)
        cap_candidates = [random.randint(2, board_size * 2 + 1) for _ in range(2)]
        cap = random.choice([None, None, *cap_candidates])
        seat = random.choice([0, 1])
        if match(seat, player_strategy, opponent, size=board_size, k=cap):
            victories += 1
    return victories / n

# Starting point of the evolution: a randomly initialised individual and its score.
parent = Individual()
parent_result = streak(parent)

In [272]:
# Evolution loop: LAMBDA mutants of the parent are scored each generation and
# the best one replaces the parent only when it is strictly better.
pbar = trange(0, 10_000 // LAMBDA)
for i in pbar:
    pbar.set_description(f"Parent Accuracy: {parent_result:.2%}")

    offspring = [parent.mutate() for _ in range(LAMBDA)]
    results = [streak(child) for child in offspring]

    # One-fifth rule: widen the mutation when more than 1/5 of the offspring
    # beat the parent, narrow it when fewer do.
    incrate = sum(res > parent_result for res in results) / LAMBDA
    if incrate > 1/5:
        mutation_rate = tuple(step * 1.1 for step in mutation_rate)
    elif incrate < 1/5:
        mutation_rate = tuple(step / 1.1 for step in mutation_rate)

    solution_ind = np.argmax(results)
    if results[solution_ind] > parent_result:
        parent, parent_result = offspring[solution_ind], results[solution_ind]

    if parent_result >= 0.999:
        break


  0%|          | 0/250 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [270]:
# f"{streak(parent, 100, optimal):.2%}"
# Display the current parent's per-phase strategy probabilities and its phase thresholds.
parent.strategy_probs, parent.phase_thresholds

([[0.19227118814077065, 0.4996584276813375, 0.30807038417789184],
  [0.6165462564242461, 0.17785386389554347, 0.20559987968021046],
  [0.6808987386649323, 0.06437540111076256, 0.2547258602243051]],
 [0.1163076915116311, 0.9213910580350032])

# Extensions

## Genetic Approach

In [208]:
# This is the start of the code from when I thought I was supposed to write a Genetic Algorithm

from dataclasses import dataclass, asdict, field
from typing import Literal, TypedDict, Any
import random

# Search space of every gene, keyed by gene name. "type" selects how
# random_allele_value samples the gene; "value" holds the allowed choices for a
# discrete gene, or the [start, stop] bounds for a continuous one.
Allele: dict[str, dict[str, Any]] = {
    "prefer_rows": {"type": "discrete", "value": [0, 1, -1]},
    "percent_to_take": {"type": "continous", "value": [0, 1]},
}

def random_allele_value(key):
    """Draw a random value for the gene ``key`` according to its ``Allele`` entry.

    Discrete genes get one of the listed values; continuous genes get a value
    between the two bounds. Any other gene type yields None.
    """
    spec: dict[str, Any] = Allele[key]
    kind = spec.get("type")
    if kind == "continous":
        low, high = spec.get("value")
        span = high - low
        return (random.random() * span) + low
    if kind == "discrete":
        return random.choice(spec["value"])
    return None


@dataclass(frozen=True)
class Genome:
    """Immutable pair of genes; omitted genes are drawn with ``random_allele_value``."""

    # -1 favours smaller rows, 1 bigger, 0 indifferent
    prefer_rows: Literal[0, 1, -1] = field(default_factory=lambda: random_allele_value("prefer_rows"))
    # Range: [0, 1]
    percent_to_take: float = field(default_factory=lambda: random_allele_value("percent_to_take"))

    def mutate(g1: "Genome") -> "Genome":
        """Return a copy of the genome with one randomly chosen gene re-drawn.

        Args:
            g1 (Genome): Starting genome (never altered)

        Returns:
            Genome: Mutated genome
        """
        genes = asdict(g1)
        chosen: str = random.choice(list(genes))
        genes[chosen] = random_allele_value(chosen)
        return Genome(**genes)

    def crossover(g1: "Genome", g2: "Genome") -> "Genome":
        """Return a child taking every gene from ``g1`` or ``g2`` with equal probability."""
        first, second = asdict(g1), asdict(g2)
        child = {
            name: (first[name] if random.random() < 0.5 else second[name])
            for name in first
        }
        return Genome(**child)
    
# Smoke test: build two random genomes and print them with a mutated crossover of the two.
# NOTE: `e` rebinds the name `e` imported from `math` at the top of the file.
d = Genome()
e = Genome()
print(d, e, d.crossover(e).mutate())

Genome(prefer_rows=0, percent_to_take=0.3173647956205522) Genome(prefer_rows=-1, percent_to_take=0.5500058232042224) Genome(prefer_rows=-1, percent_to_take=0.3173647956205522)
