Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [133]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from tqdm import tqdm


## The *Nim* and *Nimply* classes

In [11]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [12]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        if k is None:
            k = (num_rows - 1) * 2 + 1
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    @property
    def k(self) -> int:
        return self._k

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Strategies

In [13]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, min(state.rows[row], state.k))
    return Nimply(row, num_objects)


In [14]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, min(state.k + 1, c + 1))]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [82]:
def take_half_from_last_row(state: Nim) -> Nimply:
    row = -1
    for i in range(len(state.rows) -1, -1, -1):
        if state.rows[i] > 0:
            row = i
            break
    n_obj = min(state.k, state.rows[row] // 2) if state.rows[row] // 2 != 0 else min(state.k, state.rows[row])
    return Nimply(row, n_obj)

In [83]:
def take_half_from_top(state: Nim) -> Nimply:
    row = -1
    for i in range(len(state.rows)):
        if state.rows[i] > 0:
            row = i
            break
    n_obj = min(state.k, state.rows[row] // 2) if state.rows[row] // 2 != 0 else min(state.k, state.rows[row])
    return Nimply(row, n_obj)


In [70]:
def take_one(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    return Nimply(row, 1)

In [86]:
def take_even_odd(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    if row % 2 == 0:
        n_obj = min(state.k, state.rows[row] // 2)
    else:
        n_obj = max(1, min(state.k, state.rows[row]))
    return Nimply(row, n_obj)

In [81]:
def leave_one(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    c = state.rows[row]
    n_obj = max(min(state.k, c -1), 1)
    return Nimply(row, n_obj)


In [15]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [16]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, min(raw.k + 1, c + 1))):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


## Oversimplified match

In [59]:
def game(n_rows, k, strategy):
    # logging.getLogger().setLevel(logging.INFO)

    strategies = (strategy, optimal)

    nim = Nim(n_rows, k)
    # logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategies[player](nim)
        # logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        # logging.info(f"status: {nim}")
        player = 1 - player
    # logging.info(f"status: Player {player} won!")
    return player


In [60]:
def compare_strategies(strategies, n_rows, k):
    NUM_GAMES = 1000
    for name, s in strategies.items():
        winners = [game(n_rows, k, s) for _ in range(NUM_GAMES)]
        avg_1 = (sum(winners)) / (NUM_GAMES / 100)
        print(f"Optimal wins: {avg_1}% against {name}")

In [88]:
compare_strategies({"even_odd": take_even_odd, "half_last": take_half_from_last_row, "one": take_one, "half_top": take_half_from_top}, 5, 3)

Optimal wins: 10.1% against even_odd
Optimal wins: 74.0% against half_last
Optimal wins: 71.7% against one
Optimal wins: 72.6% against half_top


In [169]:
class Player:
    def __init__(self, weights: tuple[float], strategies):
        self.weights = weights
        self.strategies = strategies

    def get_strategy(self):
        min_weight = min(self.weights)
        if min_weight <= 0:
            shifted_weights = [w - min_weight + 1.0 for w in self.weights]  # Adding 1 ensures non-zero weights
        else:
            shifted_weights = self.weights  # No need to shift if all weights are non-negative
        return random.choices(self.strategies, weights=shifted_weights, k=1)[0]

    def __str__(self):
        w_to_s = [f'{w} ' for w in self.weights]
        return ''.join(w_to_s)


In [96]:
rules = [take_one, take_even_odd, take_half_from_top, take_half_from_last_row, gabriele, pure_random]

In [155]:
def generate_population(size = 30):
    pop = [Player(tuple([random.random() for _ in range(len(rules))]), rules) for _ in range(size)]
    return pop

In [154]:
def offspring(parents, mutation, size = 210):
    os = []
    parent_subset = [random.choice(parents) for _ in range(size)]
    for i in range(size):
        weights = np.array(parent_subset[i].weights)
        mutations = np.array([random.gauss(0, mutation) for _ in range(weights.shape[0])])
        weights += mutations
        new_player = Player(tuple(weights), rules)
        os.append(new_player)
    return os

In [179]:
def game_simulation(n_rows: int, k: int, Player_1: Player, Player_2: Player):
    # logging.getLogger().setLevel(logging.INFO)
    players = (Player_1, Player_2)
    nim = Nim(n_rows, k)
    # logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = players[player].get_strategy()(nim)
        # logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        # logging.info(f"status: {nim}")
        player = 1 - player
    # logging.info(f"status: Player {player} won!")
    return player


In [176]:
NUM_GAMES = 30
ROWS = 5
K = 5
optimal_player = Player(tuple([1.0]), [optimal])
def fitness(individual: Player) -> float:
    winners = [game_simulation(ROWS, K, individual, optimal_player) for _ in range(NUM_GAMES)]
    return (NUM_GAMES - sum(winners)) / NUM_GAMES

## (mu, Lambda)-ES

In [182]:
MU = 10
LAMBDA = 60
GENERATIONS = 60

In [177]:
def evaluate_offspring(offs: list[Player]) -> list[(Player, float)]:
    return [(play, fitness(play)) for play in offs]

In [183]:
pop = generate_population(size = MU)
for man in pop:
    print(fitness(man), end=" ")
mutation = 0.2
for step in tqdm(range(GENERATIONS)):
    off = offspring(pop, mutation, size = LAMBDA)
    evals = evaluate_offspring(off)
    evals.sort(key = lambda x: x[1], reverse = True)
    pop = [p for p, v in evals[:MU]]
    print(evals[0][1])

0.4 0.36666666666666664 0.43333333333333335 0.23333333333333334 0.3 0.3333333333333333 0.3 0.2 0.3 0.2 

  2%|▏         | 1/60 [00:04<04:05,  4.16s/it]

0.5666666666666667


  3%|▎         | 2/60 [00:08<03:57,  4.10s/it]

0.6


  5%|▌         | 3/60 [00:12<03:56,  4.14s/it]

0.5666666666666667


  7%|▋         | 4/60 [00:16<03:52,  4.15s/it]

0.7


  8%|▊         | 5/60 [00:20<03:48,  4.15s/it]

0.5666666666666667


 10%|█         | 6/60 [00:24<03:44,  4.15s/it]

0.6


 12%|█▏        | 7/60 [00:28<03:38,  4.13s/it]

0.6666666666666666


 13%|█▎        | 8/60 [00:33<03:35,  4.15s/it]

0.5666666666666667


 15%|█▌        | 9/60 [00:37<03:30,  4.14s/it]

0.6333333333333333


 17%|█▋        | 10/60 [00:41<03:26,  4.12s/it]

0.7


 18%|█▊        | 11/60 [00:45<03:20,  4.10s/it]

0.6333333333333333


 20%|██        | 12/60 [00:49<03:15,  4.08s/it]

0.6666666666666666


 22%|██▏       | 13/60 [00:53<03:11,  4.07s/it]

0.7


 23%|██▎       | 14/60 [00:57<03:07,  4.09s/it]

0.7


 25%|██▌       | 15/60 [01:01<03:03,  4.07s/it]

0.7


 27%|██▋       | 16/60 [01:05<02:58,  4.07s/it]

0.7666666666666667


 28%|██▊       | 17/60 [01:09<02:54,  4.07s/it]

0.7333333333333333


 30%|███       | 18/60 [01:13<02:50,  4.06s/it]

0.7333333333333333


 32%|███▏      | 19/60 [01:17<02:46,  4.06s/it]

0.6666666666666666


 33%|███▎      | 20/60 [01:21<02:43,  4.08s/it]

0.8


 35%|███▌      | 21/60 [01:26<02:39,  4.08s/it]

0.7333333333333333


 37%|███▋      | 22/60 [01:30<02:35,  4.09s/it]

0.8


 38%|███▊      | 23/60 [01:34<02:31,  4.09s/it]

0.7666666666666667


 40%|████      | 24/60 [01:38<02:28,  4.11s/it]

0.7666666666666667


 42%|████▏     | 25/60 [01:42<02:24,  4.13s/it]

0.7333333333333333


 43%|████▎     | 26/60 [01:46<02:20,  4.13s/it]

0.7666666666666667


 45%|████▌     | 27/60 [01:50<02:16,  4.13s/it]

0.7333333333333333


 47%|████▋     | 28/60 [01:55<02:13,  4.16s/it]

0.8


 48%|████▊     | 29/60 [01:59<02:09,  4.18s/it]

0.8


 50%|█████     | 30/60 [02:03<02:06,  4.22s/it]

0.7666666666666667


 52%|█████▏    | 31/60 [02:07<02:01,  4.18s/it]

0.7666666666666667


 53%|█████▎    | 32/60 [02:11<01:56,  4.16s/it]

0.7666666666666667


 55%|█████▌    | 33/60 [02:15<01:52,  4.15s/it]

0.8


 57%|█████▋    | 34/60 [02:20<01:47,  4.14s/it]

0.8333333333333334


 58%|█████▊    | 35/60 [02:24<01:42,  4.12s/it]

0.8666666666666667


 60%|██████    | 36/60 [02:28<01:38,  4.11s/it]

0.8


 62%|██████▏   | 37/60 [02:32<01:34,  4.09s/it]

0.8333333333333334


 63%|██████▎   | 38/60 [02:36<01:29,  4.08s/it]

0.8


 65%|██████▌   | 39/60 [02:40<01:25,  4.09s/it]

0.7666666666666667


 67%|██████▋   | 40/60 [02:44<01:21,  4.09s/it]

0.8


 68%|██████▊   | 41/60 [02:48<01:17,  4.09s/it]

0.7666666666666667


 70%|███████   | 42/60 [02:52<01:13,  4.08s/it]

0.8


 72%|███████▏  | 43/60 [02:56<01:09,  4.09s/it]

0.8333333333333334


 73%|███████▎  | 44/60 [03:00<01:05,  4.09s/it]

0.8333333333333334


 75%|███████▌  | 45/60 [03:05<01:01,  4.11s/it]

0.7666666666666667


 77%|███████▋  | 46/60 [03:09<00:57,  4.11s/it]

0.8333333333333334


 78%|███████▊  | 47/60 [03:13<00:53,  4.11s/it]

0.9


 80%|████████  | 48/60 [03:17<00:49,  4.10s/it]

0.8666666666666667


 82%|████████▏ | 49/60 [03:21<00:45,  4.11s/it]

0.8333333333333334


 83%|████████▎ | 50/60 [03:25<00:41,  4.12s/it]

0.9333333333333333


 85%|████████▌ | 51/60 [03:29<00:37,  4.12s/it]

0.9


 87%|████████▋ | 52/60 [03:33<00:32,  4.12s/it]

0.9


 88%|████████▊ | 53/60 [03:38<00:28,  4.12s/it]

0.9666666666666667


 90%|█████████ | 54/60 [03:42<00:24,  4.12s/it]

0.9


 92%|█████████▏| 55/60 [03:46<00:20,  4.13s/it]

0.8333333333333334


 93%|█████████▎| 56/60 [03:50<00:16,  4.13s/it]

0.8333333333333334


 95%|█████████▌| 57/60 [03:54<00:12,  4.14s/it]

0.8666666666666667


 97%|█████████▋| 58/60 [03:58<00:08,  4.14s/it]

0.8666666666666667


 98%|█████████▊| 59/60 [04:02<00:04,  4.12s/it]

0.9


100%|██████████| 60/60 [04:06<00:00,  4.11s/it]

0.8666666666666667





In [189]:
for man in pop:
    print(man, end="")
    print(fitness(man))

0.185150838621772 7.449220079963601 0.08061699882461633 0.2879562953487 0.15062773317922873 0.5940172559907504 0.7666666666666667
0.8774956354883903 7.119090843163844 0.14064467692522048 0.12097700425198403 0.5537865685362439 0.20809637144595636 0.7
0.7771590222080977 7.209273237009009 0.7054314827995146 0.00163738102782654 0.2078883820191882 0.19412886951745525 0.7666666666666667
0.6435949236717308 6.870256383659132 0.2416154360983365 0.35346172698319656 0.2565043408670175 0.2109672978721302 0.8
0.103068938797092 7.165822761295933 0.16564335950147022 0.34143284429828613 0.28632046764046654 0.10971698210183367 0.8
0.05769704455061875 6.911466009902176 0.20091019461127818 0.23822329138594728 0.3310932077321011 0.11271609276458218 0.8
0.8373587381614686 6.7399430955327055 0.4482833844433881 0.034105778350264945 0.0594163919565954 0.805612091719466 0.6666666666666666
0.316128723588722 6.796429561971617 0.5170351686477335 0.20213550339342165 0.24093631469247917 0.06582712288701553 0.8
0.30