# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task 2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task 2.2: An agent using evolved rules using ES

In [1]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
from random import random, choice, randint
from copy import deepcopy
from typing import Callable, Literal
from dataclasses import dataclass, field
from tqdm.notebook import trange
import pickle
import numpy as np

## The *Nim* and *Nimply* classes

In [2]:
# named tuple to indicate a possible Nim ply
Nimply = namedtuple("Nimply", "row, num_objects")

In [3]:
class Nim:
    """
    Class implementing the Nim game.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """
        Game constructor.

        Args:
            num_rows: number of rows (piles);
            k: maximum number of objects you can nim from a row.

        Returns:
            None.
        """
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    def __repr__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    @property
    def k(self) -> int:
        return self._k

    def nimming(self, ply: Nimply) -> None:
        """
        Update the game by performing a ply.

        Args:
            ply: ply to perform.

        Returns:
            None.
        """
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k, f"{num_objects=}, {self._k=}"
        self._rows[row] -= num_objects

## Game Definition

In [4]:
def play_game(nim: Nim, strategy1: Callable[[Nim], Nimply], strategy2: Callable[[Nim], Nimply]) -> Literal[0, 1]:
    """
    Play a Nim game using the given strategies.

    Args:
        nim: Nim game instance;
        strategy1: Player 0 strategy;
        strategy2: Player 1 strategy.

    Returns:
        player: the winning player.
    """
    logging.getLogger().setLevel(logging.WARN)

    strategy = (strategy1, strategy2)

    logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        logging.info(f"ply: player {player} plays {ply}, {nim_sum(nim)}")
        nim.nimming(ply)
        logging.info(f"status: {nim}")
        player = 1 - player
    logging.info(f"status: Player {player} won!")

    return player


def play_games(
    nim: Nim,
    player: int,
    player_strategy: Callable[[Nim], Nimply],
    opponent_strategy: Callable[[Nim], Nimply],
    n_matches: int,
) -> list[int]:
    """
    Play a given number of matches on a Nim game instance.

    Args:
        nim: Nim game instance;
        player: choose if your strategy is played by the first or second player;
        player_strategy: your player strategy;
        opponent_strategy: your opponent strategy;
        n_matches: number of matchers to play.

    Returns:
        List history of the winning players.
    """
    return [
        play_game(deepcopy(nim), player_strategy, opponent_strategy)
        if player == 0
        else play_game(deepcopy(nim), opponent_strategy, player_strategy)
        for _ in range(n_matches)
    ]

In [5]:
def streak(player_strategy, opponent_strategy, n_matches):
    """
    Play a given number of random matches between two strategies.

    Args:
        player_strategy: your player strategy;
        opponent_strategy: your opponent strategy;
        n_matches: number of matchers to play.

    Returns:
        ercentage of wins.
    """
    wins = 0
    for _ in range(n_matches):
        random_size = randint(4, 10)
        random_k = choice([None, None, *[randint(2, random_size * 2 + 1) for _ in range(2)]])
        nim = Nim(random_size, random_k)
        player = choice([0, 1])
        wins += 1 if play_games(nim, player, player_strategy, opponent_strategy, 1)[0] == player else 0
    return wins / n_matches

## Rule-based strategies 

In [6]:
def pure_random(state: Nim) -> Nimply:
    """
    Perform a completely random move.

    Args:
        state: Nim game instance.

    Returns:
        A ply is returned.
    """
    row = choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = randint(1, state.rows[row] if state.k is None else min(state.rows[row], state.k))
    return Nimply(row, num_objects)

In [7]:
def gabriele(state: Nim) -> Nimply:
    """
    Pick always the maximum possible number of the lowest row.

    Args:
        state: Nim game instance.

    Returns:
        A ply is returned.
    """
    possible_moves = [
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1 if state.k is None else min(c + 1, state.k))
    ]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))

In [8]:
def nim_sum(state: Nim) -> int:
    """
    Compute nim-sum value on a Nim game instance.

    Args:
        state: Nim game instance.

    Returns:
        The nim-sum value of the current game is returned.
    """
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def generate_all_plies(state: Nim) -> list[Nimply]:
    """
    Generate all possible plies on the current game.

    Args:
        state: Nim game instance.

    Returns:
        A list of plies is returned.
    """
    return [
        Nimply(r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, c + 1 if state.k is None else min(c + 1, state.k))
    ]


def analize(state: Nim) -> dict:
    """
    Given a Nim game instance, this function computes all the possible plies
    and the corresponding nim-sum values.

    Args:
        state: Nim game instance.

    Returns:
        Plies and corresponding nim-sum values are returned as a dict.
    """
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in generate_all_plies(state):
        tmp = deepcopy(state)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    """
    If possible, this function returns a move which leads to a nim-sum value not equal to zero,
    otherwise a random move among all the possible moves.

    Args:
        state: Nim game instance.

    Returns:
        A ply is returned.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    logging.debug(pformat(f"{analysis['possible_moves']}"))
    ply = choice(spicy_moves)
    return ply

### Task 2.1

Without considering the value of $k$, we can win in a Nim game if we always play a move in order to have a nim-sum value of $0$.

At some point, we end up in a position that has only one row of size 2 or more. \
In such a position the nim-sum value is not equal to 0. To win we must reduce this to size 0 or 1, leaving an odd number of rows with size 1. From that point on, all moves are forced.

_*Reference: https://en.wikipedia.org/wiki/Nim#Proof_of_the_winning_formula*_.

The following function implements this winning strategy.

In [9]:
def expert_system(state: Nim) -> Nimply:
    """
    This function implement an expert system which beats the strategies defined above.
    Details on how to win are available at https://en.wikipedia.org/wiki/Nim#Proof_of_the_winning_formula.

    Args:
        state: Nim game instance.

    Returns:
        A ply is returned.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    not_zero_rows = len(state.rows) - state.rows.count(0)
    one_count_rows = state.rows.count(1)
    if one_count_rows == not_zero_rows - 1:
        is_odd = (one_count_rows % 2) == 1
        row, objects = [(row, objects) for row, objects in enumerate(state.rows) if objects > 1][0]
        if is_odd:
            return Nimply(row, objects if state.k is None else min(objects, state.k))
        return Nimply(row, objects - 1 if state.k is None else min(objects - 1, state.k))
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns == 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    logging.debug(pformat(f"{analysis['possible_moves']}"))
    ply = choice(spicy_moves)
    return ply

## Adaptive Strategy

### Task 2.2

Our idea is to implement an adaptive strategy that learns how to play depending on the current game phase. 

Suppose that we have $3$ different phases:
- _early game_;
- _mid game_;
- _end game_.

We decide in which phase we are in based on the formula $n_\textrm{phase} = \dfrac{\textrm{\# remaining plies}}{\textrm{\# possible plies from the beginning}} \in [0, 1]$.
 
We define $t_1, t_2$, two thresholds learnt by the _ES_ strategy, that divide the interval $[0, 1]$ in three parts: $i_1 = [0, t_1)$, $i_2 = [t_1, t_2]$ and $i_3 = (t_2, 1]$. \
If $n_\textrm{phase} \in i_1$ we are in _early game_, $n_\textrm{phase} \in i_2$ we are in _mid game_, otherwise we are in _end game_.

Each phase has its own set of weights. One weight is associated to one of the strategies defined above. The probability of picking a strategy for a ply is directly proportional to its weight value.

> Note: Our first attempt was to repeatedly apply a softmax function to maintain a list of probabilities instead of a list of weights. Using this technique, each probability converges to $\dfrac{1}{n_{\textrm{strategies}}}$ and then it is not applicable.

The _ES_ algorithm learns which strategy is best to play in each phase.

An individual fitness is the percentage of wins against an `expert_system` in a random sequence of matches.

In [10]:
def get_phase_ratio(state: Nim) -> float:
    """
    Get the n_phase value.

    Args:
        state: Nim game instance.

    Returns:
        The n_phase value is returned.
    """
    all_plys_new_game = len(generate_all_plies(Nim(len(state.rows), state.k)))
    all_plys_current_game = len(generate_all_plies(state))
    return all_plys_current_game / all_plys_new_game

In [11]:
@dataclass(init=False)
class Individual:
    """
    Class that represents an individual.
    """

    strategies: list[Callable[[Nim], Nimply]]
    strategy_weights: list[list[float]]
    phase_thresholds: list[float]

    def __init__(
        self, strategies: int = None, strategy_weights: list[list[float]] = None, phase_thresholds: list[float] = None
    ):
        """
        Individual constructor.

        Args:
            strategies: strategies to use;
            strategy_weights: matrix with a weight for each strategy and game phase;
            phase_thresholds: thresholds to split the game in 3 phases.
        """
        if strategies is None:
            strategies = [pure_random, gabriele, optimal, expert_system]
        if strategy_weights is None:
            strategy_weights = np.random.randint(low=1, high=10, size=(3, len(strategies)))
        if phase_thresholds is None:
            phase_thresholds = sorted([random(), random()])
        else:
            phase_thresholds = sorted(phase_thresholds)

        self.strategies = strategies
        self.strategy_weights = strategy_weights
        self.phase_thresholds = phase_thresholds

    def _softmax(self, values) -> list[list[float]]:
        tmp = np.exp(values)
        return (tmp / np.sum(tmp)).tolist()

    def mutate(ind: "Individual") -> "Individual":
        global mutation_rate
        strategy_weights = np.clip(np.random.normal(loc=ind.strategy_weights, scale=mutation_rate[0]), 1, 10).tolist()
        phase_thresholds = np.clip(np.random.normal(loc=ind.phase_thresholds, scale=mutation_rate[1]), 0, 1).tolist()
        return Individual(
            strategies=ind.strategies, strategy_weights=strategy_weights, phase_thresholds=phase_thresholds
        )

    def __call__(self, state: Nim) -> Nimply:
        phase_ratio = get_phase_ratio(state)
        phase_index = (
            0 if phase_ratio < self.phase_thresholds[0] else (2 if phase_ratio > self.phase_thresholds[1] else 1)
        )
        weights = self.strategy_weights[phase_index]
        strategy = np.random.choice(self.strategies, p=self._softmax(weights))
        return strategy(state)

### (1 + $\lambda$)-ES

In a _(1 + $\lambda$)-ES strategy_ we start with a single parent and in each iteration we generate $\lambda$ new individuals (_offspring_) using only random mutations performed on the parent. \
We then evaluate the quality of the new individuals and decide whether it is better to keep the parent or to update it, i.e. a new individual becomes the parent.

In [41]:
LAMBDA = 30
N_MATCHES = 10
N_ITERS = 1_000
FACTOR = 1.5
OPPONENT = expert_system
mutation_rate: tuple[float] = (2.5, 0.1)

In [36]:
parent = Individual()
parent_fitness = streak(parent, OPPONENT, N_MATCHES)
print('-- First Individual')
pprint(parent)

pbar = trange(0, N_ITERS // LAMBDA)
for _ in pbar:
    pbar.set_description(f'Parent Accuracy: {parent_fitness:.2%}')
    offspring = [parent.mutate() for _ in range(LAMBDA)]
    offspring_fitness = [streak(ind, OPPONENT, N_MATCHES) for ind in offspring]

    if np.sum(np.array(offspring_fitness) > parent_fitness) / LAMBDA > 1 / 5:
        mutation_rate = (mutation_rate[0] * FACTOR, mutation_rate[1] * FACTOR)
    else:
        mutation_rate = (mutation_rate[0] / FACTOR, mutation_rate[1] / FACTOR)

    solution_index = np.argmax(offspring_fitness)
    if parent_fitness < offspring_fitness[solution_index]:
        parent = offspring[solution_index]
        parent_fitness = offspring_fitness[solution_index]

    if parent_fitness >= 0.999:
        break

best_plus = parent
print('-- Best Individual')
pprint(best_plus)
print(f'Best Individual Accuracy: {streak(best_plus, OPPONENT, 100):.2%}')

-- First Individual
Individual(strategies=[<function pure_random at 0x1096c9760>,
                       <function gabriele at 0x1096c98a0>,
                       <function optimal at 0x1096c9b20>,
                       <function expert_system at 0x1096c9580>],
           strategy_weights=array([[2, 2, 3, 9],
       [3, 7, 7, 4],
       [1, 5, 2, 9]]),
           phase_thresholds=[0.3178864333248911, 0.37234192826551626])


  0%|          | 0/33 [00:00<?, ?it/s]

-- Best Individual
Individual(strategies=[<function pure_random at 0x1096c9760>,
                       <function gabriele at 0x1096c98a0>,
                       <function optimal at 0x1096c9b20>,
                       <function expert_system at 0x1096c9580>],
           strategy_weights=[[1.0546074594975139,
                              1.2371015165152381,
                              1.2987271315346984,
                              8.837894831030649],
                             [5.359184210109394,
                              2.8403651499224076,
                              3.6836658822228787,
                              9.94485543681981],
                             [4.821053743895413,
                              2.5925847466986003,
                              3.204851545801586,
                              9.076811249985374]],
           phase_thresholds=[0.3513941449954518, 0.39292677311548024])
Best Individual Accuracy: 53.00%


In [38]:
# serialize the best individual for the plus strategy
with open('best_plus.pkl', 'wb') as f:
    pickle.dump(best_plus, f)

In [15]:
# load the best individual for the plus strategy
with open('best_plus.pkl', 'rb') as f:
    best_plus = pickle.load(f)

### ($\mu$ + $\lambda$)-ES

In a _($\mu$ + $\lambda$)-ES strategy_ we start with $\mu$ parents and in each iteration we generate $\lambda$ new individuals (_offspring_) using only random mutations performed on the parents. Each parent will generate $\dfrac{\lambda}{\mu}$ new individuals. \
We then evaluate the quality of the new individuals and update the parents by keeping only the best $\mu$ individuals among the parents and the offspring.

In [61]:
MU = 3
LAMBDA = 30
N_MATCHES = 10
N_ITERS = 1_000
FACTOR = 1.5
OPPONENT = expert_system
mutation_rate: tuple[float] = (2.5, 0.1)

In [62]:
map_individual_to_fitness = np.vectorize(lambda parent: streak(parent, OPPONENT, N_MATCHES))

parents = np.array([Individual() for _ in range(MU)])
parents_fitness = map_individual_to_fitness(parents)
max_parents_fitness = np.max(parents_fitness)
print('-- First Individuals')
pprint(parents)

pbar = trange(0, N_ITERS // LAMBDA)
for _ in pbar:
    pbar.set_description(f'Best Parent Accuracy: {max_parents_fitness:.2%}')
    offspring = np.array([parent.mutate() for parent in parents for _ in range(LAMBDA // MU)])
    offspring_fitness = map_individual_to_fitness(offspring)

    if np.sum(offspring_fitness > max_parents_fitness) / LAMBDA > 1 / 5:
        mutation_rate = (mutation_rate[0] * FACTOR, mutation_rate[1] * FACTOR)
    else:
        mutation_rate = (mutation_rate[0] / FACTOR, mutation_rate[1] / FACTOR)

    population = np.hstack((offspring, parents))
    population_fitness = np.hstack((offspring_fitness, parents_fitness))
    population_fitness_indexes = np.argsort(population_fitness)
    parents = population[population_fitness_indexes][::-1][:MU]
    parents_fitness = population_fitness[population_fitness_indexes][::-1][:MU]
    max_parents_fitness = np.max(parents_fitness)

    if max_parents_fitness >= 0.999:
        break

best_plus_mu_lambda = choice(parents[parents_fitness == max_parents_fitness])
print('-- Best Individuals')
pprint(best_plus_mu_lambda)
print(f'Best Individual Accuracy: {streak(best_plus_mu_lambda, OPPONENT, 100):.2%}')

-- First Individuals
array([Individual(strategies=[<function pure_random at 0x1096c9760>, <function gabriele at 0x1096c98a0>, <function optimal at 0x1096c9b20>, <function expert_system at 0x1096c9580>], strategy_weights=array([[6, 9, 3, 2],
              [6, 4, 3, 1],
              [9, 9, 8, 3]]), phase_thresholds=[0.6458599491837625, 0.7849314920012852])                                                                                                                                   ,
       Individual(strategies=[<function pure_random at 0x1096c9760>, <function gabriele at 0x1096c98a0>, <function optimal at 0x1096c9b20>, <function expert_system at 0x1096c9580>], strategy_weights=array([[5, 4, 9, 2],
              [1, 6, 6, 7],
              [9, 9, 7, 9]]), phase_thresholds=[0.3633419256594267, 0.6703733589327934])                                                                                                                                   ,
       Individual(strategies=[<function p

  0%|          | 0/33 [00:00<?, ?it/s]

-- Best Individuals
Individual(strategies=[<function pure_random at 0x1096c9760>,
                       <function gabriele at 0x1096c98a0>,
                       <function optimal at 0x1096c9b20>,
                       <function expert_system at 0x1096c9580>],
           strategy_weights=[[5.637417236034652,
                              3.6404611215484928,
                              3.5138873688328736,
                              5.495421847944803],
                             [1.1625274727520074,
                              3.12444533004775,
                              5.413734875616345,
                              10.0],
                             [2.7826479059572513,
                              6.609153819328993,
                              5.2727669520168154,
                              9.42103106095656]],
           phase_thresholds=[1.111228389619841e-06, 0.662344690194218])
Best Individual Accuracy: 35.00%


In [63]:
# serialize the best individual for the plus (mu + lambda) strategy
with open('best_plus_mu_lambda.pkl', 'wb') as f:
    pickle.dump(best_plus_mu_lambda, f)

In [14]:
# load the best individual for the plus (mu + lambda) strategy
with open('best_plus_mu_lambda.pkl', 'rb') as f:
    best_plus_mu_lambda = pickle.load(f)

### (1, $\lambda$)-ES

In a _(1, $\lambda$)-ES strategy_ we start with a single parent and in each iteration we generate $\lambda$ new individuals (_offspring_) using only random mutations performed on the parent. \
We then evaluate the quality of the new individuals and the best individual in the offspring becomes the new parent.

In [74]:
LAMBDA = 30
N_MATCHES = 10
N_ITERS = 1_000
FACTOR = 1.1
OPPONENT = expert_system
mutation_rate: tuple[float] = (1.5, 0.1)

In [75]:
parent = Individual()
parent_fitness = streak(parent, OPPONENT, N_MATCHES)
best_comma = parent
best_comma_fitness = parent_fitness
print('-- First Individual')
pprint(parent)

pbar = trange(0, N_ITERS // LAMBDA)
for _ in pbar:
    pbar.set_description(f'Parent Accuracy: {parent_fitness:.2%}')
    offspring = [parent.mutate() for _ in range(LAMBDA)]
    offspring_fitness = [streak(ind, OPPONENT, N_MATCHES) for ind in offspring]

    if np.sum(np.array(offspring_fitness) > parent_fitness) / LAMBDA > 1 / 5:
        mutation_rate = (mutation_rate[0] * FACTOR, mutation_rate[1] * FACTOR)
    else:
        mutation_rate = (mutation_rate[0] / FACTOR, mutation_rate[1] / FACTOR)

    solution_index = np.argmax(offspring_fitness)
    parent = offspring[solution_index]
    parent_fitness = offspring_fitness[solution_index]

    if best_comma_fitness < parent_fitness:
        best_comma = parent
        best_comma_fitness = parent_fitness

    if parent_fitness >= 0.999:
        break

print('-- Best Individual')
pprint(best_comma)
print(f'Best Individual Accuracy: {streak(best_comma, OPPONENT, 100):.2%}')

-- First Individual
Individual(strategies=[<function pure_random at 0x1096c9760>,
                       <function gabriele at 0x1096c98a0>,
                       <function optimal at 0x1096c9b20>,
                       <function expert_system at 0x1096c9580>],
           strategy_weights=array([[9, 7, 9, 9],
       [1, 5, 8, 2],
       [7, 9, 2, 4]]),
           phase_thresholds=[0.7719534138669364, 0.8310978528484756])


  0%|          | 0/33 [00:00<?, ?it/s]

-- Best Individual
Individual(strategies=[<function pure_random at 0x1096c9760>,
                       <function gabriele at 0x1096c98a0>,
                       <function optimal at 0x1096c9b20>,
                       <function expert_system at 0x1096c9580>],
           strategy_weights=[[4.664006878649911,
                              3.8736983447138247,
                              5.220779969398328,
                              9.79578265740777],
                             [2.8252679397185076,
                              3.294736753635436,
                              3.856527981637476,
                              5.791523798828324],
                             [2.306070214180621,
                              5.320849194254264,
                              2.3766687139005955,
                              8.726438276922705]],
           phase_thresholds=[0.8467489248038672, 0.927095654088388])
Best Individual Accuracy: 41.00%


In [76]:
# serialize the best individual for the comma strategy
with open('best_comma.pkl', 'wb') as f:
    pickle.dump(best_comma, f)

In [12]:
# load the best individual for the comma strategy
with open('best_comma.pkl', 'rb') as f:
    best_comma = pickle.load(f)

### ($\mu$, $\lambda$)-ES

In a _($\mu$, $\lambda$)-ES strategy_ we start with $\mu$ parents and in each iteration we generate $\lambda$ new individuals (_offspring_) using only random mutations performed on the parents. Each parent will generate $\dfrac{\lambda}{\mu}$ new individuals. \
We then evaluate the quality of the new individuals and update the parents by keeping only the best $\mu$ individuals in the offspring.

In [79]:
MU = 3
LAMBDA = 30
N_MATCHES = 10
N_ITERS = 1_000
FACTOR = 1.5
OPPONENT = expert_system
mutation_rate: tuple[float] = (2.5, 0.1)

In [80]:
map_individual_to_fitness = np.vectorize(lambda parent: streak(parent, OPPONENT, N_MATCHES))

parents = np.array([Individual() for _ in range(MU)])
parents_fitness = map_individual_to_fitness(parents)
max_parents_fitness = np.max(parents_fitness)
best_comma_mu_lambda = choice(parents[parents_fitness == max_parents_fitness])
best_comma_mu_lambda_fitness = max_parents_fitness
print('-- First Individuals')
pprint(parents)

pbar = trange(0, N_ITERS // LAMBDA)
for _ in pbar:
    pbar.set_description(f'Best Parent Accuracy: {max_parents_fitness:.2%}')
    offspring = np.array([parent.mutate() for parent in parents for _ in range(LAMBDA // MU)])
    offspring_fitness = map_individual_to_fitness(offspring)

    if np.sum(offspring_fitness > max_parents_fitness) / LAMBDA > 1 / 5:
        mutation_rate = (mutation_rate[0] * FACTOR, mutation_rate[1] * FACTOR)
    else:
        mutation_rate = (mutation_rate[0] / FACTOR, mutation_rate[1] / FACTOR)

    offspring_fitness_indexes = np.argsort(offspring_fitness)
    parents = offspring[offspring_fitness_indexes][::-1][:MU]
    parents_fitness = offspring_fitness[offspring_fitness_indexes][::-1][:MU]
    max_parents_fitness = np.max(parents_fitness)

    if best_comma_mu_lambda_fitness < max_parents_fitness:
        best_comma_mu_lambda = choice(parents[parents_fitness == max_parents_fitness])
        best_comma_mu_lambda_fitness = max_parents_fitness

    if max_parents_fitness >= 0.999:
        break

print('-- Best Individuals')
pprint(best_comma_mu_lambda)
print(f'Best Individual Accuracy: {streak(best_comma_mu_lambda, OPPONENT, 100):.2%}')

-- First Individuals
array([Individual(strategies=[<function pure_random at 0x1096c9760>, <function gabriele at 0x1096c98a0>, <function optimal at 0x1096c9b20>, <function expert_system at 0x1096c9580>], strategy_weights=array([[8, 7, 9, 5],
              [3, 6, 3, 8],
              [2, 5, 7, 3]]), phase_thresholds=[0.538656898774971, 0.7547411767064064])                                                                                                                                    ,
       Individual(strategies=[<function pure_random at 0x1096c9760>, <function gabriele at 0x1096c98a0>, <function optimal at 0x1096c9b20>, <function expert_system at 0x1096c9580>], strategy_weights=array([[4, 1, 7, 3],
              [8, 7, 8, 3],
              [1, 8, 9, 3]]), phase_thresholds=[0.06652628499690783, 0.7627991617592504])                                                                                                                                  ,
       Individual(strategies=[<function p

  0%|          | 0/33 [00:00<?, ?it/s]

-- Best Individuals
Individual(strategies=[<function pure_random at 0x1096c9760>,
                       <function gabriele at 0x1096c98a0>,
                       <function optimal at 0x1096c9b20>,
                       <function expert_system at 0x1096c9580>],
           strategy_weights=[[4.5539151576395955,
                              2.745291289717418,
                              2.5529870687978877,
                              9.525133044134293],
                             [1.844380179897061,
                              7.26207565382174,
                              8.224640343139457,
                              4.913202339337638],
                             [6.911161153550008,
                              1.7293993326010413,
                              4.285519853688302,
                              9.705183146898264]],
           phase_thresholds=[0.7905139250241182, 0.8113032777940925])
Best Individual Accuracy: 38.00%


In [81]:
# serialize the best individual for the comma (mu, lambda) strategy
with open('best_comma_mu_lambda.pkl', 'wb') as f:
    pickle.dump(best_comma_mu_lambda, f)

In [13]:
# load the best individual for the comma (mu, lambda) strategy
with open('best_comma_mu_lambda.pkl', 'rb') as f:
    best_comma_mu_lambda = pickle.load(f)

### Observations

Since our *evolved agents* include `expert_system` among the strategies they can use, we expect its weight to be the highest in all phases of the game after a few generations. 


In particular, we can sometimes see that `expert_system` does not have the highest weight in some phases. This is because the population has learned to discard some phases by making their interval negligible. In these cases, the corresponding strategy weights are never taken into account.

All in all, each agent will play the best possible strategy most of the time, with the exception of a few rounds that could lead to victory. If `expert_system` expects us to play the best possible play and we do not, the game could go in our favour.

## Assess Strategies

In [28]:
def assess_strategy(
    player_strategy: Callable[[Nim], Nimply],
    opponent_strategies: list[Callable[[Nim], Nimply]],
    n_matches: int,
) -> None:
    """
    This function prints how many times the given player strategy
    wins against the opponent strategies by playing a certain number
    of random Nim games.

    Args:
        player_strategy: which strategy to play;
        opponent_strategies: which strategies to play against;
        n_matches: number of matches to assess the quality of the strategy.

    Returns:
        None.
    """
    for opponent_strategy in opponent_strategies:
        accuracy = streak(player_strategy, opponent_strategy, n_matches)
        print(f"-- Player {player_strategy.__qualname__} against {opponent_strategy.__qualname__}: {accuracy:.2%} wins")

### Expert System

In [92]:
assess_strategy(
    player_strategy=expert_system,
    opponent_strategies=[pure_random, gabriele, optimal],
    n_matches=1000,
)

-- Player expert_system against pure_random: 97.30% wins
-- Player expert_system against gabriele: 98.70% wins
-- Player expert_system against optimal: 97.80% wins


### (1 + $\lambda$)-ES

In [100]:
best_plus.__qualname__ = "adaptive strategy (1 + 𝜆)-ES"

assess_strategy(
    player_strategy=best_plus,
    opponent_strategies=[pure_random, gabriele, optimal, expert_system],
    n_matches=1000,
)

-- Player adaptive strategy (1 + 𝜆)-ES against pure_random: 97.40% wins
-- Player adaptive strategy (1 + 𝜆)-ES against gabriele: 99.10% wins
-- Player adaptive strategy (1 + 𝜆)-ES against optimal: 98.20% wins
-- Player adaptive strategy (1 + 𝜆)-ES against expert_system: 48.10% wins


In [28]:
best_plus.__qualname__ = "adaptive strategy (1 + 𝜆)-ES"
player = randint(0, 1)
accuracy = (
    play_games(
        nim=Nim(5, 3),
        player=player,
        player_strategy=best_plus,
        opponent_strategy=expert_system,
        n_matches=100,
    ).count(player)
    / 100
)
print(
    f'{best_plus.__qualname__} performance as Player {player} against {expert_system.__qualname__} on the 100-match Nim(5, 3) competition: {accuracy:.2%}'
)

adaptive strategy (1 + 𝜆)-ES performance as Player 0 against expert_system on the 100-match Nim(5, 3) competition: 59.00%


### ($\mu$ + $\lambda$)-ES

In [101]:
best_plus_mu_lambda.__qualname__ = "adaptive strategy (μ + 𝜆)-ES"

assess_strategy(
    player_strategy=best_plus_mu_lambda,
    opponent_strategies=[pure_random, gabriele, optimal, expert_system],
    n_matches=1000,
)

-- Player adaptive strategy (μ + 𝜆)-ES against pure_random: 96.10% wins
-- Player adaptive strategy (μ + 𝜆)-ES against gabriele: 98.10% wins
-- Player adaptive strategy (μ + 𝜆)-ES against optimal: 97.50% wins
-- Player adaptive strategy (μ + 𝜆)-ES against expert_system: 42.50% wins


In [30]:
best_plus_mu_lambda.__qualname__ = "adaptive strategy (μ + 𝜆)-ES"
player = randint(0, 1)
accuracy = (
    play_games(
        nim=Nim(5, 3),
        player=player,
        player_strategy=best_plus_mu_lambda,
        opponent_strategy=expert_system,
        n_matches=100,
    ).count(player)
    / 100
)
print(
    f'{best_plus_mu_lambda.__qualname__} performance as Player {player} against {expert_system.__qualname__} on the 100-match Nim(5, 3) competition: {accuracy:.2%}'
)

adaptive strategy (μ + 𝜆)-ES performance as Player 0 against expert_system on the 100-match Nim(5, 3) competition: 60.00%


### (1, $\lambda$)-ES

In [102]:
best_comma.__qualname__ = "adaptive strategy (1, 𝜆)-ES"

assess_strategy(
    player_strategy=best_comma,
    opponent_strategies=[pure_random, gabriele, optimal, expert_system],
    n_matches=1000,
)

-- Player adaptive strategy (1, 𝜆)-ES against pure_random: 96.10% wins
-- Player adaptive strategy (1, 𝜆)-ES against gabriele: 98.60% wins
-- Player adaptive strategy (1, 𝜆)-ES against optimal: 97.80% wins
-- Player adaptive strategy (1, 𝜆)-ES against expert_system: 41.30% wins


In [91]:
best_comma.__qualname__ = "adaptive strategy (1, 𝜆)-ES"
player = randint(0, 1)
accuracy = (
    play_games(
        nim=Nim(5, 3),
        player=player,
        player_strategy=best_comma,
        opponent_strategy=expert_system,
        n_matches=100,
    ).count(player)
    / 100
)
print(
    f'{best_comma.__qualname__} performance as Player {player} against {expert_system.__qualname__} on the 100-match Nim(5, 3) competition: {accuracy:.2%}'
)

adaptive strategy (1, 𝜆)-ES performance as Player 0 against expert_system on the 100-match Nim(5, 3) competition: 65.00%


### ($\mu$, $\lambda$)-ES

In [103]:
best_comma_mu_lambda.__qualname__ = "adaptive strategy (μ, 𝜆)-ES"

assess_strategy(
    player_strategy=best_comma_mu_lambda,
    opponent_strategies=[pure_random, gabriele, optimal, expert_system],
    n_matches=1000,
)

-- Player adaptive strategy (μ, 𝜆)-ES against pure_random: 96.90% wins
-- Player adaptive strategy (μ, 𝜆)-ES against gabriele: 98.00% wins
-- Player adaptive strategy (μ, 𝜆)-ES against optimal: 98.10% wins
-- Player adaptive strategy (μ, 𝜆)-ES against expert_system: 41.00% wins


In [84]:
best_comma_mu_lambda.__qualname__ = "adaptive strategy (μ, 𝜆)-ES"
player = randint(0, 1)
accuracy = (
    play_games(
        nim=Nim(5, 3),
        player=player,
        player_strategy=best_comma_mu_lambda,
        opponent_strategy=expert_system,
        n_matches=100,
    ).count(player)
    / 100
)
print(
    f'{best_comma_mu_lambda.__qualname__} performance as Player {player} against {expert_system.__qualname__} on the 100-match Nim(5, 3) competition: {accuracy:.2%}'
)

adaptive strategy (μ, 𝜆)-ES performance as Player 0 against expert_system on the 100-match Nim(5, 3) competition: 59.00%


### Result analysis

As we expected, the _ES_ strategies developed in the sections above play better than any other _rule-based strategy_ except for `expert_system`.

Against `expert_system`, we get a percentage of wins nearly equal to $50\%$. 

My idea is that we should have achieved at least a value greater than $50\%$ because we have more knowledge than the implemented `expert_system`, in the sense that we can theoretically modulate which strategy to use, and we can occasionally fool the opponent with an unexpected ply. \
This increase in results could be obtained by tuning the hyperparameters of each _ES_ strategy or by increasing the number of generations (iterations).