## Evolution Strategy - Nim

1. Rappresentazione dello stato del gioco -> Nimply, Nim
2. Generazione delle mosse valide -> possible_moves(state)
3. Funzione di valutazione -> fitness_function(strategy)
    * fai giocare la strategia contro un'altra strategia e incrementa il punteggio se vince
    * possibilità di organizzare un torneo
    * possibilità di far giocare ogni strategia della popolazione contro tutte le altre strategie e prendere le migliori strategie per generare la nuova popolazione
4. Algoritmo evolutivo -> generation and evaluation of a population of strategies
    * Strategy -> a set of parameters that influence the behaviour of the agent
        * Probabilità di selezionare una mossa casuale
        * Punteggi assegnati a ciascuna riga
        * Livello di aggressività -> percentuale di oggetti da rimuovere dalla riga
5. Selezione, crossover e mutazione
6. Partita tra gli agents
7. Iterazione dell'algoritmo evolutivo
8. Test e valutazione
9. Ottimizzazione e affinamento
10. Documentazione e presentazione

This is a first and second attempt: with evolve() I obtain a strategy that plays almost at random, while with evolve_2() I obtain a strategy that loses almost every match (more than 80% of them).
I think the problem is in the function that evaluates the strategy and chooses the move to make (evolution_strategy()).

In [126]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from dataclasses import dataclass
import math
import copy
from tqdm.notebook import tqdm

## The *Nim* and *Nimply* classes

In [4]:
# A single move: take `num_objects` objects from the row with index `row`
Nimply = namedtuple("Nimply", ["row", "num_objects"])

In [5]:
# The game itself: a list of rows, each holding a number of objects
class Nim:
    """Mutable state of a game of Nim.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects. An instance is
    truthy while at least one object is left on the board.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Build ``num_rows`` rows and remember the optional per-move cap.

        Args:
            num_rows: number of rows on the board.
            k: largest number of objects one move may remove; ``None`` means
                no cap.
        """
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while there are still objects to take."""
        return sum(self._rows) > 0

    def __str__(self):
        """Return the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Current row sizes as an immutable tuple."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Validate the move ``ply`` and apply it to the board.

        Raises:
            AssertionError: if the move removes fewer than one object, more
                objects than the row holds, or more than the cap ``k``.
        """
        row, num_objects = ply
        # Validation stays assert-based on purpose: possible_moves() filters
        # illegal moves by catching AssertionError.
        # A move must remove at least one object, otherwise a player could
        # pass (0) or put objects back on the board (negative amount).
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [427]:
# Number of rows on the board
NIM_SIZE = 5
# Upper bound on the number of objects removed by a single move
MAX_OBJECTS = 2 * NIM_SIZE + 1


### Rule-Based Agent

In [7]:
import numpy as np

def nim_sum(state: Nim) -> int:
    """Return the nim-sum of ``state``: the bitwise XOR of all its row sizes.

    Integer XOR gives the same value as XOR-ing the rows' binary digits
    column by column, and it also works for an empty board (result 0) and
    for row sizes that need more than 32 bits.
    """
    total = 0
    for row_size in state.rows:
        total ^= row_size
    return total

def analize(raw: Nim) -> dict:
    """Map every candidate move to the nim-sum of the board it leads to.

    Returns a dictionary with the single key ``"possible_moves"`` whose value
    maps each ``Nimply`` to the nim-sum obtained after playing it.
    """
    outcome_by_move = {}
    for row_index, row_size in enumerate(raw.rows):
        for amount in range(1, row_size + 1):
            move = Nimply(row_index, amount)
            # Play the move on a copy so the real board is left untouched
            board_after = deepcopy(raw)
            board_after.nimming(move)
            outcome_by_move[move] = nim_sum(board_after)
    return {"possible_moves": outcome_by_move}

def optimal(state: Nim) -> Nimply:
    """Pick a random move among those that leave a non-zero nim-sum.

    When every move leads to a nim-sum of 0, the move is drawn among all
    the candidate moves instead.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcome_by_move = analysis["possible_moves"]
    candidates = [move for move in outcome_by_move if outcome_by_move[move] != 0]
    if len(candidates) == 0:
        # No move avoids a zero nim-sum: fall back to every move
        candidates = list(outcome_by_move)
    return random.choice(candidates)

### Evolutionary Agent

In [8]:
@dataclass
class Strategy:
    """Parameters that drive how the evolved agent chooses its move."""

    # Probability of playing a uniformly random legal move instead of the scored one
    randomness: float
    # One weight per row; a row is drawn with probability proportional to its weight
    rows_scores: list[float]
    # Fraction of the objects available in the chosen row to remove
    objects_to_take: float
    # True: round the number of objects up (ceil); False: round it down (floor)
    aggressive: bool

In [432]:
# Hyper-parameters of the evolutionary algorithm
# Games played in one fitness evaluation
NUMBER_OF_FITNESS_GAMES = 50
# Population size; a multiple of 4 lets the best half be split into parent pairs
INITIAL_POPULATION = 2 * 10
# Number of generations to run
NUMBER_OF_GENERATIONS = 50
# Placeholder for a trained strategy
best_strategy = None

In [10]:
def generate_initial_population(number_of_individuals: int) -> list[Strategy]:
    """Create ``number_of_individuals`` strategies with random parameters.

    Every numeric parameter is drawn with ``random.random()``; ``rows_scores``
    gets one value per row (``NIM_SIZE`` of them) and ``aggressive`` is a coin
    flip.
    """
    return [
        Strategy(
            randomness=random.random(),
            rows_scores=[random.random() for _ in range(NIM_SIZE)],
            objects_to_take=random.random(),
            aggressive=random.choice([True, False]),
        )
        for _ in range(number_of_individuals)
    ]

In [11]:
def possible_moves(state: Nim) -> list:
    """List every move (as a ``Nimply``) that ``state`` accepts right now.

    Each candidate is tried on a copy of the board; candidates rejected by
    ``Nim.nimming`` are reported on stdout and left out.
    """
    legal = []
    for row_index, row_size in enumerate(state.rows):
        for amount in range(1, row_size + 1):
            candidate = Nimply(row_index, amount)
            trial_board = deepcopy(state)
            try:
                trial_board.nimming(candidate)
            except AssertionError:
                print("assertion error")
            else:
                legal.append(candidate)
    return legal

In [444]:
def fitness_function(strategy1: Strategy, strategy2: Strategy, num_games: int = NUMBER_OF_FITNESS_GAMES) -> int:
    """Count how many of ``num_games`` games ``strategy1`` wins against ``strategy2``.

    ``strategy1`` is handed to ``nim_match`` as the first agent, i.e. player 0,
    and ``nim_match`` returns the index of the winning player. A game is
    therefore a win for ``strategy1`` when the result is 0; counting results
    equal to 1 would reward ``strategy1`` for the victories of its opponent.
    """
    return sum(
        1
        for _ in range(num_games)
        if nim_match(strategy1, strategy2, fitness=True) == 0
    )

In [442]:
# version 2: this is used by evolve_2()
def fitness_function_2(strategy: Strategy, num_games = NUMBER_OF_FITNESS_GAMES) -> int:
    """Count how many of ``num_games`` games ``strategy`` wins against ``optimal``.

    ``optimal`` plays as player 0 and ``strategy`` as player 1, so a result of
    1 from ``nim_match_2`` is a win for ``strategy``.
    """
    wins = sum(
        1
        for _ in range(num_games)
        if nim_match_2(optimal, strategy, fitness=True) == 1
    )
    return wins

In [13]:
# Recombination operator used by evolve() and evolve_2()
def crossover(mother: Strategy, father: Strategy) -> tuple[Strategy, Strategy]:
    """Create two children by uniform crossover of ``mother`` and ``father``.

    Every parameter of a child is taken, independently and with equal
    probability, from one of the two parents. ``rows_scores`` is inherited as
    a whole list, not element by element.
    """
    def recombine() -> Strategy:
        return Strategy(
            randomness=random.choice([mother.randomness, father.randomness]),
            # Copy the list: a child must not share a mutable object with its
            # parent, otherwise changing one would silently change the other.
            rows_scores=list(random.choice([mother.rows_scores, father.rows_scores])),
            objects_to_take=random.choice([mother.objects_to_take, father.objects_to_take]),
            aggressive=random.choice([mother.aggressive, father.aggressive]),
        )

    return recombine(), recombine()

In [252]:
# Genetic algorithm, version 1.0: the strategies are ranked by playing against each other
def evolve() -> Strategy:
    """Evolve a population of strategies and return the top-ranked one.

    Fitness is a round robin: every strategy plays ``fitness_function``
    against every member of the population (itself included) and the wins are
    summed. In each of the ``NUMBER_OF_GENERATIONS`` generations the better
    half of the population survives as parents and one child per parent is
    added through ``crossover``.

    Raises:
        ValueError: if ``INITIAL_POPULATION`` is smaller than 4, because the
            best half would then contain fewer than two parents.
    """
    if INITIAL_POPULATION < 4:
        raise ValueError("INITIAL_POPULATION must be at least 4 to form a pair of parents")

    def rank(individuals: list[Strategy]) -> list[tuple[Strategy, int]]:
        """Return ``(strategy, wins)`` pairs, best first."""
        ranked = [
            (candidate, sum(fitness_function(candidate, rival) for rival in individuals))
            for candidate in individuals
        ]
        ranked.sort(key=lambda entry: entry[1], reverse=True)
        return ranked

    scores = rank(generate_initial_population(INITIAL_POPULATION))

    for _ in tqdm(range(NUMBER_OF_GENERATIONS)):
        # Selection: the better half of the ranking becomes the parents
        parents = [candidate for candidate, _wins in scores[: len(scores) // 2]]

        # Crossover: shuffle, then mate neighbours, which pairs the parents at
        # random without using any of them twice
        random.shuffle(parents)
        children = []
        for mother, father in zip(parents[0::2], parents[1::2]):
            children.extend(crossover(mother, father))
        if len(parents) % 2:
            # An odd number of parents leaves one without a mate: pair it with
            # a random other parent and keep a single child, so that there is
            # exactly one child per parent.
            partner = random.choice(parents[:-1])
            children.append(crossover(parents[-1], partner)[0])

        # TODO: mutation is not implemented yet

        scores = rank(parents + children)

    # The ranking is sorted best first
    return scores[0][0]

In [446]:
# Genetic algorithm, version 2.0: the strategies are ranked by playing against optimal(),
# the strategy given by the teacher, because that is the strategy to beat
def evolve_2() -> Strategy:
    """Evolve a population of strategies and return the top-ranked one.

    The fitness of a strategy is its number of wins against ``optimal`` as
    counted by ``fitness_function_2``. In each of the
    ``NUMBER_OF_GENERATIONS`` generations the better half of the population
    survives as parents and one child per parent is added through
    ``crossover``.

    Raises:
        ValueError: if ``INITIAL_POPULATION`` is smaller than 4, because the
            best half would then contain fewer than two parents.
    """
    if INITIAL_POPULATION < 4:
        raise ValueError("INITIAL_POPULATION must be at least 4 to form a pair of parents")

    def rank(individuals: list[Strategy]) -> list[tuple[Strategy, int]]:
        """Return ``(strategy, wins)`` pairs, best first."""
        ranked = [(candidate, fitness_function_2(candidate)) for candidate in individuals]
        ranked.sort(key=lambda entry: entry[1], reverse=True)
        return ranked

    scores = rank(generate_initial_population(INITIAL_POPULATION))

    for _ in tqdm(range(NUMBER_OF_GENERATIONS)):
        # Selection: the better half of the ranking becomes the parents
        parents = [candidate for candidate, _wins in scores[: len(scores) // 2]]

        # Crossover: shuffle, then mate neighbours, which pairs the parents at
        # random without using any of them twice
        random.shuffle(parents)
        children = []
        for mother, father in zip(parents[0::2], parents[1::2]):
            children.extend(crossover(mother, father))
        if len(parents) % 2:
            # An odd number of parents leaves one without a mate: pair it with
            # a random other parent and keep a single child, so that there is
            # exactly one child per parent.
            partner = random.choice(parents[:-1])
            children.append(crossover(parents[-1], partner)[0])

        # TODO: mutation is not implemented yet

        scores = rank(parents + children)

    # The ranking is sorted best first
    return scores[0][0]

In [128]:
def choose_based_on_scores(rows_scores: list[float]) -> int:
    """Draw a row index at random, with probability proportional to its score.

    The scores are normalised by their sum before the draw, so a list whose
    scores add up to zero raises ``ZeroDivisionError``.
    """
    score_sum = sum(rows_scores)
    weights = []
    for score in rows_scores:
        weights.append(score / score_sum)
    # random.choices returns a list; with the default k=1 it holds one index
    (picked,) = random.choices(list(range(len(rows_scores))), weights)
    return picked

## Command to train the agent

In [447]:
# Train one agent with each variant of the genetic algorithm:
# evolve() ranks the strategies against each other, evolve_2() against optimal()
best_strategy_1 = evolve()
best_strategy_2 = evolve_2()

# Show the parameters of the two resulting strategies
print("best_strategy_1:", best_strategy_1)
print("best_strategy_2:", best_strategy_2)


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

best_strategy_1: Strategy(randomness=0.06841591523179158, rows_scores=[0.038533038928596275, 0.16839356357893087, 0.8888780511830323, 0.9872259243037497, 0.9317777569986282], objects_to_take=0.8170411861026605, aggressive=True)
best_strategy_2: Strategy(randomness=0.0716015790481781, rows_scores=[0.32976586329763113, 0.37873854523753214, 0.7145812175502192, 0.349582870681972, 0.6456632133582212], objects_to_take=0.8756517306377082, aggressive=False)


## Function that returns the move to do based on the strategy and the actual state

In [451]:
def evolution_strategy(state: Nim, strategy: Strategy) -> Nimply:
    """Choose the move that ``strategy`` plays on ``state``.

    With probability ``strategy.randomness`` a uniformly random legal move is
    returned. Otherwise a row is drawn with probability proportional to its
    entry in ``strategy.rows_scores`` (rows without legal moves are excluded)
    and the fraction ``strategy.objects_to_take`` of the objects that may be
    removed from it is taken, rounded up when ``strategy.aggressive`` is set
    and down otherwise.

    Raises:
        ValueError: if ``state`` has no legal move (the game is over).
    """
    allowed_moves = possible_moves(state)
    if not allowed_moves:
        raise ValueError("no legal move available: the game is already over")

    # Exploration: sometimes ignore the parameters and play at random
    if random.random() < strategy.randomness:
        return random.choice(allowed_moves)

    # Largest legal removal per row, 0 for rows that cannot be played. Sized
    # from the board itself so that boards of any size are handled.
    max_take = [0] * len(state.rows)
    for move in allowed_moves:
        max_take[move.row] = max(max_take[move.row], move.num_objects)

    # Keep the score of the playable rows only
    weights = [
        score if limit else 0
        for score, limit in zip(strategy.rows_scores, max_take)
    ]
    if sum(weights) > 0:
        selected_row = choose_based_on_scores(weights)
    else:
        # Every playable row has a zero score (or no score at all): a weighted
        # draw is impossible, so pick uniformly among the playable rows.
        selected_row = random.choice(
            [row for row, limit in enumerate(max_take) if limit]
        )

    limit = max_take[selected_row]
    share = strategy.objects_to_take * limit
    selected_objects = math.ceil(share) if strategy.aggressive else math.floor(share)
    # A move removes at least one object and never more than the row allows
    selected_objects = max(1, min(selected_objects, limit))

    return Nimply(selected_row, selected_objects)

In [450]:
# attempt at a second version of evolution_strategy (kept commented out)
# # def evolution_strategy(state: Nim, strategy: Strategy = best_strategy) -> Nimply:
# def evolution_strategy(state: Nim, strategy: Strategy) -> Nimply:
#     # generate the possible moves
#     allowed_moves = possible_moves(state)
#     # print(allowed_moves)

#     # decide whether to pick a random move or the best one
#     # print("strategy: ", strategy)
#     # print("strategy.randomness: ", strategy.randomness)
#     if random.random() < strategy.randomness:
#         selected_move = random.choice(allowed_moves)
#         return selected_move
    
#     # pick the best move based on the other parameters
#     # select the rows that are involved in the possible moves
#     not_allowed_rows = set(range(NIM_SIZE)) - set(move.row for move in allowed_moves)
#     # print("not_allowed_rows: ", not_allowed_rows)
    
#     # select the scores of the rows that are involved in the possible moves
#     selected_rows_scores = deepcopy(strategy.rows_scores)
#     for i in not_allowed_rows:
#         selected_rows_scores[i] = 0
#     # print("selected_rows_scores: ", selected_rows_scores)
    
#     # choose the best row
#     selected_row = choose_based_on_scores(selected_rows_scores)
#     # print("selected_row: ", selected_row)

#     # choose the best number of objects to take
#     # count the number of objects in the selected row at the actual state
#     number_of_objects = len([move for move in allowed_moves if move.row == selected_row])
#     # print("number_of_objects: ", number_of_objects)

#     if strategy.aggressive:
#         # choose by rounding up
#         selected_objects = math.ceil(strategy.objects_to_take * number_of_objects)
#     else:
#         # choose by rounding down
#         selected_objects = math.floor(strategy.objects_to_take * number_of_objects)
#     # print("selected_objects: ", selected_objects)
#     # if the result of picking obkects is 0, pick 1 (minimum)
#     if selected_objects == 0:
#         selected_objects = 1
#     elif selected_objects > MAX_OBJECTS:
#         selected_objects = MAX_OBJECTS

#     # reconstruct the corresponding move
#     selected_move = Nimply(selected_row, selected_objects)
#     # print("selected_move: ", selected_move)
#     return selected_move

In [452]:
def nim_match(strategy1, strategy2, nim_size = NIM_SIZE, player = 0, fitness = False, debug = False):
    """Play one game between two strategies, both driven by ``evolution_strategy``.

    ``strategy1`` is player 0 and ``strategy2`` is player 1; ``player`` is the
    index of the one who moves first. The turn passes to the other player
    after every move, so the player reported as the winner is the one who did
    not make the last move.

    Every event is logged at INFO level and, when ``debug`` is set, also
    printed. The index of the winner is returned only when ``fitness`` is
    set; otherwise the function returns ``None``.
    """
    def report(message):
        logging.info(message)
        if debug:
            print(message)

    agents = (strategy1, strategy2)
    if debug:
        print("agent1: ", agents[0])
        print("agent2: ", agents[1])

    nim = Nim(nim_size)
    report(f"init : {nim}")
    while nim:
        selected_move = evolution_strategy(nim, agents[player])
        report(f"ply: player {player} plays {selected_move}")
        nim.nimming(selected_move)
        report(f"status: {nim}")
        # Hand the turn to the other player
        player = 1 - player
    report(f"status: Player {player} won!")

    return player if fitness else None

In [453]:
# used for testing evolve_2
def nim_match_2(strategy1, strategy2, nim_size = NIM_SIZE, player = 0, fitness = False, debug = False, winner = False):
    """Play one game between a move function and an evolved strategy.

    Player 0 is ``strategy1``, called directly as ``strategy1(nim)``; player 1
    is ``strategy2``, whose move comes from ``evolution_strategy``. ``player``
    is the index of the one who moves first. The turn passes to the other
    player after every move, so the player reported as the winner is the one
    who did not make the last move.

    Every event is logged at INFO level and printed when ``debug`` is set;
    the final result is also printed when ``winner`` is set. The index of the
    winner is returned only when ``fitness`` is set; otherwise the function
    returns ``None``.
    """
    def report(message, echo):
        logging.info(message)
        if echo:
            print(message)

    agents = (strategy1, strategy2)
    if debug:
        print("agent1: ", agents[0])
        print("agent2: ", agents[1])

    nim = Nim(nim_size)
    report(f"init : {nim}", debug)
    while nim:
        mover = agents[player]
        selected_move = mover(nim) if player == 0 else evolution_strategy(nim, mover)
        report(f"ply: player {player} plays {selected_move}", debug)
        nim.nimming(selected_move)
        report(f"status: {nim}", debug)
        # Hand the turn to the other player
        player = 1 - player
    report(f"status: Player {player} won!", debug or winner)

    return player if fitness else None

In [469]:
# nim_match(optimal, best_strategy, debug = True)
# Number of evaluation games each trained strategy plays against optimal()
matches = 500
wins_1 = 0
wins_2 = 0

# With fitness=True nim_match_2 returns the index of the winner; the trained
# strategy is player 1, so adding up the results counts its victories.
for _ in tqdm(range(matches)):
    wins_1 += nim_match_2(optimal, best_strategy_1, debug = False, fitness = True)
    wins_2 += nim_match_2(optimal, best_strategy_2, debug = False, fitness = True)

# Report the absolute number of wins and the win rate of both strategies
print("matches played: ", matches)
print("---------------------------------")
print("wins_1: ", wins_1)
print("percentage of wins_1: ", wins_1/matches * 100, "%")
print("---------------------------------")
print("wins_2: ", wins_2)
print("percentage of wins_2: ", wins_2/matches * 100, "%")

  0%|          | 0/100 [00:00<?, ?it/s]

matches played:  100
---------------------------------
wins_1:  29
percentage of wins_1:  28.999999999999996 %
---------------------------------
wins_2:  36
percentage of wins_2:  36.0 %
