Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [2]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [3]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [None]:
num = 3
rows = [i * 2 + 1 for i in range(num)]
print(rows)
print(sum(rows))
print("<" + " ".join(str(_) for _ in rows) + ">")

In [15]:
class Nim:
    # define rules of the game
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property  # getter
    def rows(self) -> tuple:
        return tuple(self._rows)

    # applies the move to the game
    def nimming(self, ply: Nimply) -> None:  # you have to pass an object of type Nimply
        row, num_objects = ply
        assert self._rows[row] >= num_objects  # check if there are at least num_objects in the row
        assert self._k is None or num_objects <= self._k  # check if the number of objects to be removed is less than k
        self._rows[row] -= num_objects  # if everything is ok, remove the objects


## Sample (and silly) startegies 

In [5]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [None]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [None]:
possible_moves = [(r, o) for r, c in enumerate(rows) for o in range(1, c + 1)]
print(possible_moves)
#remove the first element from possible_moves
possible_moves.pop(0)
print(max(possible_moves, key=lambda m: (-m[0], m[1])))

In [None]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [None]:
import numpy as np

tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in rows])
# print(rows[1])
# print(tmp[2])

xor = tmp.sum(axis=0) % 2
print(tmp.sum(axis=0))
print(xor)

print(int("".join(str(_) for _ in xor), base=2))

In [63]:
import numpy as np


def nim_sum(state: Nim) -> int:
    # convert the rows to a binary representation in 32 bits
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])

    # compute the xor of all the rows
    xor = tmp.sum(axis=0) % 2

    # convert the xor to an integer
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()

    # for each possible move
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        # copy the object
        tmp = deepcopy(raw)

        # apply the move
        tmp.nimming(ply)

        # compute the nim sum and store it into the dictionary
        cooked["possible_moves"][ply] = nim_sum(tmp)
        print("Cooked:", type(cooked["possible_moves"][ply]))

    # return the dictionary with the "score" obtained by each move
    return cooked

# basically the optimal strategy consists in picking a random move from the ones with nim sum != 0
# it's like a "try to not lose in this turn" strategy
def optimal(state: Nim) -> Nimply:
    # analyze the actual state
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")

    # create a list of the moves with nim sum != 0 (with moves with nim sum == 0 we lose)
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]

    # if there are no spicy moves, we lose, so we pick a random move
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())

    # pick a random spicy move
    ply = random.choice(spicy_moves)
    # print("Ciao" , type(ply))
    return ply   


In [96]:
NUMBER_OF_GENERATIONS = 100
POPULATION_SIZE = 20
TOURNAMENT_SIZE = 10
GAME_SIZE = 5
STARTING_PLAYER = 0


In [101]:

def get_valid_moves(state: Nim) -> list:
    valid_moves = []

    # for row, count in enumerate(state.rows):
    #     for k in range(1, count + 1):
    #         valid_moves.append(Nimply(row, k))
    
    for ply in (Nimply(row, k) for row, count in enumerate(state.rows) for k in range(1, count + 1)):
        tmp = deepcopy(state)
        tmp.nimming(ply)
        # if nim_sum(tmp) != 0:
        valid_moves.append(ply)
    return valid_moves

In [91]:
# prova = Nim(3)
# print(prova.rows)

# valid_moves = get_valid_moves(prova)
# print(valid_moves)
# print(type(valid_moves))

In [107]:
# a random strategy that picks a random move from the valid ones
def create_random_strategy() -> callable:
    def random_strategy(valid_moves) -> Nimply:
        return random.choice(valid_moves)
    return random_strategy

# I can "use" a strategy by calling it with the valid moves -> strategy(valid_moves)


In [105]:
# a population is a list of strategies
# a strategy is a function that takes a list of valid moves and returns a move
def create_initial_population() -> list:
    population = []
    for _ in range(POPULATION_SIZE):
        population.append(create_random_strategy())
    return population

# population = [create_random_strategy(), create_random_strategy(), ..., create_random_strategy()]
# create_random_strategy() = random_strategy(valid_moves) = random.choice(valid_moves)

# a population of strategies consists in a list of strategies, that are objects (of type function) that take a list of valid moves and return a random one

In [99]:

def evaluate_strategy(strategy: callable, state: Nim) -> int:
    score = 0
    opponents = (strategy, optimal)
    player = STARTING_PLAYER
    for _ in range(TOURNAMENT_SIZE):
        game = Nim(GAME_SIZE)
        while game:
            ply = opponents[game]
            game.nimming(ply)
            player = 1 - player
        if player == STARTING_PLAYER:
            score += 1
    return score

In [106]:

def crossover(mother: Nim, father: Nim) -> Nim:
    def new_strategy(valid_moves: list) -> Nimply:
        if random.random() < 0.5:
            return mother(valid_moves)
        else:
            return father(valid_moves)
    return new_strategy


In [108]:

def next_generation(population: list, state: Nim) -> list:
    new_population = []
    scores = []
    scores = [(evaluate_strategy(strategy, state), strategy) for strategy in population]
    for strategy in population: # population is a list of callables (functions)
        score = (evaluate_strategy(strategy, state), strategy)
        scores.append(score)
    scores.sort(reverse=True)
    selected_strategies = [strategy for _, strategy in scores[POPULATION_SIZE // 2]] # select the best half of the population
    while len(new_population) < POPULATION_SIZE:
        father = random.choice(selected_strategies)
        mother = random.choice(selected_strategies)
        new_strategy = crossover(father, mother)
        new_population.append(new_strategy)
    return new_population

In [None]:

def select_best_move() -> Nimply:
    return 0

def genetic_strategy(state: Nim) -> Nimply:
    population = create_initial_population()
    valid_moves = get_valid_moves()
    for _ in range(NUMBER_OF_GENERATIONS):
        population = next_generation(population, valid_moves)
    best_move = select_best_move()
    return best_move


## Genetic Algorithm Strategy

In [53]:
class GeneticAgent:
    def __init__(self, population_size, generations):
        self.population_size = population_size
        self.generations = generations


    # def get_valid_moves(self, state):
    #     valid_moves = []
    #     for r, c in enumerate(state.rows):
    #         for o in range(1, c + 1):
    #             valid_moves.append(Nimply(r, o))
    #     return valid_moves

    def get_valid_moves(self, state: Nim):
        # create an empty list
        valid_moves = []
        for ply in (Nimply(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)):
            # yield ply
            tmp = deepcopy(state)
            tmp.nimming(ply)
            nimsum = nim_sum(tmp)
            if nim_sum != 0:
                valid_moves.append(nimsum)
        return valid_moves

    def evolve(self, state: Nim):
        population = self.create_initial_population()
        # valid_moves = state.get_valid_moves()
        valid_moves = self.get_valid_moves(state)
        print("ciao", type(valid_moves))
        for _ in range(self.generations):
            print(type(valid_moves))
            population = self.next_generation(population, valid_moves)
        return self.select_best_move(population, state)

    def create_initial_population(self):
        population = []
        for _ in range(self.population_size):
            population.append(self.create_random_strategy())
        return population

    def create_random_strategy(self):
        def random_strategy(valid_moves):
            return random.choice(valid_moves)
        return random_strategy

    def evaluate_strategy(self, strategy: Nim, state: Nim):
        # Evaluate the strategy by playing multiple games and obtaining a score based on performance
        score = 0
        for _ in range(10):  # Play 10 games to evaluate strategy
            new_state = deepcopy(state)
            # print(type(state))
            move = strategy(new_state)  # Get a move from the strategy
            
            # print(move)
            new_state.nimming(move)  # Apply the move to the state
            # Update score based on the result of the game
            # Here you can define how to score the performance based on wins or losses
            # For example, increment score if the strategy wins the game
            if new_state.__bool__():
                score += 1
        return score

    def next_generation(self, population, state: Nim):

        scores = [(self.evaluate_strategy(strategy, state), strategy) for strategy in population]
        scores.sort(reverse=True)  # Sort strategies by their scores
        selected_strategies = [strategy for _, strategy in scores[:self.population_size // 2]]
        print(type(state))
        new_population = []
        while len(new_population) < self.population_size:
            parent1 = random.choice(selected_strategies)
            parent2 = random.choice(selected_strategies)
            new_strategy = self.crossover(parent1, parent2)
            new_population.append(new_strategy)
        return new_population

    def crossover(self, strategy1, strategy2):
        def new_strategy(board, valid_moves):
            if random.random() < 0.5:
                return strategy1(board, valid_moves)
            else:
                return strategy2(board, valid_moves)
        return new_strategy

    def select_best_move(self, population, state: Nim):
        scores = [(self.evaluate_strategy(strategy, state), strategy) for strategy in population]
        best_strategy = max(scores, key=lambda x: x[0])[1]
        return best_strategy

# Replace `genetic` with an instance of `GeneticAgent`
genetic = GeneticAgent(population_size=20, generations=50)

# Define the rest of your code and call `genetic.evolve(state)` where you wish to make the agent select its move.
    

In [60]:
logging.getLogger().setLevel(logging.INFO)

nim = Nim(4)
logging.info(f"init : {nim}")

genetic = GeneticAgent(population_size=20, generations=50)  # Istanzia l'agente genetico

player = 0
while nim:
    if player == 0:
        ply = optimal(nim)  # Giocatore 0 utilizza la strategia ottimale
    else:
        ply = genetic.evolve(nim)  # Giocatore 1 utilizza la strategia evoluta dall'agente genetico

    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player

logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7>
INFO:root:ply: player 0 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 7>


<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
ciao <class 'list'>
<class 'list'>


AttributeError: 'list' object has no attribute 'nimming'

## Oversimplified match

In [15]:
logging.getLogger().setLevel(logging.INFO)

# define the two players
strategy = (optimal, pure_random)

nim = Nim(4)
logging.info(f"init : {nim}")

# first player to move is player 0
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=3)
INFO:root:status: <1 3 5 4>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 4>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=2)
INFO:root:status: <0 3 3 4>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=2)
INFO:root:status: <0 3 1 4>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=3)
INFO:root:status: <0 0 1 4>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=2)
INFO:root:status: <0 0 1 2>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 0 2>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 1>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 0>
INFO:root:status: Player 1 won!
