Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using rules evolved with an evolution strategy (ES)

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) in it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [None]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [None]:
# A single move ("ply"): remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")


In [None]:
class Nim:
    """Mutable state of a Nim game.

    The board has ``num_rows`` rows; row ``i`` (0-based) starts with
    ``2 * i + 1`` objects. ``k``, when not ``None``, is the largest number of
    objects a single move is allowed to remove.
    """

    def __init__(self, num_rows: int, k: "int | None" = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self) -> bool:
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self) -> str:
        """Return the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Immutable snapshot of the number of objects in each row."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply ``ply`` to the board in place.

        Args:
            ply: a ``(row, num_objects)`` pair.

        Raises:
            AssertionError: if the move takes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        # A move must remove something: zero would be a pass and a negative
        # amount would put objects back on the board.
        assert num_objects >= 1
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) strategies

In [None]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row, then a random number of objects to take from it."""
    non_empty_rows = []
    for index, count in enumerate(state.rows):
        if count > 0:
            non_empty_rows.append(index)
    chosen_row = random.choice(non_empty_rows)
    # Anything from a single object up to the whole row.
    amount = random.randint(1, state.rows[chosen_row])
    return Nimply(row=chosen_row, num_objects=amount)


In [None]:
def gabriele(state: Nim) -> Nimply:
    """Take every object from the first (lowest-index) non-empty row."""

    def preference(move):
        # Lower row index wins first; within a row, the larger amount wins.
        row_index, amount = move
        return (-row_index, amount)

    candidates = [
        (row_index, amount)
        for row_index, count in enumerate(state.rows)
        for amount in range(1, count + 1)
    ]
    chosen = max(candidates, key=preference)
    return Nimply(*chosen)


In [None]:
import math
import numpy as np

def fitness(state: "Nim") -> int:
    """Return the nim-sum of ``state``: the bitwise XOR of all its row sizes.

    Computed directly on the integers, so it works for row sizes of any
    magnitude and returns 0 for a board with no rows.
    """
    total = 0
    for count in state.rows:
        total ^= count
    return total


def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    # NOTE(review): unfinished stub. `genome` is created but never used and the
    # function falls off the end, so it returns None rather than a Nimply.
    genome = {"love_small": 0.1}
    

def evolution(state: Nim) -> Nimply:
    """Sample 10 random legal moves and return the one whose resulting position scores lowest.

    Each sampled move is applied to a copy of ``state`` and scored with
    ``fitness``. On ties the move sampled first is kept.
    """
    offspring_count = 10
    legal_moves = []
    for row_index, count in enumerate(state.rows):
        for amount in range(1, count + 1):
            legal_moves.append((row_index, amount))

    winner = None
    winner_score = math.inf
    for _ in range(offspring_count):
        candidate = random.choice(legal_moves)
        trial = deepcopy(state)
        trial.nimming(Nimply(*candidate))
        score = fitness(trial)
        # The first sample is always accepted; afterwards only a strictly
        # lower score replaces the current winner.
        if winner is None or score < winner_score:
            winner = candidate
            winner_score = score
    return Nimply(*winner)


In [None]:
import numpy as np


def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of ``state``: the bitwise XOR of all its row sizes.

    Computed directly on the integers, so it works for row sizes of any
    magnitude and returns 0 for a board with no rows.
    """
    result = 0
    for count in state.rows:
        result ^= count
    return result


def analize(raw: Nim) -> dict:
    """Evaluate every legal ply of ``raw``.

    Returns a dict with the single key ``"possible_moves"``, which maps each
    legal ``Nimply`` to the nim-sum of the position reached by playing it.
    ``raw`` itself is left untouched; every ply is tried on a copy.
    """
    outcomes = {}
    for row_index, count in enumerate(raw.rows):
        for amount in range(1, count + 1):
            ply = Nimply(row_index, amount)
            logging.debug(f"ply: {ply}")
            successor = deepcopy(raw)
            successor.nimming(ply)
            outcomes[ply] = nim_sum(successor)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Return a move that leaves the opponent in a losing position, if one exists.

    The game is played so that whoever takes the last object loses, so a
    position handed to the opponent is losing for them when either

    * every remaining row holds at most one object and the number of
      one-object rows is odd (they are forced to take the last object), or
    * some row still holds two or more objects and the nim-sum is zero.

    One such move is picked at random. If no move qualifies, a random legal
    move is returned instead. The move-size bound ``k`` is not taken into
    account here.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    rows = state.rows

    def leaves_opponent_losing(ply, ns) -> bool:
        row, num_objects = ply
        remaining = [c - num_objects if r == row else c for r, c in enumerate(rows)]
        if all(c <= 1 for c in remaining):
            # Endgame: only single-object rows are left, so parity decides.
            return sum(remaining) % 2 == 1
        return ns == 0

    spicy_moves = [
        ply for ply, ns in analysis["possible_moves"].items() if leaves_opponent_losing(ply, ns)
    ]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


## Oversimplified match

In [None]:
#logging.getLogger().setLevel(logging.INFO)

# Play 100 games of 7-row Nim between the two strategies and count the wins of each.
# NOTE(review): `simulation_choice` is defined further down in this file; its
# definition must have been executed before this cell, otherwise the next line
# raises NameError.
strategy = (gabriele, simulation_choice)
player_wins = [0, 0]
for i in range(100):
    nim = Nim(7)
    #logging.info(f"init : {nim}")
    # strategy[0] always makes the first move.
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    # `player` was flipped after the final move, so it now indexes the player
    # who did NOT take the last object, i.e. the winner.
    player_wins[player] += 1
print(player_wins)
#logging.info(f"status: Player {player} won!")


In [None]:
def number_wins(state: Nim):
    """Play 10 random-vs-random games starting from ``state``.

    Returns how many of those games are won by the player who moves first
    from ``state``. The player who takes the last object loses. ``state`` is
    not modified; each game runs on its own copy.
    """
    players = (pure_random, pure_random)
    tally = [0, 0]
    for _ in range(10):
        board = deepcopy(state)
        turn = 0
        while board:
            board.nimming(players[turn](board))
            turn = 1 - turn
        # After the last flip `turn` indexes the player who did not make the
        # final move, which is the winner.
        tally[turn] += 1
    return tally[0]


In [None]:
def simulation_choice(state: Nim):
    """Choose the move after which the opponent wins the fewest random playouts.

    Every legal move is tried on a copy of ``state`` and scored with
    ``number_wins``, which counts wins for the player moving next. The
    lowest score wins; on ties the earliest move in enumeration order is kept.
    """
    chosen = None
    fewest_wins = math.inf
    for row_index, count in enumerate(state.rows):
        for amount in range(1, count + 1):
            candidate = (row_index, amount)
            board = deepcopy(state)
            board.nimming(Nimply(*candidate))
            opponent_wins = number_wins(board)
            # The first candidate is always accepted; afterwards only a
            # strictly lower count replaces the current choice.
            if chosen is None or opponent_wins < fewest_wins:
                chosen = candidate
                fewest_wins = opponent_wins
    return Nimply(*chosen)

In [None]:
# Step through one game of 4-row Nim in which every move is chosen by
# simulation_choice, printing how many legal moves were available each turn.
nim = Nim(4)
while nim :
    # Legal moves in the position BEFORE the ply is applied; only their count is used.
    possible_moves = [(r, o) for r, c in enumerate(nim.rows) for o in range(1, c + 1)]
    ply = simulation_choice(nim)
    nim.nimming(ply)
    print(len(possible_moves))
    # Block until the user presses Enter before playing the next move.
    input()

In [43]:
class PlayerNim():
    """A Nim player driven by one weight per move.

    ``moves`` is a list of ``(row, num_objects)`` pairs and ``weights`` holds
    the weight of the move at the same index. When asked to play, the agent
    picks the legal move with the lowest weight.
    """

    def __init__(self, weights, moves) -> None:
        self.weights = weights
        self.moves = moves
        # Lookup table: move -> weight.
        self.rules = {move: self.weights[i] for i, move in enumerate(moves)}

    def play(self, state: "Nim") -> "Nimply":
        """Return the legal move of ``state`` with the lowest weight.

        Moves that have no entry in ``rules`` are treated as having infinite
        weight, so they are only played when nothing better is available.
        On ties the first move in enumeration order is kept.
        """
        best_move, best_weight = None, math.inf
        for r, c in enumerate(state.rows):
            for o in range(1, c + 1):
                weight = self.rules.get((r, o), math.inf)
                if best_move is None or weight < best_weight:
                    best_move, best_weight = (r, o), weight
        return Nimply(*best_move)

    @staticmethod
    def crossover(player1, player2):
        """Return a child whose weights are each taken at random from one of the two parents.

        The child uses the move list of ``player1``.
        """
        weights = [random.choice(pair) for pair in zip(player1.weights, player2.weights)]
        return PlayerNim(weights, player1.moves)

    @staticmethod
    def mutation(player, mutation_rate):
        """Return a copy of ``player`` in which each weight is replaced by a new
        random value in [0, 1) with probability ``mutation_rate``."""
        weights = [
            random.random() if random.random() < mutation_rate else weight
            for weight in player.weights
        ]
        return PlayerNim(weights, player.moves)

    def fitness(self, num_rows=None, num_games=100):
        """Return how many of ``num_games`` games this player wins against ``pure_random``.

        This player always moves first and the player taking the last object
        loses. The result is stochastic because the opponent is random.

        Args:
            num_rows: size of the board to play on. When ``None`` it is
                derived from the highest row index present in ``self.moves``,
                so the player is evaluated on the board it was built for.
            num_games: number of games to play.
        """
        if num_rows is None:
            num_rows = max((row for row, _ in self.moves), default=-1) + 1
        wins = [0, 0]
        for _ in range(num_games):
            nim = Nim(num_rows)
            turn = 0
            while nim:
                ply = self.play(nim) if turn == 0 else pure_random(nim)
                nim.nimming(ply)
                turn = 1 - turn
            # `turn` now indexes the player who did not take the last object.
            wins[turn] += 1
        return wins[0]
        

class TrainingPlayerNim():
    """Evolves a population of ``PlayerNim`` agents for a Nim board of ``size`` rows."""

    def __init__(self, size : int, population_size : int = 20) -> None:
        self.state = Nim(size)
        # Every (row, num_objects) move that is legal on the initial board.
        self.moves = [(r, o) for r, c in enumerate(self.state.rows) for o in range(1, c + 1)]
        self.population = []
        # Not used by this class; kept so existing attribute access keeps working.
        self.weights = []
        self.init_population(population_size)

    def init_population(self, size : int) -> None:
        """Append ``size`` players with uniformly random weights in [0, 1)."""
        for _ in range(size):
            self.population.append(PlayerNim([random.random() for _ in self.moves], self.moves))

    def generation(self, mutation_rate : float = 0.1) -> None:
        """Run one generation: keep the fitter half, then refill with offspring.

        The population is sorted by ``fitness()`` (best first) and truncated
        to its first half, keeping at least one survivor. New players are then
        appended until the population is back to its previous size. Each new
        player is, with equal probability, a crossover of two randomly chosen
        members or a mutation of one, with the given ``mutation_rate``.
        """
        target_size = len(self.population)
        self.population.sort(key=lambda x : x.fitness() , reverse=True)
        # Keep at least one survivor so a population of one is not wiped out.
        survivors = max(1, target_size // 2) if target_size else 0
        self.population = self.population[:survivors]
        # Refill to the original size so odd-sized populations do not shrink.
        while len(self.population) < target_size:
            if random.random() < 0.5:
                child = PlayerNim.crossover(random.choice(self.population), random.choice(self.population))
            else:
                child = PlayerNim.mutation(random.choice(self.population), mutation_rate)
            self.population.append(child)

    def train(self, generations : int = 40) :
        """Run ``generations`` generations, printing after each one a fresh
        fitness evaluation of the first player in the population."""
        for _ in range(generations):
            self.generation()
            print(self.population[0].fitness())
        
            


In [44]:
# Evolve a population of PlayerNim agents for 4-row Nim; train() prints one
# fitness value per generation.
generation = TrainingPlayerNim(4)
generation.train()


53
62
65
71
75
61
68
67
80
74
