Copyright **`(c)`** 2023 Alessandro Chiabodo `<s309234@studenti.polito.it>`  
[`https://github.com/AChiabodo/compIntelligence`](https://github.com/AChiabodo/compIntelligence)  
Free for personal use;


## LAB 2 - Nim

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

### Notes
The nim-sum can be used to "test" our algorithm given that it provides the "best" move to do in a given state. It should NOT be part of our algorithm, but it can be used to test it.

In [None]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from itertools import product
import pickle
import math
import numpy as np

In [None]:
Nimply = namedtuple("Nimply", "row, num_objects")

class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    def non_zero_rows(self) -> list:
        return [True if x != 0 else False for x in self.rows]

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        #print(f"nimming {ply}")
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [None]:
def generate_lists(numbers):
    n = len(numbers)

    all_combinations = list(product(range(0, max(numbers) + 1), repeat=n))

    valid_combinations = [list(combination) for combination in all_combinations if all(x <= y for x, y in zip(combination, numbers))]
    
    return valid_combinations[1:] #avoid the full zeros solution

def random_moves(states : list,k : int = None) -> dict: 
    rules = dict()
    for state in states:
        if k is None:
            rules[tuple(state)] = random.choice([(r, o) for r, c in enumerate(state) for o in range(1, c + 1)])
        else:
            rules[tuple(state)] = random.choice([(r, o) for r, c in enumerate(state) for o in range(1, min(k, c) + 1)])
    return rules

### Player Interface

In [None]:
class Player():
    def __init__(self, k):
        self.k = k

    def play(self,state : Nim) -> Nimply :
        pass

    def __str__(self):
        return self.__class__.__name__
    
    def mutation(self,mutation_rate):
        pass

    def compute_fitness(self, state):
        pass

In [None]:
def multiple_matches(player1 : Player,player2 : Player,matches = 100,game = Nim(5)):
    wins = [0,0]
    players = [player1,player2]
    for _ in range(matches):
        nim = deepcopy(game)
        i = random.choice([0,1])
        while nim :
            ply = players[i].play(nim)
            nim.nimming(ply)
            i = 1 - i
        wins[i] += 1
    return wins

In [None]:
class RandomPlayer(Player):
    def __init__(self,k) -> None:
        super().__init__()
    def play(self,state : Nim) -> Nimply :
        """A completely random move"""
        row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
        num_objects = random.randint(1, state.rows[row])
        return Nimply(row, num_objects)
    
class gabriele(Player):
    def play(self,state : Nim) -> Nimply :
        """Pick always the maximum possible number of the lowest row"""
        possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
        return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))
    

In [272]:
class OptimalPlayer(Player):
    def __init__(self,k: int = None) -> None:
        self._k = k
        
    def nim_sum(self,state: Nim) -> int:
        tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
        xor = tmp.sum(axis=0) % 2
        return int("".join(str(_) for _ in xor), base=2)

    def analize(self,raw: Nim) -> dict:
        cooked = dict()
        cooked["possible_moves"] = dict()
        for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
            logging.debug(f"ply: {ply}")
            tmp = deepcopy(raw)
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = self.nim_sum(tmp)
        return cooked

    def play(self,state: Nim) -> Nimply:
        analysis = self.analize(state)
        logging.debug(f"analysis:\n{pformat(analysis)}")
        if self._k is None:
            spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
        else:
            spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0 and ply[1] <= self._k]
        if not spicy_moves:
            spicy_moves = list(analysis["possible_moves"].keys())
        ply = random.choice(spicy_moves)
        return ply

In [273]:
multiple_matches(OptimalPlayer(k=3),RandomPlayer(),matches=100)

[71, 29]

In [None]:
class RuleBasedPlayer(Player):
    def non_zero_tuples(tup : tuple) -> list:
        return [True if x != 0 else False for x in tup]

    ### Trying to find some rules to play with at Nim
    def play(self,state : Nim,debug_mode = False) -> Nimply:
        rows = state.rows
        num_rows = sum(state.non_zero_rows())
        selected_row = None
        selected_amount = None
        if num_rows > 2 :
            for i in range(len(rows)):
                if rows[i] != 0 and (selected_amount is None or selected_amount > rows[i]):
                    selected_amount = rows[i]
                    selected_row = i
            if debug_mode :
                print(f"Selected row: {selected_row}")
                print(f"Selected amount: {selected_amount}")
            return Nimply(selected_row,selected_amount)
        if num_rows == 2 :
            for i in range(len(rows)):
                if rows[i] == 1 :
                    return Nimply(i,rows[i])
                if rows[i] != 0 and (selected_amount is None or selected_amount > rows[i]):
                    selected_amount = rows[i]
                    selected_row = i
            if debug_mode :
                print(f"Selected row: {selected_row}")
                print(f"Selected amount: {selected_amount}")
            return Nimply(selected_row,selected_amount)
        if num_rows == 1 :
            for i in range(len(rows)):
                if rows[i] != 0 :
                    if debug_mode:
                        print(f"Selected row: {i}")
                        print(f"Selected amount: {max(rows[i] - 1,1)}")
                    return Nimply(i,max(rows[i] - 1,1))

In [274]:
class SimulativePlayer(Player):
    def __init__(self,k: int = None) -> None:
        self._k = k
    def number_wins(self,state : Nim):
        return multiple_matches(RandomPlayer(),RandomPlayer(),matches=10,game=state)[0]

    def play(self,state : Nim) :
        possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
        best_move , best_sum = None, math.inf
        for move in possible_moves:
            if self._k is not None and move[1] > _k:
                continue
            tmp = deepcopy(state)
            tmp.nimming(Nimply(*move))
            tmp_sum = self.number_wins(tmp)
            if (best_sum > tmp_sum and best_sum != 0) or best_move is None:
                best_move, best_sum = move, tmp_sum
        return Nimply(*best_move)

In [None]:
class PlayerNimStates(Player):

    def __init__(self,rules : dict) -> None:
        self.rules = rules
        self.fitness = None

    def play(self, state: Nim) -> Nimply:
        return self.rules[state.rows]
    
    def mutation(self,mutation_rate):
        for state in self.rules.keys():
            if random.random() < mutation_rate :
                self.rules[state] = random.choice([(r, o) for r, c in enumerate(state) for o in range(1, c + 1)])

    def crossover(self,other : Player,other_weight = 0.5):
        for state in self.rules.keys():
            self.rules[state] = random.choices([self.rules[state],other.rules[state]],weights=[1-other_weight,other_weight])[0]

    def compute_fitness(self, state):
        self.fitness = 0
        self.fitness += multiple_matches(self,RandomPlayer(),matches=25,game=state)[0]
        self.fitness += multiple_matches(self,OptimalPlayer(),matches=25,game=state)[0]
        self.fitness += multiple_matches(self,SimulativePlayer(),matches=25,game=state)[0]
        self.fitness += multiple_matches(self,RuleBasedPlayer(),matches=25,game=state)[0]
        return self.fitness
    
    def eval(self, state):
        return multiple_matches(self,SimulativePlayer(),matches=100,game=state)[0]

In [244]:
class TrainingPlayerNimStates():
    def __init__(self, size : int , pop_size : int , lam : int = None , k : int = None) -> None:
        self.state = Nim(size)
        self.population = []
        self.weights = []
        self.ration = 0.8
        self._k = k
        if lam is None :
            self.generation = self.generation_default
        else :
            self.generation = self.generation_lambda
            self.lam = lam
            self.temperature = 0.9
        self.possible_states = generate_lists(list(self.state.rows))
        self.init_population(pop_size)

    def init_population(self, size : int) -> None:
        print("Initializing population")
        for _ in range(size):
            temp = PlayerNimStates(random_moves(self.possible_states,self._k))
            temp.compute_fitness(self.state)
            self.population.append(temp)

    def generation_default(self) -> None:
        for _ in range(len(self.population)):
            n = random.random()
            temp = deepcopy(random.choice(self.population))
            if n > self.ration :
                temp.crossover(random.choice(self.population))
            else:
                temp.mutation(0.1)
            temp.compute_fitness(self.state)
            self.population.append(temp)    
        self.population.sort(key=lambda x : x.fitness , reverse=True)
        self.population = self.population[:len(self.population)//2]
        self.ration = 0.9 * self.ration

    def generation_lambda(self) -> None:
        parent = self.population.pop(random.randint(0,len(self.population)-1)) #purely random selection
        temp_population = []
        temp_population.append(parent)
        for _ in range(self.lam):
            temp = PlayerNimStates.mutation(parent,self.ration)
            temp.fitness = temp.compute_fitness(self.state)
            temp_population.append(temp)  
        temp_population.sort(key=lambda x : x.fitness , reverse=True)
        self.population.append(temp_population[0])  
        self.population.sort(key=lambda x : x.fitness , reverse=True)
        #self.population = self.population[:len(self.population)//2]
        self.ration = self.temperature * self.ration

    def train(self,epoch = 10):
        for _ in range(epoch):
            print("Generation : ",_)
            self.generation_default()
            print(self.population[0].eval(self.state) , self.population[0].fitness)
        return self.population[0]

In [None]:
test_trainer = TrainingPlayerNimStates(4,40)

In [None]:
test_player = test_trainer.train(10)

In [245]:
lambda_trainer = TrainingPlayerNimStates(4,20,5)

Initializing population


In [246]:
lambda_player = lambda_trainer.train(10)

Generation :  0
48 63
Generation :  1
53 67
Generation :  2
55 73
Generation :  3
54 73
Generation :  4
58 73
Generation :  5
60 73
Generation :  6
59 73
Generation :  7
61 75
Generation :  8
65 83
Generation :  9
56 83


In [258]:
multiple_matches(lambda_player,OptimalPlayer(),matches=100,game=Nim(4))

[63, 37]