Copyright **`(c)`** 2023 Alessandro Chiabodo `<s309234@studenti.polito.it>`  
[`https://github.com/AChiabodo/compIntelligence`](https://github.com/AChiabodo/compIntelligence)  
Free for personal use;


## LAB 2 - Nim

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

### Notes
The nim-sum can be used to "test" our algorithm given that it provides the "best" move to do in a given state. It should NOT be part of our algorithm, but it can be used to test it.

In [None]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from itertools import product
import pickle
import math
import numpy as np
from enum import Enum

In [None]:
# A move: remove `num_objects` objects from the row with index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")


class Nim:
    """Board of a Nim game.

    Row ``i`` starts with ``2 * i + 1`` objects. ``k`` is an optional upper
    bound on the number of objects removable in a single move (``None``
    means no bound).
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        # The game is still running while at least one object is left.
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    def non_zero_rows(self) -> list:
        """Return one boolean per row, True when the row still has objects.

        Special case of ``bigger_rows(0)``.
        """
        return [x != 0 for x in self.rows]

    def bigger_rows(self, n: int = 1) -> list:
        """Return one boolean per row, True when the row has more than ``n`` objects."""
        return [x > n for x in self.rows]

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply the move ``ply`` (row index, number of objects) in place.

        Raises:
            AssertionError: when the move removes fewer than one object,
                more objects than the row holds, or more than ``k``.
        """
        row, num_objects = ply
        # A move must remove something: a zero/negative amount would leave the
        # board unchanged (or grow a row) and the game could never end.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

### Player Interface
Defines the main methods that each "player" should implement in order to be trained and used.
Note that for "dumb" players only the `play` method is required.

In [None]:
class Player:
    """Base class shared by all the agents.

    ``k`` is stored as ``self.k``; subclasses use it as the optional bound on
    the number of objects removed per move. Every hook below does nothing and
    returns ``None`` until a subclass overrides it.
    """

    def __init__(self, k: int = None):
        self.k = k

    def play(self, state: Nim) -> Nimply:
        """Choose a move for ``state``; to be overridden by every player."""

    def __str__(self):
        # Players are identified by the name of their concrete class.
        return type(self).__name__

    def mutation(self, mutation_rate):
        """Training hook; only trainable players override it."""

    def compute_fitness(self, state):
        """Training hook; only trainable players override it."""

### Util Functions

In [None]:
def multiple_matches(player1: "Player", player2: "Player", matches=100, game=None):
    """Play ``matches`` games between two players and count the wins of each.

    Every game starts from a deep copy of ``game`` (so ``game`` itself is never
    modified) with a randomly chosen first player. The player who removes the
    last object loses.

    Args:
        player1, player2: objects exposing ``play(state)``.
        matches: number of games to play.
        game: starting board; defaults to a fresh ``Nim(5)``.

    Returns:
        ``[wins of player1, wins of player2]``.
    """
    # Build the default board per call instead of once at definition time.
    if game is None:
        game = Nim(5)
    players = (player1, player2)
    wins = [0, 0]
    for _ in range(matches):
        nim = deepcopy(game)
        turn = random.choice([0, 1])
        while nim:
            nim.nimming(players[turn].play(nim))
            turn = 1 - turn
        # `turn` now designates the player who did NOT take the last object.
        wins[turn] += 1
    return wins

In [None]:
def generate_lists(numbers):
    """Enumerate every board reachable from the row sizes in ``numbers``.

    Returns all lists ``s`` with ``0 <= s[i] <= numbers[i]`` in lexicographic
    order, except the first one (the all-zero board, where no move exists).
    An empty ``numbers`` yields an empty list.
    """
    # One range per row: only valid combinations are generated, instead of
    # building (max + 1) ** n tuples and filtering most of them away.
    per_row = (range(limit + 1) for limit in numbers)
    valid_combinations = [list(combination) for combination in product(*per_row)]
    return valid_combinations[1:]  # avoid the full zeros solution

def random_moves(states: list, k: int = None) -> dict:
    """Build a rule table mapping each state to one random legal move.

    Keys are the states converted to tuples, values are ``(row, amount)``
    pairs with ``1 <= amount <= row size`` and, when ``k`` is given,
    ``amount <= k``.
    """
    table = {}
    for position in states:
        candidates = []
        for row, count in enumerate(position):
            most = count if k is None else min(k, count)
            candidates.extend((row, taken) for taken in range(1, most + 1))
        table[tuple(position)] = random.choice(candidates)
    return table

### Generic Players Definitions
Here I'm defining some "agents" that will play the game in various ways, used to train and benchmark our algorithm

In [None]:
class RandomPlayer(Player):
    """Player that makes completely random moves in the game.

    It selects a random non-empty row and removes a random number of objects
    from that row (at most ``k`` when ``k`` is set).
    """

    def __init__(self, k: int = None) -> None:
        super().__init__(k)

    def play(self, state: Nim) -> Nimply:
        """Return a uniformly random row and a random amount to remove from it."""
        heaps = state.rows
        row = random.choice([idx for idx, size in enumerate(heaps) if size > 0])
        upper = heaps[row] if self.k is None else min(self.k, heaps[row])
        return Nimply(row, random.randint(1, upper))

class MaximumPlayer(Player):
    """Deterministic player: empties as much as allowed of the first non-empty row."""

    def __init__(self, k: int = None):
        super().__init__(k)

    def play(self, state: Nim) -> Nimply:
        """Take the largest allowed amount from the lowest-index row that has a move."""
        legal = []
        for row, size in enumerate(state.rows):
            for taken in range(1, size + 1):
                if self.k is None or taken <= self.k:
                    legal.append((row, taken))
        # Lowest row index first, then the biggest amount within that row.
        best = max(legal, key=lambda move: (-move[0], move[1]))
        return Nimply(*best)
    

In [None]:
class OptimalPlayer(Player):
    """Expert-system player driven by the nim-sum of the board."""

    def __init__(self, k: int = None) -> None:
        super().__init__(k)
        # Kept for backward compatibility with code reading `_k`.
        self._k = k

    def nim_sum(self, state: Nim) -> int:
        """Return the nim-sum (bitwise XOR) of the row sizes of ``state``."""
        total = 0
        for count in state.rows:
            total ^= count
        return total

    def analize(self, raw: Nim) -> dict:
        """Map every move on ``raw`` to the nim-sum of the board it produces.

        Returns a dict with the single key ``"possible_moves"`` whose value
        maps each ``Nimply`` (any amount from 1 to the row size) to that
        nim-sum. ``raw`` is neither copied nor modified.
        """
        rows = raw.rows
        current = self.nim_sum(raw)
        possible_moves = dict()
        for ply in (Nimply(r, o) for r, c in enumerate(rows) for o in range(1, c + 1)):
            logging.debug(f"ply: {ply}")
            # XOR the old row size out and the new one in. Unlike replaying
            # the move with Nim.nimming, this cannot trip the board's own
            # `k` assertion for amounts above the bound.
            before = rows[ply.row]
            possible_moves[ply] = current ^ before ^ (before - ply.num_objects)
        return {"possible_moves": possible_moves}

    def play(self, state: Nim) -> Nimply:
        """Pick a random move leaving a non-zero nim-sum, honouring ``k``.

        When no such move exists, any move allowed by ``k`` is picked.
        """
        analysis = self.analize(state)
        logging.debug(f"analysis:\n{pformat(analysis)}")
        legal = {
            ply: ns
            for ply, ns in analysis["possible_moves"].items()
            if self._k is None or ply.num_objects <= self._k
        }
        spicy_moves = [ply for ply, ns in legal.items() if ns != 0]
        if not spicy_moves:
            # The fallback must respect `k` too, otherwise an illegal move
            # could be returned.
            spicy_moves = list(legal)
        return random.choice(spicy_moves)

In [None]:
# Benchmark: 100 games of OptimalPlayer (k=3) against RandomPlayer on the default board;
# the returned list holds [OptimalPlayer wins, RandomPlayer wins].
multiple_matches(OptimalPlayer(k=3),RandomPlayer(),matches=100)

In [None]:
class EasyRuleBasedPlayer(Player):
    """Player following a few hand-written rules.

    * one non-empty row left: take everything but one object (or the last
      object when the row holds a single one);
    * otherwise: take the whole smallest non-empty row (first one on ties).

    When ``k`` is set, the amount is capped to ``k``.
    """

    @staticmethod
    def non_zero_tuples(tup: tuple) -> list:
        """Return one boolean per element of ``tup``, True when it is non-zero."""
        return [x != 0 for x in tup]

    ### Trying to find some rules to play with at Nim
    def play(self, state: Nim, debug_mode=False) -> Nimply:
        """Return the move chosen by the rules, or ``None`` on an empty board.

        With ``debug_mode`` the selected row and amount are printed.
        """
        occupied = [(i, c) for i, c in enumerate(state.rows) if c != 0]
        if not occupied:
            return None
        if len(occupied) == 1:
            selected_row, count = occupied[0]
            selected_amount = max(count - 1, 1)
        else:
            # min() keeps the first smallest row, like a strict `>` scan; a
            # row holding a single object is necessarily that minimum.
            selected_row, selected_amount = min(occupied, key=lambda item: item[1])
        if self.k is not None:
            # Never return more than the per-move bound allows.
            selected_amount = min(selected_amount, self.k)
        if debug_mode:
            print(f"Selected row: {selected_row}")
            print(f"Selected amount: {selected_amount}")
        return Nimply(selected_row, selected_amount)

In [None]:
class AdvancedRuleBasedPlayer(Player):
    """Nim-sum player with a dedicated endgame rule.

    While at least two rows hold more than one object it plays like the
    nim-sum based player; once at most one such row is left it switches to
    the endgame rule described in ``play``.
    """

    def __init__(self, k: int = None) -> None:
        super().__init__(k)
        # Kept for backward compatibility with code reading `_k`.
        self._k = k

    def nim_sum(self, state: Nim) -> int:
        """Return the nim-sum (bitwise XOR) of the row sizes of ``state``."""
        total = 0
        for count in state.rows:
            total ^= count
        return total

    def analize(self, raw: Nim) -> dict:
        """Map every move on ``raw`` to the nim-sum of the board it produces.

        Returns a dict with the single key ``"possible_moves"``; ``raw`` is
        neither copied nor modified.
        """
        rows = raw.rows
        current = self.nim_sum(raw)
        possible_moves = dict()
        for ply in (Nimply(r, o) for r, c in enumerate(rows) for o in range(1, c + 1)):
            logging.debug(f"ply: {ply}")
            # XOR the old row size out and the new one in. Unlike replaying
            # the move with Nim.nimming, this cannot trip the board's own
            # `k` assertion for amounts above the bound.
            before = rows[ply.row]
            possible_moves[ply] = current ^ before ^ (before - ply.num_objects)
        return {"possible_moves": possible_moves}

    def _cap(self, amount: int) -> int:
        """Limit ``amount`` to the per-move bound ``k`` when one is set."""
        return amount if self._k is None else min(amount, self._k)

    def play(self, state: Nim) -> Nimply:
        """Choose a move for ``state``.

        Endgame (at most one row with more than one object): act on the
        largest row so that, when possible, an odd number of single-object
        rows is left to the opponent. Otherwise pick a random move leaving a
        non-zero nim-sum, falling back to any move allowed by ``k``.

        Raises:
            ValueError: in the endgame branch when the board is empty.
        """
        if sum(state.bigger_rows()) <= 1:  # at most 1 row with more than 1 object
            occupied = [(i, c) for i, c in enumerate(state.rows) if c != 0]
            if not occupied:
                raise ValueError("no move available on an empty board")
            # max() keeps the first largest row, like a strict `<` scan.
            selected_row, selected_amount = max(occupied, key=lambda item: item[1])
            if len(occupied) % 2 == 0:
                wanted = selected_amount  # remove the whole row
            else:
                wanted = max(1, selected_amount - 1)  # shrink the row to one object
            return Nimply(selected_row, self._cap(wanted))

        analysis = self.analize(state)
        legal = {
            ply: ns
            for ply, ns in analysis["possible_moves"].items()
            if self._k is None or ply.num_objects <= self._k
        }
        spicy_moves = [ply for ply, ns in legal.items() if ns != 0]
        if not spicy_moves:
            # The fallback must respect `k` too.
            spicy_moves = list(legal)
        return random.choice(spicy_moves)

In [None]:
class SimulativePlayer(Player):
    """Player that rates each move with a handful of random playouts."""

    def __init__(self, k: int = None) -> None:
        super().__init__(k)

    def number_wins(self, state: Nim, simulations: int = 10):
        """Count random playouts from ``state`` won by the side moving first.

        Both sides play random moves bounded by this player's ``k``; the side
        that removes the last object loses. ``state`` is not modified.
        """
        rollout = RandomPlayer(self.k)  # the playouts must obey the same bound
        first_mover_wins = 0
        for _ in range(simulations):
            nim = deepcopy(state)
            player = 0
            while nim:
                nim.nimming(rollout.play(nim))
                player = 1 - player
            # `player` is now the side that did NOT take the last object.
            if player == 0:
                first_mover_wins += 1
        return first_mover_wins

    def play(self, state: Nim):
        """Return the move after which the opponent wins the fewest playouts.

        The first move reaching the lowest count is kept; the search stops as
        soon as a move leaves the opponent with zero wins. Raises
        ``TypeError`` when no move is available (``best_move`` stays ``None``).
        """
        best_move, best_sum = None, math.inf
        for row, count in enumerate(state.rows):
            limit = count if self.k is None else min(count, self.k)
            for amount in range(1, limit + 1):
                tmp = deepcopy(state)
                tmp.nimming(Nimply(row, amount))
                # After our move the opponent is the first mover of the playouts.
                tmp_sum = self.number_wins(tmp)
                if best_move is None or tmp_sum < best_sum:
                    best_move, best_sum = (row, amount), tmp_sum
                if best_sum == 0:
                    # Nothing can beat zero opponent wins: skip further playouts.
                    return Nimply(*best_move)
        return Nimply(*best_move)

In [None]:
# With advanced rule based player we've surpassed the previous optimal player
# Benchmark: 100 games of SimulativePlayer against AdvancedRuleBasedPlayer on the default board;
# the returned list holds [SimulativePlayer wins, AdvancedRuleBasedPlayer wins].
multiple_matches(SimulativePlayer(),AdvancedRuleBasedPlayer(),matches=100)

### *intelligent* Player

In [271]:
class PlayerNimStates(Player):
    """Player driven by a lookup table from board state to move.

    ``rules`` maps a tuple of row sizes to a ``(row, amount)`` move; the table
    is what mutation and crossover evolve. ``fitness`` stays ``None`` until
    ``compute_fitness`` runs.
    """

    def __init__(self, rules: dict, k: int = None) -> None:
        self.rules = rules
        self.fitness = None
        super().__init__(k)

    def play(self, state: Nim) -> Nimply:
        """Return the move stored for ``state`` (``KeyError`` if the state is unknown)."""
        return Nimply(*self.rules[state.rows])

    def mutation(self, mutation_rate) -> Player:
        """Replace each rule, with probability ``mutation_rate``, by a random legal move.

        The new move honours ``k`` when it is set. Mutates in place and
        returns ``self``.
        """
        for state in self.rules:
            if random.random() < mutation_rate:
                self.rules[state] = random.choice(
                    [
                        (r, o)
                        for r, c in enumerate(state)
                        for o in range(1, (c if self.k is None else min(c, self.k)) + 1)
                    ]
                )
        return self

    def crossover(self, other: Player, other_weight=0.5):
        """For every state keep our move or take ``other``'s with probability ``other_weight``.

        Mutates in place; ``other`` must hold a rule for each of our states.
        """
        for state in self.rules:
            self.rules[state] = random.choices(
                [self.rules[state], other.rules[state]],
                weights=[1 - other_weight, other_weight],
            )[0]

    def compute_fitness(self, state):
        """Store and return the wins over 50 games vs SimulativePlayer plus 50 vs AdvancedRuleBasedPlayer."""
        self.fitness = 0
        self.fitness += multiple_matches(self, SimulativePlayer(self.k), matches=50, game=state)[0]
        self.fitness += multiple_matches(self, AdvancedRuleBasedPlayer(self.k), matches=50, game=state)[0]
        return self.fitness

    def eval(self, state):
        """Return the wins over 100 games against a SimulativePlayer using the same ``k``."""
        return multiple_matches(self, SimulativePlayer(self.k), matches=100, game=state)[0]

    def load_player(self, path):
        """Replace ``rules`` with the table pickled at ``path`` and return ``self``."""
        # SECURITY: pickle.load can execute arbitrary code; only load files
        # produced by save_player from a trusted source.
        with open(path, 'rb') as f:
            self.rules = pickle.load(f)
        return self

    def save_player(self, path):
        """Pickle the rule table to ``path``."""
        with open(path, 'wb') as f:
            pickle.dump(self.rules, f)

In [None]:
class NimPlayerTraining():
    """Evolves a population of ``PlayerNimStates`` on a Nim board of ``size`` rows.

    Attributes set by the constructor:
        state: the board every fitness evaluation starts from.
        population: the current individuals.
        generation: the bound method running one generation of the chosen scheme.
        ration: DEFAULT scheme - an offspring is produced by crossover when a
            uniform draw exceeds it, by mutation otherwise; LAMBDA scheme -
            mutation rate. It decays after every generation.
    """

    class trainingType(Enum):
        DEFAULT = 0
        LAMBDA = 1
        TOURNAMENT = 2

    def __init__(self, size : int , pop_size : int ,training_type : trainingType = trainingType.DEFAULT, lam : int = None , k : int = None) -> None:
        """Configure the scheme, enumerate the states and build the first population.

        Args:
            size: number of rows of the board.
            pop_size: number of individuals created initially.
            training_type: evolution scheme.
            lam: LAMBDA scheme only - parents per generation and offspring per
                parent; capped to ``pop_size``, which is also used when omitted.
            k: optional bound on the objects removed per move.

        Raises:
            NotImplementedError: for the TOURNAMENT scheme.
            ValueError: for any other unknown ``training_type``.
        """
        self.training_type = training_type
        # The scheme is validated first so that an unsupported one fails
        # before the (slow) population initialisation.
        if training_type == self.trainingType.DEFAULT:
            self.generation = self.generation_default
            self.ration = 0.8
        elif training_type == self.trainingType.LAMBDA:
            self.generation = self.generation_lambda
            # edge cases: lam missing, or lam > pop_size
            self.lam = pop_size if lam is None else min(lam, pop_size)
            self.ration = 0.4
            self.temperature = 0.9
        elif training_type == self.trainingType.TOURNAMENT:
            raise NotImplementedError("tournament selection is not implemented")
        else:
            raise ValueError(f"unknown training type: {training_type!r}")
        self.state = Nim(size)
        self.population = []
        self.weights = []
        self.k = k
        self.possible_states = generate_lists(list(self.state.rows))
        self.init_population(pop_size)

    def init_population(self, size : int) -> None:
        """Append ``size`` random individuals, each with its fitness computed."""
        print("Initializing population")
        for _ in range(size):
            # The player receives `k` as well, so its mutations and fitness
            # opponents obey the same bound as its initial rules.
            temp = PlayerNimStates(random_moves(self.possible_states, self.k), self.k)
            temp.compute_fitness(self.state)
            self.population.append(temp)

    def generation_default(self) -> None:
        """Create as many offspring as individuals, then keep the best half.

        Each offspring is a copy of a random individual, altered by crossover
        with another random individual or by mutation (rate 0.1).
        """
        for _ in range(len(self.population)):
            n = random.random()
            temp = deepcopy(random.choice(self.population))
            if n > self.ration:
                temp.crossover(random.choice(self.population))
            else:
                temp.mutation(0.1)
            temp.compute_fitness(self.state)
            self.population.append(temp)
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        self.population = self.population[:len(self.population) // 2]
        self.ration = 0.9 * self.ration

    def generation_lambda(self) -> None:
        """Replace ``lam`` random parents by the best of each parent and its ``lam`` mutants."""
        parents = []
        # (mu,lambda) approach with lambda = mu for simplicity
        for _ in range(self.lam):  # Selecting parents randomly
            parents.append(self.population.pop(random.randint(0, len(self.population) - 1)))
        for parent in parents:
            temp_population = [deepcopy(parent)]
            for _ in range(self.lam):
                temp = deepcopy(parent).mutation(self.ration)
                temp.compute_fitness(self.state)
                temp_population.append(temp)
            temp_population.sort(key=lambda x: x.fitness, reverse=True)
            self.population.append(temp_population[0])
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        self.ration = self.temperature * self.ration

    def train(self, epoch=10):
        """Run ``epoch`` generations.

        Returns:
            ``(best individual, best fitness per generation, average fitness
            per generation)``.
        """
        fitness = []
        avg_fitness = []
        print("Starting Training")
        for generation_index in range(epoch):
            print("Generation : ", generation_index)
            self.generation()
            best = self.population[0]  # both schemes leave the population sorted
            print(f"Best fitness : {best.fitness} , eval : {best.eval(self.state)}")
            fitness.append(best.fitness)
            avg_fitness.append(sum(x.fitness for x in self.population) / len(self.population))
        return self.population[0], fitness, avg_fitness

### Training Phase
Here's the training part for the NimPlayer  
Given the time required to train an optimal player, this part is *disabled* unless the `TRAIN` variable is set to `True`.

In [None]:
### Constants
# Number of rows of the Nim board used for training and for the tests below.
NIM_SIZE = 4
# Training is slow: it only runs when this flag is switched to True.
TRAIN = False
# Number of individuals handed to each NimPlayerTraining instance.
POP_SIZE = 50
### Training
if TRAIN:
    default_trainer = NimPlayerTraining(NIM_SIZE,POP_SIZE,NimPlayerTraining.trainingType.DEFAULT)
    lambda_trainer = NimPlayerTraining(NIM_SIZE,POP_SIZE,NimPlayerTraining.trainingType.LAMBDA,lam=5)
    # NOTE(review): the second train() call rebinds fitness_lists and avg_fitness,
    # so the histories returned by the default trainer are lost.
    test_player , fitness_lists , avg_fitness = default_trainer.train(45)
    lambda_player , fitness_lists , avg_fitness = lambda_trainer.train(40)

### Player Test
It is possible to load a pre-trained player to test against an expert agent.

In [272]:
# Start from an empty rule table (a dict, as PlayerNimStates expects) and
# replace it with the pickled one.
# SECURITY: load_player unpickles the file; only load a trusted "test_player.pkl".
loaded_player = PlayerNimStates({}).load_player("test_player.pkl")

In [282]:
# Each line prints how many of 100 games the loaded player won against the given
# opponent (first element of the list returned by multiple_matches).
print("win ratio against AdvancedRuleBasedPlayer : ",multiple_matches(loaded_player,AdvancedRuleBasedPlayer(),matches=100,game=Nim(NIM_SIZE))[0])
print("win ratio against Simulative Player : ",multiple_matches(loaded_player,SimulativePlayer(),matches=100,game=Nim(NIM_SIZE))[0])
print("win ratio against Random Player : ",multiple_matches(loaded_player,RandomPlayer(),matches=100,game=Nim(NIM_SIZE))[0])

win ratio against AdvancedRuleBasedPlayer :  62
win ratio against Simulative Player :  66
win ratio against Random Player :  88
