Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a._k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [16]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
import math
import numpy as np
from tqdm.notebook import tqdm


## The *Nim* and *Nimply* classes

In [17]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [18]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        if k is not None:
            self._k=k
        else:
            self._k=sum(self.rows)

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:#controlla se si può fare quella mossa e la fa fare
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [19]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [20]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [21]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [22]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    #logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply

In [23]:
def generate_unique_array_from_function(n, initial_state):
    #here i generate the population, so it is composed by dstinct member
    unique_set = set()
    unique_array = []

    while len(unique_set) < n:
        value = pure_random(initial_state)
        _, m = value
        if  value not in unique_set and m <= initial_state._k:
            unique_set.add(value)
            unique_array.append(value) ## BASTA il set

    return unique_array

In [24]:
import sys

def strategy_1(initial_state):
    new_state = deepcopy(initial_state)

    righe_non_zero = [(indice, numero) for indice, numero in enumerate(new_state.rows) if numero != 0]

    numero_righe_non_zero = len(righe_non_zero)
    indice, bstn = righe_non_zero[0]

    if numero_righe_non_zero == 1 and bstn != 1:
        #if there is 1 row left I make a different move, so i don't use evolutaionary alghoritm
        return Nimply(indice, bstn - 1)
    else:
        return optimal(initial_state)

def strategy_2(initial_state):
    return optimal(initial_state)

def strategy_3(initial_state):
    analysis = analize(initial_state)
    #logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns == 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply

def strategy_4(initial_state):
    actual_nimsum = nim_sum(initial_state)
    possible_solutions = generate_unique_array_from_function(sum(initial_state.rows), initial_state)
    min = sys.maxsize
    min_sol = initial_state
    for sol in possible_solutions:
        tmp = deepcopy(initial_state)
        tmp.nimming(sol)
        if nim_sum(tmp) == actual_nimsum:
            return sol
        if abs(actual_nimsum - nim_sum(tmp)) < min:
            min = abs(actual_nimsum - nim_sum(tmp))
            min_sol = sol
    return min_sol

def strategy_5(initial_state):
    return gabriele(initial_state)

def strategy_6(initial_state):
    row = initial_state.rows.index(max(initial_state.rows))
    return Nimply(row,random.randint(1, initial_state.rows[row]))

In [25]:
def evaluate_strategy(strategy):
    win_counter=0

    for i in range(50):
        nim = Nim(5)
        player = 0
        while nim:
            if player == 0:
                if i % 2 == 0:
                    ply = optimal(nim)
                else:
                    ply = gabriele(nim)
                nim.nimming(ply)
            else:
                ply = strategy(nim)
                nim.nimming(ply)
            player = 1 - player
        if player == 1:
            win_counter += 1

    for i in range(50):
        nim = Nim(5)
        player = 0
        while nim:
            if player == 1:
                if i % 2 == 0:
                    ply = optimal(nim)
                else:
                    ply = gabriele(nim)
                nim.nimming(ply)
            else:
                ply = strategy(nim)
                nim.nimming(ply)
            player = 1 - player
        if player == 0:
            win_counter += 1
            
    return win_counter

In [26]:
def weighted_random_choice(weights):
  
    pesi_normalizzati = weights - np.min(weights)
    total_peso = np.sum(pesi_normalizzati)
    probabilita_normalizzate = pesi_normalizzati / total_peso
    indice_selezionato = np.random.choice(len(weights), p = probabilita_normalizzate)    
    return indice_selezionato

def weight_mean(weights, fitness):
    weight_summed = np.sum(weights * fitness)
    sum_weight = np.sum(weights)
    
    # Calcola la media pesata
    mean = weight_summed / sum_weight
    return mean

def fitness(x, tmp_array):
   if x.ndim == 1:
    result_array = np.zeros((1,))
    result_array[0] = weight_mean(x, tmp_array)
   else:
    result_array = np.zeros((x.shape[0],))
    for i in range(x.shape[0]): 
        result_array[i]= weight_mean(x[i],tmp_array)
   
    
   return result_array

def generate_weights(strategies, tmp_array, λ = 20, σ = 0.001):
    solution = abs(np.random.random(6)) 
    best_so_far = np.copy(solution)

    for n in range(100000 // λ):
    # offspring <- select λ random points mutating the current solution
        offspring =abs( (
         np.random.normal(loc=0, scale=σ, size=(λ, 6))) + solution
        )
    # evaluate and select best
    
        evals = fitness(offspring,tmp_array)
        solution = offspring[np.argmax(evals)]
      
    if fitness(best_so_far,tmp_array) < fitness(solution,tmp_array):
         best_so_far = np.copy(solution)
    return best_so_far

def generate_weights_adaptive(tmp_array):
    λ = 100

    solution = abs(np.random.random(7)) 
    solution[6]=1

    best_so_far = np.copy(solution)
    for n in range(100000 // λ):
        # offspring <- select λ random points mutating the current solution
        offspring =abs( (
        np.random.normal(loc=0, scale=solution[6], size=(λ, 7))) + solution
        )
    # evaluate and select best
    
        evals = fitness(offspring[:,:-1],tmp_array)
        solution = offspring[np.argmax(evals)]
    
        if fitness(best_so_far[:6],tmp_array) < fitness(solution[:6],tmp_array):
           best_so_far = np.copy(solution)

    return best_so_far[:6],best_so_far[6]
def generate_weights_comma(strategies, tmp_array, λ = 20, σ = 0.001):
    solution = abs(np.random.random(6)) 
    best_so_far = np.copy(solution)

    for n in range(100000 // λ):
    # offspring <- select λ random points mutating the current solution
        offspring =abs( (
         np.random.normal(loc=0, scale=σ, size=(λ, 6))) + solution
        )
    # evaluate and select best
    
        evals = fitness(offspring,tmp_array)
        solution = offspring[np.argmax(evals)]
      
        best_so_far = np.copy(solution)
    return best_so_far
def generate_weights_adaptive_comma(tmp_array):
    λ = 100

    solution = abs(np.random.random(7)) 
    solution[6]=1
    best_so_far = np.copy(solution)
    for n in range(100000 // λ):
        # offspring <- select λ random points mutating the current solution
        offspring =abs( (
        np.random.normal(loc=0, scale=solution[6], size=(λ, 7))) + solution
        )
    # evaluate and select best
    
        evals = fitness(offspring[:,:-1],tmp_array)
        print(offspring.shape)
        solution = offspring[np.argmax(evals)]
    
        best_so_far = np.copy(solution)

    return best_so_far[:6],best_so_far[6]

In [27]:
def test(strategies,weights):
    win_counter=0

    for i in range(50):
        nim = Nim(5)
        player = 0
        while nim:
            if player == 0:
                if i % 2 == 0:
                    ply = optimal(nim)
                else:
                    ply = pure_random(nim)
                nim.nimming(ply)
            else:
                ply = strategies[weighted_random_choice(weights)](nim)
                nim.nimming(ply)
            player = 1 - player
        if player == 1:
            win_counter += 1
           
    for i in range(50):
        nim = Nim(5)
        player = 0
        while nim:
            if player == 1:
                if i % 2 == 0:
                    ply = optimal(nim)
                else:
                    ply = gabriele(nim)
                nim.nimming(ply)
            else:
                ply = strategies[weighted_random_choice(weights)](nim)
                nim.nimming(ply)
            player = 1 - player
        if player == 0:
            win_counter += 1
            
    return win_counter
    

## Oversimplified match

In [28]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, pure_random)

nim = Nim(5)
print(nim.rows)
logging.info(f"init : {nim}")
player = 0
while nim:
    if player==0:
        ply = optimal(nim)
        logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        logging.info(f"status: {nim}")
    else:
        ply = strategy_6(nim)
        logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 7 9>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=2)
INFO:root:status: <0 3 5 7 7>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 2 5 7 7>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=6)
INFO:root:status: <0 2 5 1 7>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=3)
INFO:root:status: <0 2 2 1 7>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=6)
INFO:root:status: <0 2 2 1 1>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <0 0 2 1 1>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 1 1 1>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 1 0 1>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 0 0 1>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=1)
INFO:root:status: <0 0

(1, 3, 5, 7, 9)


In [29]:
#evaluate_strategy(strategy_2)
""""
tmp_array = np.zeros((6,))
vett_strategies=[strategy_1,strategy_2,strategy_3,strategy_4,strategy_5,strategy_6]

for j in range(6) :
    tmp_array[j]=evaluate_strategy(vett_strategies[j])
weights=generate_weights(vett_strategies,tmp_array)

print(test(vett_strategies,weights=weights))
vett_strategies_=[strategy_1,strategy_1,strategy_1,strategy_1,strategy_1,strategy_1]
test(vett_strategies_,weights=weights)

"""

'"\ntmp_array = np.zeros((6,))\nvett_strategies=[strategy_1,strategy_2,strategy_3,strategy_4,strategy_5,strategy_6]\n\nfor j in range(6) :\n    tmp_array[j]=evaluate_strategy(vett_strategies[j])\nweights=generate_weights(vett_strategies,tmp_array)\n\nprint(test(vett_strategies,weights=weights))\nvett_strategies_=[strategy_1,strategy_1,strategy_1,strategy_1,strategy_1,strategy_1]\ntest(vett_strategies_,weights=weights)\n\n'

In [30]:
sigma=[1,0.1,0.01,0.001,0.0001]
vett_strategies=[strategy_1,strategy_2,strategy_3,strategy_4,strategy_5,strategy_6]
tmp_array = np.zeros((6,))
for j in range(6) :
    tmp_array[j]=evaluate_strategy(vett_strategies[j])

for i in sigma:
    weights=generate_weights(vett_strategies,tmp_array,σ=i)
    print("for σ=",i,"we have ",test(vett_strategies,weights=weights),"win")

vett_strategies_=[strategy_1,strategy_1,strategy_1,strategy_1,strategy_1,strategy_1]
test(vett_strategies_,weights=weights)

weights_,σ_=generate_weights_adaptive(tmp_array)
print("using adaptive we obtain σ = ", σ_," with  ",test(vett_strategies,weights_)," win" )

weights_,σ_=generate_weights_adaptive_comma(tmp_array)

print("using adaptive with comma lambda we obtain σ = ", σ_," with  ",test(vett_strategies,weights_)," win" )

for i in sigma:
    weights=generate_weights_comma(vett_strategies,tmp_array,σ=i)
    print("using comma lambda for σ=",i,"we have ",test(vett_strategies,weights=weights),"win")

for σ= 1 we have  69 win
for σ= 0.1 we have  66 win
for σ= 0.01 we have  58 win
for σ= 0.001 we have  64 win
for σ= 0.0001 we have  46 win
using adaptive we obtain σ =  3.2694241475521375e-16  with   61  win
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)
(100, 7)


In [31]:
ar=np.random.random(7)
print(ar[:6])


[0.34486568 0.72469447 0.67230942 0.10829135 0.03467962 0.87983111]
