Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo
* Put a `README.md` and your solution (all the files, code, and auxiliary data if needed) in it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [None]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from numpy import random as rnd
import scipy
from queue import PriorityQueue


## The *Nim* and *Nimply* classes

In [None]:
# A single move: remove `num_objects` objects from the heap at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [None]:
class Nim:
    """Board of a Nim game: a list of heaps ("rows") plus an optional move cap.

    Row ``i`` starts with ``2 * i + 1`` objects. When ``k`` is not ``None``,
    a single move may remove at most ``k`` objects.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        # Odd numbers 1, 3, 5, ... one per row.
        self._rows = list(range(1, 2 * num_rows, 2))
        self._k = k

    def __bool__(self):
        # Truthy while at least one object is still on the board.
        return 0 < sum(self._rows)

    def __str__(self):
        return f"<{' '.join(map(str, self._rows))}>"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current heap sizes."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply a move in place: remove ``ply.num_objects`` from row ``ply.row``.

        Asserts that the row holds enough objects and that the move does not
        exceed the cap ``k`` (when one is set).
        """
        row, num_objects = ply
        assert num_objects <= self._rows[row]
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) strategies

In [None]:
def pure_random(state: Nim) -> Nimply:
    """Return a random legal move.

    A non-empty row is drawn first; the number of objects to remove is then
    drawn between 1 and the row size, capped by the board's per-move limit
    ``k`` when one is set (``Nim.nimming`` rejects larger moves).

    Raises:
        IndexError: if every row is empty, i.e. no move is available.
    """
    rows = state.rows
    row = random.choice([r for r, c in enumerate(rows) if c > 0])
    # Nim exposes no public accessor for the cap, so read the private field.
    k = getattr(state, "_k", None)
    max_take = rows[row] if k is None else min(rows[row], k)
    num_objects = random.randint(1, max_take)
    return Nimply(row, num_objects)


In [None]:
def gabriele(state: Nim) -> Nimply:
    """Take as many objects as allowed from the first non-empty row.

    "As many as allowed" is the whole row, or the board's per-move limit
    ``k`` when one is set and smaller (``Nim.nimming`` rejects larger moves).

    Raises:
        ValueError: if every row is empty, i.e. no move is available.
    """
    # Nim exposes no public accessor for the cap, so read the private field.
    k = getattr(state, "_k", None)
    for row, count in enumerate(state.rows):
        if count > 0:
            return Nimply(row, count if k is None else min(count, k))
    raise ValueError("no objects left on the board")


In [None]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters.

    Placeholder: only the parameter dictionary is set up and no move is
    computed, so the call currently returns ``None``.
    """
    genome = dict(love_small=0.5)
    return None


In [None]:
#OUR STRATEGIES

def N_stg(state: Nim) -> Nimply:
    """Empty a randomly chosen non-empty heap (take all of its objects)."""
    whole_heap_moves = []
    for row, size in enumerate(state.rows):
        if size != 0:
            whole_heap_moves.append(Nimply(row, size))
    return random.choice(whole_heap_moves)

def NO_stg(state: Nim) -> Nimply:
    """Leave exactly one object in a randomly chosen heap holding more than one.

    Falls back to ``pure_random`` when no heap holds more than one object.
    """
    candidates = [Nimply(row, size - 1) for row, size in enumerate(state.rows) if size > 1]
    if candidates:
        return random.choice(candidates)
    # No heap with more than one object is left.
    return pure_random(state)

def NM_stg(state: Nim) -> Nimply:
    """Draw two non-empty heaps and shrink the larger by the size of the smaller.

    On equal sizes the first drawn heap is emptied. If the same heap is drawn
    twice the move is delegated to ``pure_random``.
    """
    heaps = [Nimply(row, size) for row, size in enumerate(state.rows) if size != 0]
    first = random.choice(heaps)
    second = random.choice(heaps)

    if first == second:
        # The same heap was drawn twice by chance.
        return pure_random(state)

    if second.num_objects > first.num_objects:
        return Nimply(second.row, first.num_objects)
    return Nimply(first.row, second.num_objects)

def NMO_stg(state: Nim) -> Nimply:
    """Like ``NM_stg`` but restricted to heaps larger than one, removing one object less.

    Two heaps holding more than one object are drawn; the larger one (the
    first drawn on ties) loses ``smaller_size - 1`` objects. The move is
    delegated to ``pure_random`` when no such heap exists or when the same
    heap is drawn twice.
    """
    heaps = [Nimply(row, size) for row, size in enumerate(state.rows) if size > 1]
    if not heaps:
        # No heap with more than one object was found.
        return pure_random(state)

    first = random.choice(heaps)
    second = random.choice(heaps)
    if first == second:
        # The same heap was drawn twice by chance.
        return pure_random(state)

    if second.num_objects > first.num_objects:
        return Nimply(second.row, first.num_objects - 1)
    return Nimply(first.row, second.num_objects - 1)

def O_stg(state: Nim) -> Nimply:
    """Remove a single object from a randomly chosen non-empty heap."""
    non_empty_rows = [row for row, size in enumerate(state.rows) if size != 0]
    return Nimply(random.choice(non_empty_rows), 1)


In [None]:
import numpy as np


def nim_sum(state: Nim) -> int:
    """Return the nim-sum of the board: the bitwise XOR of all row sizes.

    Computed directly on the integers, so it works for any non-negative row
    size and yields 0 for a board with no rows (the previous implementation
    went through fixed 32-character binary strings).
    """
    result = 0
    for count in state.rows:
        result ^= count
    return result


def analize(raw: Nim) -> dict:
    """Map every legal move to the nim-sum of the board it produces.

    Moves are enumerated row by row, from 1 object up to the row size,
    capped by the board's per-move limit ``k`` when one is set; without the
    cap ``Nim.nimming`` would assert on the oversized moves.

    Returns:
        A dict with a single key ``"possible_moves"`` whose value maps each
        ``Nimply`` to the nim-sum obtained after playing it on a copy of ``raw``.
    """
    # Nim exposes no public accessor for the cap, so read the private field.
    k = getattr(raw, "_k", None)
    cooked = {"possible_moves": {}}
    for r, c in enumerate(raw.rows):
        limit = c if k is None else min(c, k)
        for o in range(1, limit + 1):
            ply = Nimply(r, o)
            # Play the move on a copy so the caller's board is left untouched.
            tmp = deepcopy(raw)
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    """Pick at random among the moves that leave a non-zero nim-sum.

    When every legal move leaves a nim-sum of zero, any legal move may be
    picked instead.
    """
    # NOTE(review): the textbook nim-sum strategy moves TO a zero nim-sum;
    # this filter keeps the non-zero ones -- confirm this is intended.
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    candidates = [move for move, resulting_sum in outcomes.items() if resulting_sum != 0]
    if not candidates:
        candidates = list(outcomes)
    return random.choice(candidates)

def metaStrategy(state: Nim) -> Nimply:
    """Play a hand-written rule when one applies, otherwise a random strategy.

    The board is scanned once to count the heaps of size 1, the heaps larger
    than 1 and the non-empty heaps whose size differs from the first
    non-empty one. If a known pattern is recognised the matching move is
    returned; otherwise a strategy is drawn from the module-level
    ``fun_list`` (a list of ``(weight, strategy)`` pairs) with probability
    proportional to its weight, and that strategy chooses the move.

    The game is played in misère form (whoever removes the last object
    loses), so when a single heap is larger than 1 the move must leave an
    *odd* number of one-object heaps to the opponent.

    Every decision is printed together with the board and the chosen move.
    """
    rows = state.rows

    ones = 0         # heaps holding exactly one object
    nones = 0        # heaps holding more than one object
    nones_idx = 0    # index of the last heap holding more than one object
    first = 0        # size of the first non-empty heap
    target_idx = -1  # first non-empty heap, or the last heap differing from `first`
    nozeros = 0      # non-empty heaps
    count = 0        # non-empty heaps whose size differs from `first`

    for r, h in enumerate(rows):
        if h == 0:
            continue
        nozeros += 1
        if h == 1:
            ones += 1
        elif h > 1:
            nones += 1
            nones_idx = r
        if first == 0:
            first, target_idx = h, r
        elif h != first:
            count += 1
            target_idx = r

    if ones % 2 != 0 and nones == 1:
        # Odd number of one-object heaps: remove the big heap entirely so the
        # opponent faces an odd number of single objects.
        ply = Nimply(nones_idx, rows[nones_idx])
        print("tutti 1 (dispari) tranne 1", state.rows, ply)
    elif ones % 2 == 0 and ones > 0 and nones == 1:
        # Even number of one-object heaps: shrink the big heap to one object,
        # which makes the number of single objects odd.
        ply = Nimply(nones_idx, rows[nones_idx] - 1)
        print("tutti 1 (pari) tranne 1", state.rows, ply)
    elif ones == 0 and nones == 1:
        # A single heap is left: leave exactly one object in it.
        ply = Nimply(nones_idx, rows[nones_idx] - 1)
        print("quelli a 1 pari e uno solo > 1", state.rows, ply)
    elif nozeros % 2 != 0 and count == 0 and ones == 0:
        # Odd number of equal heaps: remove one of them entirely.
        ply = Nimply(target_idx, rows[target_idx])
        print("tutti uguali (dispari)", state.rows, ply)
    elif nozeros % 2 != 0 and count == 1:
        # Exactly one heap differs from the first one: remove it entirely.
        ply = Nimply(target_idx, rows[target_idx])
        print("tutti uguali (pari) tranne 1", state.rows, ply)
    else:
        # No rule applies: sample a strategy according to the current weights.
        stg = random.choices([st for _, st in fun_list], weights=[w for w, _ in fun_list])[0]
        ply = stg(state)
        print(f"Uso la strategia {stg.__name__}", state.rows, ply)
    return ply


In [None]:
# if there is an even number of heaps with 1 object and a single heap with n > 1, take n - 1
# if there is an odd number of heaps with more than 1 object, take a whole heap

# take 2: take n - 2 from the largest heap
def optimal(state: Nim) -> Nimply:
    """Choose randomly among the moves whose resulting nim-sum is not zero.

    If no such move exists, the choice is made among all legal moves.
    This re-binds ``optimal``, which an earlier cell defines with the same logic.
    """
    # NOTE(review): the textbook nim-sum strategy moves TO a zero nim-sum;
    # this filter keeps the non-zero ones -- confirm this is intended.
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    moves = analysis["possible_moves"]
    spicy = []
    for move, resulting_sum in moves.items():
        if resulting_sum != 0:
            spicy.append(move)
    return random.choice(spicy if spicy else list(moves.keys()))

## Oversimplified match

In [None]:
logging.getLogger().setLevel(logging.INFO)

# Weighted (weight, strategy) pairs that metaStrategy samples from when no
# hand-written rule applies.
fun_list = [
    (0.1, NMO_stg),
    (0.3, O_stg),
    (0.4, N_stg),
    (0.2, NO_stg),
]
strategy = (metaStrategy, metaStrategy)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    # Hand the turn to the other player (indices are 0 and 1).
    player = (player + 1) % 2
# `player` now indexes whoever did NOT remove the last object.
logging.info(f"status: Player {player} won!")


In [None]:
# Template of (weight, strategy) pairs; `fun_list` is the copy read by metaStrategy.
fun_list_orig = [(0.4, N_stg), (0.5, NM_stg), (0.1, optimal)]
fun_list = list(fun_list_orig)
# Player 0 plays `optimal`, player 1 plays `metaStrategy`.
strategy = (optimal, metaStrategy)

# Best weighted list seen so far and its win rate.
best_list = list(fun_list)
best_win_perc = 0

In [None]:
# Load the submodule explicitly: a bare `import scipy` does not guarantee that
# `scipy.special` is available as an attribute on every SciPy version.
import scipy.special

NUM_MATCHES = 500       # matches played to evaluate one individual
NUM_LOCAL_RUNS = 1      # number of steps over which the tweak factor decays
NUM_RUNS = 20           # individuals per generation
NUM_GENS = 10           # generations

ELITES_MAX_NUM = 10     # best individuals copied unchanged into the next generation
elites_fun_list = []    # not used by this cell

tweak_factor_orig = 1
tweak_factor = tweak_factor_orig
tweak_factor_final = 1
twaek_factor_loss = (tweak_factor_orig - tweak_factor_final) / NUM_LOCAL_RUNS

# Ranking of the evaluated individuals. PriorityQueue returns the LOWEST entry
# first, so entries are stored as (-win_rate, index, individual): the best win
# rate comes out first, and the unique index breaks ties before the comparison
# can reach the individuals themselves (they contain functions, which cannot
# be ordered).
population_fun_queue = PriorityQueue()

for gen in range(NUM_GENS):
    print("gen #", gen)

    # Carry over the best individuals of the previous generation (the queue is
    # empty on the first generation).
    population_fun_list = []
    while population_fun_queue.qsize() > 0 and len(population_fun_list) < ELITES_MAX_NUM:
        eli = population_fun_queue.get()[-1]
        print("copio elite")
        population_fun_list.append(eli)
    population_fun_queue = PriorityQueue()  # fresh ranking for this generation

    if population_fun_list:
        # Mutation: one elite is picked as parent; each offspring perturbs one
        # of its weights with Gaussian noise and re-normalises with softmax.
        # NOTE(review): the parent is drawn once per generation, not once per
        # offspring -- confirm this is intended.
        elem = random.choice(population_fun_list)
        while len(population_fun_list) < NUM_RUNS:
            tweak = rnd.normal(loc=0, scale=tweak_factor / len(fun_list_orig))
            index = random.choice(range(len(fun_list_orig)))

            old_weights = [w for w, _ in elem]
            old_weights[index] += tweak

            new_weights = scipy.special.softmax(old_weights, axis=None)
            offspring = [(new_weights[idx], st) for idx, (_, st) in enumerate(elem)]
            population_fun_list.append(offspring)
    else:
        # No elites yet: fill the population with random weight vectors.
        while len(population_fun_list) < NUM_RUNS:
            rand_weights = scipy.special.softmax(
                [random.choice(range(10)) for _ in range(len(fun_list_orig))], axis=None
            )
            population_fun_list.append(
                [(rand_weights[idx], st) for idx, (_, st) in enumerate(fun_list_orig)]
            )

    # Decrease the tweak factor.
    tweak_factor -= twaek_factor_loss

    # Evaluate every individual. The loop variable re-binds the module-level
    # `fun_list`, which is what metaStrategy reads when it samples a strategy.
    for individual_idx, fun_list in enumerate(population_fun_list):
        print("best:", best_win_perc)

        num_win = 0
        for _ in range(NUM_MATCHES):
            nim = Nim(5)
            player = 0
            while nim:
                ply = strategy[player](nim)
                print(f"Sto usando la strategia {strategy[player].__name__}")
                nim.nimming(ply)
                player = 1 - player

            # After the last move `player` indexes whoever did NOT take the
            # last object; index 1 is the agent under evaluation.
            if player:
                num_win += 1

        # Update the best individual seen so far.
        perc_run = num_win / NUM_MATCHES
        if perc_run > best_win_perc:
            best_list = fun_list.copy()
            best_win_perc = perc_run

        # Rank the individual for the elite selection of the next generation.
        population_fun_queue.put((-perc_run, individual_idx, fun_list.copy()))



In [None]:
# Display the best weighted (weight, strategy) list recorded by the search.
best_list

In [None]:
# Display the win rate recorded for `best_list`.
best_win_perc

In [None]:
# TEST: replay NUM_MATCHES games with the best weights found and print the win rate.
fun_list = best_list
strategy = (optimal, metaStrategy)
num_win = 0
print(fun_list)
for _ in range(NUM_MATCHES):
    nim = Nim(5)
    player = 0
    while nim:
        ply = strategy[player](nim)
        nim.nimming(ply)
        player = 1 - player

    # `player` indexes whoever did NOT take the last object; index 1 is
    # metaStrategy, so a final value of 1 counts as a win.
    num_win += 1 if player else 0

# Fraction of matches won by metaStrategy.
perc_run = num_win / NUM_MATCHES
print(perc_run)