Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) in it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [348]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
import numpy as np
from random import randint


## The *Nim* and *Nimply* classes

In [3]:
# A single move ("ply"): remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")


In [381]:
class Nim:
    """Board of a Nim game.

    Row ``i`` starts with ``2 * i + 1`` objects.  ``k`` is an optional upper
    bound on how many objects a single move may remove; ``None`` means there
    is no bound.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        # Odd numbers 1, 3, 5, ... one per row.
        self._rows = list(range(1, 2 * num_rows, 2))
        self._k = k

    def __bool__(self):
        """A board is truthy while at least one object is left."""
        return 0 < sum(self._rows)

    def __str__(self):
        """Render the row sizes as ``<a b c ...>``."""
        return f"<{' '.join(map(str, self._rows))}>"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply a move: take ``ply.num_objects`` objects from row ``ply.row``.

        The move is checked with assertions: it must remove at least one
        object, no more than the row currently holds, and no more than ``k``
        when a bound was given.
        """
        row, num_objects = ply
        assert num_objects > 0
        available = self._rows[row]
        assert available >= num_objects
        limit = self._k
        assert limit is None or num_objects <= limit
        self._rows[row] = available - num_objects


## Sample (and silly) strategies

In [382]:
def pure_random(state: Nim) -> Nimply:
    """Return a random move: a random non-empty row and a random amount from it.

    The amount is drawn between 1 and the number of objects in the chosen
    row; only ``state.rows`` is consulted.
    """
    non_empty = [index for index, count in enumerate(state.rows) if count > 0]
    row = random.choice(non_empty)
    return Nimply(row, random.randint(1, state.rows[row]))


In [383]:
def gabriele(state: Nim) -> Nimply:
    """Empty the first (lowest-index) row that still holds objects."""
    moves = []
    for index, count in enumerate(state.rows):
        moves.extend((index, taken) for taken in range(1, count + 1))

    def preference(move):
        # Lower row index wins first; within a row, more objects is better.
        return (-move[0], move[1])

    return Nimply(*max(moves, key=preference))


In [713]:

# Hyper-parameters read by `adaptive`.
TRAIN_EPOCHS = 1000  # number of iterations of the evolution loop
ACCURACY = 100  # games played to score each child
LAMBDA = 5  # children generated at every iteration
MAX_RAND = 20  # row draws attempted before falling back to the largest row

def softmax(x, xtot):
    """Return the softmax weight of ``x`` relative to the scores in ``xtot``.

    Computes ``exp(x) / sum(exp(xtot))``.  The largest score is subtracted
    from every exponent first: the ratio is mathematically unchanged, but
    large scores no longer overflow to ``inf`` and produce ``nan``.

    Args:
        x: The score whose weight is wanted.
        xtot: All the scores the weight is normalised over.

    Returns:
        The normalised weight as a NumPy float.
    """
    scores = np.asarray(xtot, dtype=float)
    # With no scores there is nothing to shift by.
    shift = scores.max() if scores.size else 0.0
    return np.exp(x - shift) / np.sum(np.exp(scores - shift))

def _make_rank_player(probabilities):
    """Build a Nim strategy that empties a row chosen by its size rank.

    ``probabilities[i]`` is the chance of drawing the row that holds the
    i-th smallest number of objects on the current board.  The drawn row is
    emptied completely.  The strategy only works on boards with exactly
    ``len(probabilities)`` rows.
    """

    def rank_player(state):
        # (row index, objects) pairs, fewest objects first.
        ranked = sorted(enumerate(state.rows), key=lambda pair: pair[1])
        for _ in range(MAX_RAND):
            rank = np.random.choice(len(probabilities), p=probabilities)
            row, num_objects = ranked[rank]
            if num_objects != 0:
                return Nimply(row, num_objects)
        # Every draw hit an empty row: fall back to emptying the largest row.
        largest = max(state.rows)
        return Nimply(state.rows.index(largest), largest)

    return rank_player


def _candidate_moves_last(opponent, candidate, first_player, num_rows):
    """Play one game and report whether ``candidate`` made the final move.

    ``opponent`` is player 0 and ``candidate`` is player 1; ``first_player``
    selects who opens the game on a fresh ``Nim(num_rows)`` board.
    """
    strategy = (opponent, candidate)
    player = first_player
    board = Nim(num_rows)
    while board:
        board.nimming(strategy[player](board))
        player = 1 - player
    # `player` was flipped after the final move, so `1 - player` moved last.
    return (1 - player) == 1


def adaptive(state: Nim, comparison_strategy) -> Nimply:
    """Evolve a rank-based Nim strategy and return it.

    A (1 + LAMBDA) evolution strategy with self-adapted step sizes is run
    for TRAIN_EPOCHS iterations.  The genome is one probability per size
    rank of the rows (see ``_make_rank_player``) plus one sigma per
    probability.  Each child is scored by the fraction of ACCURACY games,
    with the opening player alternating, in which it makes the last move
    against ``comparison_strategy``.  The parent keeps the score it was
    given when it was selected; it is not re-evaluated.

    Args:
        state: Only its number of rows is used; training games are played
            on fresh ``Nim`` boards with that many rows.
        comparison_strategy: Opponent strategy, called as ``f(board)`` and
            returning a ``Nimply``.

    Returns:
        Despite the annotation, a strategy function ``f(board) -> Nimply``
        using the best genome found.

    Raises:
        ValueError: If ``state`` has no rows.
    """
    num_rows = len(state.rows)
    if num_rows == 0:
        raise ValueError("adaptive() needs a board with at least one row")

    # The right variance is unknown, so the sigmas are self-adapted.
    uniform = [1 / num_rows] * num_rows
    genome = {
        "past_win": 0.5,
        "p": [softmax(weight, uniform) for weight in uniform],
        "sigma_p": [4 / num_rows] * num_rows,
    }

    for step in range(TRAIN_EPOCHS):
        # Learning rate for the sigma mutation, decaying with the step.
        lr_p = 1 / np.sqrt(step + 1)

        # One (win_ratio, p, sigma_p) tuple per child.
        result = []
        for _ in range(LAMBDA):
            # Each sigma is mutated on its own, then used to perturb its p.
            new_sigma_p = [
                sigma * np.exp(lr_p * random.normalvariate(0, 1))
                for sigma in genome["sigma_p"]
            ]
            raw_p = [
                p + random.normalvariate(0, sigma)
                for p, sigma in zip(genome["p"], new_sigma_p)
            ]
            # Re-normalise so the values form a probability distribution.
            new_p = [softmax(p, raw_p) for p in raw_p]

            candidate = _make_rank_player(new_p)
            win_count = sum(
                _candidate_moves_last(
                    comparison_strategy, candidate, game % 2, num_rows
                )
                for game in range(ACCURACY)
            )
            result.append((win_count / ACCURACY, new_p, new_sigma_p))

        # (1 + LAMBDA) selection: the parent competes with its children.
        # It is appended last, so a child wins a tie against it.
        result.append((genome["past_win"], genome["p"], genome["sigma_p"]))
        best_child = max(result, key=lambda c: c[0])
        genome = {
            "past_win": best_child[0],
            "p": best_child[1],
            "sigma_p": best_child[2],
        }

    logging.info(f"genome:{genome}")

    return _make_rank_player(genome["p"])
    



In [587]:
import numpy as np

# Show INFO-level messages from the root logger (the match results below use logging.info).
logging.getLogger().setLevel(logging.INFO)




def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of *state*: the bitwise XOR of all its row sizes.

    The XOR is folded directly over the integers, so an empty board gives 0
    and row sizes of any magnitude are supported.
    """
    total = 0
    for count in state.rows:
        total ^= int(count)
    return total


def analize(raw: Nim) -> dict:
    """Map every move available on *raw* to the nim-sum it leads to.

    Each move is tried on a deep copy, so *raw* itself is left untouched.
    The result has a single key, ``"possible_moves"``, holding a dict from
    ``Nimply`` to the nim-sum of the board after that move.
    """
    outcomes = {}
    for row_index, count in enumerate(raw.rows):
        for taken in range(1, count + 1):
            move = Nimply(row_index, taken)
            board = deepcopy(raw)
            board.nimming(move)
            outcomes[move] = nim_sum(board)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Play a move that leaves a nim-sum of zero, when one exists.

    If several such moves exist one is picked at random; if none exists,
    a move is picked at random among all available ones.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    candidates = [move for move, value in outcomes.items() if value == 0]
    if len(candidates) == 0:
        # No zero-sum move available: any move is as good as another.
        candidates = list(outcomes)
    return random.choice(candidates)


## Oversimplified match

In [583]:
# Pair the hand-written strategies against each other, 1000 games per pairing.
# The opening player alternates from game to game, and a game is counted for
# player 0 when player 0 makes the last move (rule change).
# The trailing comments are the author's expectations for player 0.
matchups = (
    (optimal, optimal),          # 1: half win
    (optimal, pure_random),      # 2: always win
    (pure_random, pure_random),  # 3: 50%
    (gabriele, pure_random),     # 4: 50% no -> better >70%
    (gabriele, gabriele),        # 5: 50%
    (gabriele, optimal),         # 6: expected 50%; the recorded run shows 0 wins
)

for strategy in matchups:
    count = 0
    for i in range(1000):
        nim = Nim(5)
        player = i % 2
        while nim:
            ply = strategy[player](nim)
            nim.nimming(ply)
            player = 1 - player  # next player
        # `player` was flipped after the final move, so `1 - player` moved last.
        if (1 - player) == 0:
            count += 1

    logging.info(f"status: Player {0} won {count} times!")


INFO:root:status: Player 0 won 500 times!
INFO:root:status: Player 0 won 1000 times!
INFO:root:status: Player 0 won 502 times!
INFO:root:status: Player 0 won 762 times!
INFO:root:status: Player 0 won 500 times!
INFO:root:status: Player 0 won 0 times!


In [702]:

# Match-up 4 again with ten times as many games, for a steadier estimate.
# Player 0 is `gabriele`, player 1 is `pure_random`.
strategy = ( gabriele, pure_random )  # expected 50%; in practice better, >70%
count = 0 
for i in range(10000):
    
    nim = Nim(5)
    #logging.info(f"init : {nim}")
    player=i%2  # alternate who opens the game
    while nim:
        ply = strategy[player](nim)
        nim.nimming(ply)
        player = 1 - player  # next player
    # `player` was flipped after the final move, so `1 - player` moved last.
    if (1-player)==0:
        count+=1

logging.info(f"status: Player {0} won {count} times!")  # rule change: the last mover is counted as the winner

INFO:root:status: Player 0 won 7621 times!


In [712]:
logging.getLogger().setLevel(logging.INFO)
# It doesn't make sense to train against `optimal`, at least in a first phase,
# so the evolved agent is trained against the random player.

# Create the board here rather than relying on a `nim` left over from an
# earlier cell, so this cell also runs on a fresh kernel.
nim = Nim(5)
evolutionary = adaptive(nim, pure_random)

# Player 0 is the random baseline, player 1 is the evolved agent.
strategy = (pure_random, evolutionary)

TEST_SAMPLE = 10000

logging.info(f"init : {nim}")
player = 0
win_count = 0
for i in range(TEST_SAMPLE):
    nim = Nim(5)
    player = i % 2  # alternate who opens the game
    while nim:
        ply = strategy[player](nim)
        nim.nimming(ply)
        player = 1 - player

    # `player` was flipped after the final move, so `1 - player` moved last;
    # a game counts as won when the evolved agent (player 1) moved last.
    if (1 - player) == 1:
        win_count += 1


logging.info(f"status: evolutionary result: {win_count} won!")



INFO:root:genome:{'past_win': 0.91, 'p': [0.384249863485209, 0.22158842562762432, 0.22650985590920084, 0.15212204277265962, 0.015529812205306153], 'sigma_p': [1.0004558032774495, 0.24745396641626868, 0.2917381228897459, 0.20056299197427388, 0.9734738582898244]}
INFO:root:init : <1 3 5 7 9>
INFO:root:status: evolutionary result: 8209 won!


In [None]:
# If I can remove objects from the highest row
# so that it equals the XOR of the other rows -> best move found
# else
# the highest row certainly has its highest bit set to 1


