Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [4]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
import numpy as np
from random import randint,choice


## The *Nim* and *Nimply* classes

In [5]:
# A move ("ply"): remove `num_objects` objects from the row with index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")


In [6]:
class Nim:
    """Board of a Nim game.

    Row ``i`` starts with ``2 * i + 1`` objects. When ``k`` is given, a single
    move may remove at most ``k`` objects.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        # Odd numbers 1, 3, 5, ... one per row.
        self._rows = list(range(1, 2 * num_rows, 2))
        self._k = k

    def __bool__(self):
        """Truthy while there are still objects on the board."""
        remaining = sum(self._rows)
        return remaining > 0

    def __str__(self):
        return f"<{' '.join(map(str, self._rows))}>"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the row sizes."""
        return tuple(self._rows)

    @property
    def rows_l(self) -> list:
        """The internal (mutable) list of row sizes, not a copy."""
        return self._rows

    def nimming(self, ply: Nimply) -> None:
        """Apply a move: remove ``num_objects`` objects from row ``row``."""
        row, num_objects = ply
        # A move must take at least one object, no more than the row holds,
        # and no more than the optional per-move cap.
        assert 0 < num_objects
        assert num_objects <= self._rows[row]
        assert self._k is None or num_objects <= self._k
        self._rows[row] = self._rows[row] - num_objects


## Sample (and silly) strategies

All the following code addresses the problem with a good probability of success (around 99%).
The optimized version is the one in which the player who plays the optimal strategy always wins (50% only if the opponent also plays optimally); to obtain this result the nim-sum left after each of its moves should always be == 0,
and the player who wins is the one who removes the last piece(s).


In [7]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row, then a random number of its objects."""
    sizes = state.rows
    non_empty = [index for index, count in enumerate(sizes) if count > 0]
    chosen = random.choice(non_empty)
    amount = random.randint(1, sizes[chosen])
    return Nimply(chosen, amount)


In [8]:
def gabriele(state: Nim) -> Nimply:
    """Take as many objects as possible from the non-empty row with the lowest index."""
    candidates = []
    for row, count in enumerate(state.rows):
        for amount in range(1, count + 1):
            candidates.append((row, amount))

    def preference(move):
        # Lower row index first; within the same row, the larger amount.
        row, amount = move
        return (-row, amount)

    best_row, best_amount = max(candidates, key=preference)
    return Nimply(best_row, best_amount)



# Evolutionary strategy
* STRATEGY 1: compute the XOR of all elements (number of matches per row) except the maximum one,
  then, if the XOR is lower than the maximum, remove `max - xor` matches from the largest row (no parameter to optimize)
* STRATEGY 2: find the objective probability vector pvec=[p0,p1,...,pn-1], where pi is the probability of removing all the elements from the
  i-th smallest row, and then use it to pick the move
  * Example:
    p0 is the probability of removing all from the smallest row <br>
    p1 is the probability of removing all from the 2nd smallest row <br>
    pn-1 is the probability of removing all from the largest row <br>
  
  Other techniques such as softmax(pvec) are used to normalize the vector so that sum(pvec) = 1 and every pi lies in (0,1) <br>
  


In [68]:
# Number of generations of the evolution loop (depth of the genealogical tree).
# Too many generations can drive some probabilities to underflow.
TRAIN_EPOCHS = 10
# Number of games simulated to estimate the win rate of each candidate.
ACCURACY = 10000
# Number of children generated per generation (offspring size).
LAMBDA = 10
# Upper bound on the number of random row draws for a single move. Needed because:
# 1. p can be approximately 0 for some rows (< 1e-40)
# 2. a drawn row can be invalid (e.g. at least one row has 0 matches left)
MAX_RAND = 100

def softmax(x, xtot):
    """Return the softmax weight of ``x`` with respect to the values ``xtot``.

    Computes ``exp(x) / sum(exp(v) for v in xtot)``. The largest value of
    ``xtot`` is subtracted from every exponent first: the result is
    mathematically unchanged, but ``exp`` no longer overflows to ``inf``
    (which used to turn the ratio into NaN) when the values are large.

    Args:
        x: the value whose weight is requested (normally an element of ``xtot``).
        xtot: all the values being normalized together.

    Returns:
        The normalized weight as a NumPy float.
    """
    values = np.asarray(xtot, dtype=float)
    # With no values there is nothing to shift by.
    shift = values.max() if values.size else 0.0
    return np.exp(x - shift) / np.sum(np.exp(values - shift))

def adaptive(grid: Nim, comparison_strategy):
    """Evolve a Nim player with a self-adaptive (1+LAMBDA) evolution strategy.

    In this variant the player who removes the last object(s) wins. The
    evolved player combines two rules:

    * Strategy 1 (no parameters): XOR the sizes of every row except the
      largest one; when that value is lower than the largest row, shrink the
      largest row to it, leaving a position whose nim-sum is 0.
    * Strategy 2 (evolved): otherwise draw a rank ``i`` with probability
      ``p[i]`` and remove every object from the i-th smallest row.

    The genome holds the probabilities ``p`` and one mutation step size per
    probability (``sigma_p``); step sizes are mutated log-normally before
    being used, so no variance has to be chosen by hand.

    Args:
        grid: starting position; every simulated game starts from a deep copy.
        comparison_strategy: the opponent, a callable ``state -> Nimply``.

    Returns:
        A callable ``state -> Nimply`` playing with the best probabilities
        found. It expects states with as many rows as ``grid``.
    """
    n_rows = len(grid.rows)

    def make_player(prow):
        """Return a strategy function that plays with rank probabilities ``prow``."""
        prow = list(prow)

        def evolutionary(state):
            rows = state.rows
            max_val = max(rows)
            max_row = rows.index(max_val)
            # Nim-sum of all rows but the (first) largest one. Plain integer
            # XOR also works on a single-row board and for any row size.
            xor_val = 0
            for index, count in enumerate(rows):
                if index != max_row:
                    xor_val ^= count

            if xor_val < max_val:
                # Strategy 1: leave the opponent a position with nim-sum 0.
                return Nimply(max_row, max_val - xor_val)

            # Strategy 2: rows ranked by growing size, row_map[rank] == (row, count).
            row_map = sorted(enumerate(rows), key=lambda r: r[1])
            for _ in range(MAX_RAND):
                rank = np.random.choice(len(prow), p=prow)
                row, count = row_map[rank]
                if count > 0:
                    return Nimply(row, count)

            # Every draw hit an empty row (their ranks may hold almost all the
            # probability mass): empty the non-empty row whose rank is most
            # probable. Ranks are compared by position, so tied probabilities
            # can never select an empty row.
            playable = [rank for rank, (_, count) in enumerate(row_map) if count > 0]
            row, count = row_map[max(playable, key=lambda rank: prow[rank])]
            return Nimply(row, count)

        return evolutionary

    def win_rate(player_fn):
        """Fraction of ACCURACY games in which ``player_fn`` takes the last object."""
        strategy = (comparison_strategy, player_fn)
        wins = 0
        for game in range(ACCURACY):
            turn = game % 2  # alternate who starts
            board = deepcopy(grid)
            while board:
                board.nimming(strategy[turn](board))
                turn = 1 - turn
            # `turn` is now whoever would move next, so the other player
            # (index 1 - turn) made the last move and wins.
            if turn == 0:
                wins += 1
        return wins / ACCURACY

    # Uniform start. The win ratio is phenotype rather than genome, but it is
    # practical to carry it along with the genome.
    genome = {
        "past_win": 0.5,
        "p": [1 / n_rows] * n_rows,
        "sigma_p": [3 / n_rows] * n_rows,
    }

    for step in range(TRAIN_EPOCHS):
        # Learning rate suggested in class; it only depends on the generation.
        lr_p = 1 / np.sqrt(step + 1)

        result = []  # tuples (win_rate, p, sigma_p), one per child
        for _ in range(LAMBDA):
            # Explicit mu/sigma: the argument-less form needs Python 3.11+.
            new_sigma_p = [
                sigma * np.exp(lr_p * random.normalvariate(0.0, 1.0))
                for sigma in genome["sigma_p"]
            ]
            raw_p = [
                p + random.normalvariate(0.0, sigma)
                for p, sigma in zip(genome["p"], new_sigma_p)
            ]
            new_p = [softmax(p, raw_p) for p in raw_p]
            result.append((win_rate(make_player(new_p)), new_p, new_sigma_p))

        # (1+LAMBDA) selection: the parent competes with its stored win rate
        # (it is not re-measured). A plain (1,LAMBDA) scheme was tried first
        # and reached about 60%.
        result.append((genome["past_win"], genome["p"], genome["sigma_p"]))
        best_child = max(result, key=lambda c: c[0])
        genome = {"past_win": best_child[0], "p": best_child[1], "sigma_p": best_child[2]}

    logging.info(f"genome:{genome}")

    return make_player(genome["p"])
    



NOTE: optimal function has changed in this version of Nim

In [10]:
import numpy as np

# Set the root logger to INFO so the match results logged with logging.info are emitted.
logging.getLogger().setLevel(logging.INFO)




def nim_sum(state: Nim) -> int:
    """Return the nim-sum (bitwise XOR) of the sizes of all rows of ``state``.

    Uses integer XOR directly, so it returns 0 for a board without rows and
    is not limited to row sizes that fit in 32 bits.
    """
    total = 0
    for count in state.rows:
        total ^= int(count)
    return total


def analize(raw: Nim) -> dict:
    """Map every legal move of ``raw`` to the nim-sum of the position it produces.

    Returns a dict with the single key ``"possible_moves"``, whose value maps
    each ``Nimply`` to the nim-sum obtained after playing it on a copy of ``raw``.
    """
    outcomes = {}
    for row, count in enumerate(raw.rows):
        for amount in range(1, count + 1):
            move = Nimply(row, amount)
            # Play the move on a copy so the real board is left untouched.
            board = deepcopy(raw)
            board.nimming(move)
            outcomes[move] = nim_sum(board)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Play a random move among those leaving nim-sum 0, or any legal move if none does."""
    outcomes = analize(state)
    logging.debug(f"analysis:\n{pformat(outcomes)}")
    moves = outcomes["possible_moves"]
    candidates = [move for move, value in moves.items() if value == 0]
    if len(candidates) == 0:
        # No move reaches nim-sum 0: choose at random among all legal moves.
        candidates = list(moves)
    return random.choice(candidates)


## Oversimplified match

NOTE: checking `1 - player` after the game loop means that the winner is the player who removed the last piece(s)

In [11]:
# Six pairings, 1000 games of Nim(5) each; the starting player alternates
# between games. Under this rule set the winner is whoever removes the last
# object(s); each log line reports how many games player 0 (the first strategy
# of the pair) won. The six copy-pasted loops are folded into one.
for strategy in (
    (optimal, optimal),          #1 about half of the games
    (optimal, pure_random),      #2 optimal always wins (recorded run: 1000)
    (pure_random, pure_random),  #3 about 50% (recorded run: 480)
    (gabriele, pure_random),     #4 better than 70% (recorded run: 779)
    (gabriele, gabriele),        #5 50% (recorded run: 500)
    (gabriele, optimal),         #6 gabriele never wins (recorded run: 0)
):
    count = 0
    for i in range(1000):
        nim = Nim(5)
        #logging.info(f"init : {nim}")
        player = i % 2
        while nim:
            ply = strategy[player](nim)
            nim.nimming(ply)
            player = 1 - player  # next player
        # After the loop `player` is whoever would move next, so the last
        # object was taken by `1 - player`.
        if (1 - player) == 0:
            count += 1

    logging.info(f"status: Player {0} won {count} times!")  # rule change


INFO:root:status: Player 0 won 5000 times!
INFO:root:status: Player 0 won 1000 times!
INFO:root:status: Player 0 won 480 times!
INFO:root:status: Player 0 won 779 times!
INFO:root:status: Player 0 won 500 times!
INFO:root:status: Player 0 won 0 times!


NOTE: the gabriele strategy turns out to be a good strategy only for this version of Nim

In [12]:

# Match #4 again on a larger sample (10000 games) for a more reliable estimate.
strategy = ( gabriele, pure_random )# recorded run: 7655 wins out of 10000
count = 0 
for i in range(10000):
    
    nim = Nim(5)
    #logging.info(f"init : {nim}")
    player=i%2 # alternate who starts
    while nim:
        ply = strategy[player](nim)
        nim.nimming(ply)
        player = 1 - player #next player
    # `1 - player` made the last move; count the games where that is player 0
    if (1-player)==0:
        count+=1

logging.info(f"status: Player {0} won {count} times!")#rule change

INFO:root:status: Player 0 won 7655 times!


# Training


In [69]:
# Train the evolved player on Nim(7) against pure_random.
logging.getLogger().setLevel(logging.INFO)
# It does not make sense to train against optimal, at least in a first phase.

nim = Nim(7)
# `evolutionary` is the strategy function returned by the training; the test cell reuses it.
evolutionary=adaptive(nim,pure_random)




INFO:root:genome:{'past_win': 0.9924, 'p': [0.0034152162850083706, 0.00191667111433166, 0.0016054633859851064, 0.007933697365537439, 0.9818414334621578, 0.0018430832399723832, 0.0014444351470073022], 'sigma_p': [3.055279916662707, 0.42463799193054746, 0.062466636164550865, 2.3865773595545487, 2.600026416211226, 0.1168868412140859, 0.4189884094400319]}


# Performance test

In [70]:
# Test the trained `evolutionary` strategy (player 1) against pure_random (player 0).
nim = Nim(7)
strategy = (pure_random, evolutionary)
# strategy 2 wins more often than gabriele

TEST_SAMPLE = 100000

logging.info(f"init : {nim}")

win_count = 0
for i in range(TEST_SAMPLE):
    nim = Nim(7)
    player = i%2 # alternate who starts
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    
    # `1 - player` made the last move; count the games where that is player 1
    if (1-player)==1:
        win_count+=1


logging.info(f"status: evolutionary result: {win_count/TEST_SAMPLE*100}% won!")
# around 99% with Nim(7)

INFO:root:init : <1 3 5 7 9 11 13>
INFO:root:status: evolutionary result: 99.194% won!


# First strategy standalone test

In [18]:
def good_greedy(state):
    """Strategy 1 on its own, with a random fallback.

    XOR the sizes of every row except the (first) largest one. When that
    value is lower than the largest row, shrink the largest row to it, which
    leaves a position whose nim-sum is 0. Otherwise empty a random non-empty
    row.

    The XOR is computed on plain integers, so a single-row board no longer
    crashes and row sizes are not limited to 32 bits.
    """
    rows = state.rows
    max_val = max(rows)
    max_row = rows.index(max_val)

    xor_val = 0
    for index, count in enumerate(rows):
        if index != max_row:
            xor_val ^= count

    if xor_val < max_val:
        return Nimply(max_row, max_val - xor_val)

    # No zero-nim-sum move through the largest row: a good linear-time rule
    # for this case is still to be found, so empty a random non-empty row.
    row = random.choice([r for r, c in enumerate(rows) if c > 0])
    return Nimply(row, rows[row])
    

# Test good_greedy (player 1) against pure_random (player 0) on Nim(7).
strategy = (pure_random, good_greedy)

TEST_SAMPLE = 100000

nim = Nim(7)
logging.info(f"init : {nim}")

win_count = 0
for i in range(TEST_SAMPLE):
    nim = Nim(7)
    player = i%2 # alternate who starts
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    
    # `1 - player` made the last move; count the games where that is player 1
    if (1-player)==1:
        win_count+=1


logging.info(f"status: good_greedy result: {win_count/TEST_SAMPLE*100}% won!")
# recorded run: 96.5%

INFO:root:init : <1 3 5 7 9 11 13>
INFO:root:status: good_greedy result: 96.503% won!


In [19]:
logging.getLogger().setLevel(logging.INFO)
# Train against gabriele on Nim(5), then test the result against gabriele.

nim = Nim(5)
evolutionary=adaptive(nim,gabriele)

nim = Nim(5)
strategy = (gabriele, evolutionary)

TEST_SAMPLE = 10000

logging.info(f"init : {nim}")

win_count = 0
for i in range(TEST_SAMPLE):
    nim = Nim(5)
    player = i%2 # alternate who starts
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    
    # `1 - player` made the last move; count the games where that is player 1
    if (1-player)==1:
        win_count+=1

# Note: this message reports the raw number of wins, not a percentage.
logging.info(f"status: evolutionary result: {win_count} won!")
# Training against gabriele led to a 50/50 result the first time.

# Author's note: against gabriele the evolved player always wins because it
# always executes the first strategy.


INFO:root:genome:{'past_win': 1.0, 'p': [6.965857624572689e-36, 7.395199290538448e-42, 1.0, 1.4515858843350588e-43, 9.264196711159191e-46], 'sigma_p': [46.674365406576555, 7.7412135789972245, 35.183244607025614, 4.059243549459197, 27.821583003159287]}
INFO:root:init : <1 3 5 7 9>
INFO:root:status: evolutionary result: 10000 won!


# Slide version
### Task 2.1
### nim_sum != 0 and the player who cannot take a match wins

In [71]:
import numpy as np

# Set the root logger to INFO so the match results logged with logging.info are emitted.
logging.getLogger().setLevel(logging.INFO)




def nim_sum(state: Nim) -> int:
    """Return the nim-sum (bitwise XOR) of the sizes of all rows of ``state``.

    Uses integer XOR directly, so it returns 0 for a board without rows and
    is not limited to row sizes that fit in 32 bits.
    """
    total = 0
    for count in state.rows:
        total ^= int(count)
    return total


def analize(raw: Nim) -> dict:
    """Map every legal move of ``raw`` to the nim-sum of the position it produces.

    Returns a dict with the single key ``"possible_moves"``, whose value maps
    each ``Nimply`` to the nim-sum obtained after playing it on a copy of ``raw``.
    """
    outcomes = {}
    for row, count in enumerate(raw.rows):
        for amount in range(1, count + 1):
            move = Nimply(row, amount)
            # Play the move on a copy so the real board is left untouched.
            board = deepcopy(raw)
            board.nimming(move)
            outcomes[move] = nim_sum(board)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Play a random move among those leaving a non-zero nim-sum, or any legal move if none does."""
    outcomes = analize(state)
    logging.debug(f"analysis:\n{pformat(outcomes)}")
    moves = outcomes["possible_moves"]
    candidates = [move for move, value in moves.items() if value != 0]
    if len(candidates) == 0:
        # Every move leads to nim-sum 0: choose at random among all legal moves.
        candidates = list(moves)
    return random.choice(candidates)


## Comparing the 3 given strategies, in the case where the winner is the player who cannot remove matches

In [73]:
# Six pairings, 1000 games of Nim(5) each; the starting player alternates
# between games. Under this rule set the winner is the player who cannot
# remove any match; each log line reports how many games player 0 (the first
# strategy of the pair) won. The six copy-pasted loops are folded into one.
for strategy in (
    (optimal, optimal),          #1 recorded run: 490
    (optimal, pure_random),      #2 recorded run: 751
    (pure_random, pure_random),  #3 recorded run: 508
    (gabriele, pure_random),     #4 recorded run: 220
    (gabriele, gabriele),        #5 recorded run: 500
    (gabriele, optimal),         #6 recorded run: 110
):
    count = 0
    for i in range(1000):
        nim = Nim(5)
        #logging.info(f"init : {nim}")
        player = i % 2
        while nim:
            ply = strategy[player](nim)
            nim.nimming(ply)
            player = 1 - player  # next player
        # After the loop `player` is whoever should move next but cannot,
        # i.e. the winner under these rules.
        if player == 0:
            count += 1

    logging.info(f"status: Player {0} won {count} times!")  # rule change


#conclusion: strategy is optimal in the extent that leaves the possibility to play a move with ns==0 to the opponent

INFO:root:status: Player 0 won 490 times!
INFO:root:status: Player 0 won 751 times!
INFO:root:status: Player 0 won 508 times!
INFO:root:status: Player 0 won 220 times!
INFO:root:status: Player 0 won 500 times!
INFO:root:status: Player 0 won 110 times!


In [198]:
def not_gabriele(state: Nim) -> Nimply: # gabriele is not optimal under these rules -> taking 1 from the lowest row is better
    """Take a single object from the non-empty row with the lowest index."""
    candidates = []
    for row, count in enumerate(state.rows):
        for amount in range(1, count + 1):
            candidates.append((row, amount))
    # The first move with the smallest amount belongs to the first non-empty row.
    chosen_row, _ = max(candidates, key=lambda move: -move[1])
    return Nimply(chosen_row, 1)


# Match #4 with not_gabriele (player 0) against pure_random, 10000 games.
strategy = ( not_gabriele, pure_random )# recorded run: 6079 wins out of 10000
count = 0 
for i in range(10000):
    
    nim = Nim(5)
    #logging.info(f"init : {nim}")
    player=i%2 # alternate who starts
    while nim:
        ply = strategy[player](nim)
        nim.nimming(ply)
        player = 1 - player #next player
    # `player` is the one who cannot move any more, i.e. the winner under these rules
    if player==0:
        count+=1

logging.info(f"status: Player {0} won {count} times!")#rule change


INFO:root:status: Player 0 won 6079 times!


In [119]:
# Number of generations of the evolution loop (depth of the genealogical tree).
# Too many generations can drive some probabilities to underflow.
TRAIN_EPOCHS = 100
# Number of games simulated to estimate the win rate of each candidate.
ACCURACY = 1000
# Number of children generated per generation (offspring size).
LAMBDA = 3
# Upper bound on the number of random row draws for a single move. Needed because:
# 1. p can be approximately 0 for some rows (< 1e-40)
# 2. a drawn row can be invalid (e.g. at least one row has 0 matches left)
MAX_RAND = 100

def softmax(x, xtot):
    """Return the softmax weight of ``x`` with respect to the values ``xtot``.

    Computes ``exp(x) / sum(exp(v) for v in xtot)``. The largest value of
    ``xtot`` is subtracted from every exponent first: the result is
    mathematically unchanged, but ``exp`` no longer overflows to ``inf``
    (which used to turn the ratio into NaN) when the values are large.

    Args:
        x: the value whose weight is requested (normally an element of ``xtot``).
        xtot: all the values being normalized together.

    Returns:
        The normalized weight as a NumPy float.
    """
    values = np.asarray(xtot, dtype=float)
    # With no values there is nothing to shift by.
    shift = values.max() if values.size else 0.0
    return np.exp(x - shift) / np.sum(np.exp(values - shift))

def adaptive(grid: Nim, comparison_strategy):
    """Evolve a Nim player with a self-adaptive (1+LAMBDA) evolution strategy.

    In this variant the player who cannot remove any match wins. The evolved
    player draws a rank ``i`` with probability ``p[i]`` and removes exactly
    one object from the i-th smallest row.

    The genome holds the probabilities ``p`` and one mutation step size per
    probability (``sigma_p``); step sizes are mutated log-normally before
    being used, so no variance has to be chosen by hand.

    Args:
        grid: starting position; every simulated game starts from a deep copy.
        comparison_strategy: the opponent, a callable ``state -> Nimply``.

    Returns:
        A callable ``state -> Nimply`` playing with the best probabilities
        found. It expects states with as many rows as ``grid``.
    """
    n_rows = len(grid.rows)

    def make_player(prow):
        """Return a strategy function that plays with rank probabilities ``prow``."""
        prow = list(prow)

        def evolutionary(state):
            # Rows ranked by growing size, row_map[rank] == (row, count).
            row_map = sorted(enumerate(state.rows), key=lambda r: r[1])
            for _ in range(MAX_RAND):
                rank = np.random.choice(len(prow), p=prow)
                row, count = row_map[rank]
                if count > 0:
                    return Nimply(row, 1)

            # Every draw hit an empty row (their ranks may hold almost all the
            # probability mass): take one object from the non-empty row whose
            # rank is most probable. Ranks are compared by position, so tied
            # probabilities can never select an empty row.
            playable = [rank for rank, (_, count) in enumerate(row_map) if count > 0]
            row, _ = row_map[max(playable, key=lambda rank: prow[rank])]
            return Nimply(row, 1)

        return evolutionary

    def win_rate(player_fn):
        """Fraction of ACCURACY games that ``player_fn`` wins under these rules."""
        strategy = (comparison_strategy, player_fn)
        wins = 0
        for game in range(ACCURACY):
            turn = game % 2  # alternate who starts
            board = deepcopy(grid)
            while board:
                board.nimming(strategy[turn](board))
                turn = 1 - turn
            # `turn` is whoever should move next but cannot: the winner here.
            if turn == 1:
                wins += 1
        return wins / ACCURACY

    # Uniform start. The win ratio is phenotype rather than genome, but it is
    # practical to carry it along with the genome.
    genome = {
        "past_win": 0.0,
        "p": [1 / n_rows] * n_rows,
        "sigma_p": [3 / n_rows] * n_rows,
    }

    for step in range(TRAIN_EPOCHS):
        # Learning rate suggested in class; it only depends on the generation.
        lr_p = 1 / np.sqrt(step + 1)

        result = []  # tuples (win_rate, p, sigma_p), one per child
        for _ in range(LAMBDA):
            # Explicit mu/sigma: the argument-less form needs Python 3.11+.
            new_sigma_p = [
                sigma * np.exp(lr_p * random.normalvariate(0.0, 1.0))
                for sigma in genome["sigma_p"]
            ]
            raw_p = [
                p + random.normalvariate(0.0, sigma)
                for p, sigma in zip(genome["p"], new_sigma_p)
            ]
            new_p = [softmax(p, raw_p) for p in raw_p]
            result.append((win_rate(make_player(new_p)), new_p, new_sigma_p))

        # (1+LAMBDA) selection: the parent competes with its stored win rate
        # (it is not re-measured). A plain (1,LAMBDA) scheme was tried first
        # and reached about 60%.
        result.append((genome["past_win"], genome["p"], genome["sigma_p"]))
        best_child = max(result, key=lambda c: c[0])
        genome = {"past_win": best_child[0], "p": best_child[1], "sigma_p": best_child[2]}

    logging.info(f"genome:{genome}")

    return make_player(genome["p"])
    



In [120]:
# Train the evolved player on Nim(5) against pure_random.
logging.getLogger().setLevel(logging.INFO)
# It does not make sense to train against optimal, at least in a first phase.

nim = Nim(5)
# `evolutionary` is the strategy function returned by the training; the test cell reuses it.
evolutionary=adaptive(nim,pure_random)

INFO:root:genome:{'past_win': 0.652, 'p': [0.025740317851924342, 0.8724555013657693, 0.07121693812091899, 0.030583291187654453, 3.9514737329499265e-06], 'sigma_p': [0.3537607666507349, 1.6743770492337033, 1.0091555040750952, 0.2496151498755137, 21.650565634527762]}


In [199]:
# Test the trained `evolutionary` strategy (player 1) against pure_random (player 0).
nim = Nim(5)
strategy = (pure_random, evolutionary)
# strategy 2 wins more often than gabriele

TEST_SAMPLE = 100000

logging.info(f"init : {nim}")

win_count = 0
for i in range(TEST_SAMPLE):
    nim = Nim(5)
    player = i%2 # alternate who starts
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    
    # `player` is the one who cannot move any more, i.e. the winner under these rules
    if player==1:
        win_count+=1


logging.info(f"status: evolutionary result: {win_count/TEST_SAMPLE*100}% won!")
# gain of about 2% against random compared to not_gabriele

INFO:root:init : <1 3 5 7 9>
INFO:root:status: evolutionary result: 62.21% won!
