## IMPORT

In [2]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor
import numpy as np

## DEFINITION

In [3]:
Nimply = namedtuple("Nimply", "row, num_objects")
# A single move ("ply") as returned by every strategy: `row` is the index of the row to take objects from, `num_objects` is how many objects to remove from that row.

In [45]:
class Nim:
    """Board of a Nim game: a list of rows, each holding a number of objects.

    Row ``i`` starts with ``2 * i + 1`` objects. When ``k`` is not ``None`` it
    is the maximum number of objects a single move may remove.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    def __len__(self):
        """Return the number of rows, including the ones already emptied."""
        return len(self._rows)

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def rows_str(self) -> str:
        """Return the non-empty row sizes, sorted, each prefixed by a space.

        An empty board is rendered as ``" 0"``.
        """
        remaining = self.rows_list()
        if not remaining:
            return " 0"
        return "".join(f" {count}" for count in remaining)

    def rows_list(self) -> list:
        """Return the sizes of the non-empty rows in ascending order."""
        return sorted(count for count in self._rows if count)

    @property
    def k(self) -> int:
        """Maximum number of objects per move, or ``None`` when unlimited."""
        return self._k

    def nimming(self, ply: Nimply) -> None:
        """Apply the move ``ply`` (a ``(row, num_objects)`` pair) in place.

        Raises:
            AssertionError: if the move takes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A zero or negative amount would be a no-op turn or would add objects
        # to the row, so it is rejected like any other illegal move.
        assert num_objects >= 1, "a move must take at least one object"
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [5]:
def nim_sum(state: Nim) -> int:
    """Return the nim-sum (bitwise XOR of all row sizes) of ``state``.

    A board without rows has a nim-sum of 0.
    """
    result = 0
    for row_size in state.rows:
        result ^= row_size
    return result

def cook_status(state: Nim) -> dict:
    """Collect the facts about ``state`` that the evolvable strategy relies on.

    The returned dict holds:
        possible_moves: every legal ``(row, num_objects)`` pair (honouring ``state.k``)
        active_rows_number: how many rows still hold objects
        shortest_row: index of the smallest non-empty row
        longest_row: index of the largest row
        random_row: index of a randomly chosen non-empty row
        nim_sum: nim-sum of the current board
        best_moves: legal moves that leave a nim-sum of zero
        worst_moves: every other legal move

    The board must still hold at least one object: on an empty board the
    ``min`` / ``random.choice`` calls below fail on their empty input.
    """
    rows = state.rows
    max_take = state.k
    cooked = dict()
    cooked["possible_moves"] = [
        (r, o) for r, c in enumerate(rows) for o in range(1, c + 1) if max_take is None or o <= max_take
    ]
    cooked["active_rows_number"] = sum(o > 0 for o in rows)
    cooked["shortest_row"] = min((x for x in enumerate(rows) if x[1] > 0), key=lambda y: y[1])[0]
    cooked["longest_row"] = max(enumerate(rows), key=lambda y: y[1])[0]
    cooked["random_row"] = random.choice([r for r, c in enumerate(rows) if c > 0])
    current_nim_sum = nim_sum(state)
    cooked["nim_sum"] = current_nim_sum

    best_moves = list()
    worst_moves = list()
    for move in cooked["possible_moves"]:
        row, taken = move
        # XOR-ing the old row size out and the new one in gives the nim-sum
        # after the move without copying and replaying the whole board.
        nim_sum_after = current_nim_sum ^ rows[row] ^ (rows[row] - taken)
        if nim_sum_after == 0:
            best_moves.append(move)
        else:
            worst_moves.append(move)
    cooked["best_moves"] = best_moves
    cooked["worst_moves"] = worst_moves

    return cooked

## HARD CODED STRATEGY

In [6]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row and remove a random legal number of objects.

    The amount is drawn between 1 and the row size, capped at ``state.k`` when
    a per-move limit is set, so the returned move is always accepted by
    ``Nim.nimming``.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    limit = state.rows[row]
    if state.k is not None:
        limit = min(limit, state.k)
    num_objects = random.randint(1, limit)
    return Nimply(row, num_objects)



## EVOLVABLE STRATEGY

In [8]:
def make_strategy(genome: dict) -> Callable:
    def check_function_create(random_number)-> Callable:
            probability_range=0
            def check_function(probability):
                nonlocal probability_range
                if probability == 0: #check to avoid calling the move with probability = 0 when confronting with random number = 0
                    return False
                if (probability+probability_range) >= random_number:
                    return True
                probability_range= probability_range + probability
                return False
            return check_function
    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)
#what is p? a vector of probability?
#or better, a vector of interval
#if random is in that interval, than that move is choosen
        #probability_vector = genome["p"]
        random_number = random.random()*100
        check_function = check_function_create( random_number )
        move = next(move_number for  move_number , move_probability in genome.items() if check_function(move_probability))
        match move:
            case 0:
                ply = Nimply(data["shortest_row"], random.randint(1, state.rows[data["shortest_row"]]))
            case 1:
                ply = Nimply(data["shortest_row"], 1)
            case 2:
                ply = Nimply(data["shortest_row"], state.rows[data["shortest_row"]])
            case 3:
                ply = Nimply(data["longest_row"], random.randint(1, state.rows[data["longest_row"]]))
            case 4:
                ply = Nimply(data["longest_row"], 1)
            case 5:
                ply = Nimply(data["longest_row"], state.rows[data["longest_row"]])
            case 6:
                if(data['best_moves']): #if a best move exists
                    (row,num_object) =random.choice(data['best_moves'])
                else: #else, pick a random move
                    (row,num_object) = random.choice(data['possible_moves'])
                ply = Nimply(row,num_object)
            case 7:
                if(data['worst_moves']): #if a worst move exists
                    (row,num_object) = random.choice(data['worst_moves'])
                else: #else, pick a random move
                    (row,num_object) = random.choice(data['possible_moves'])
                ply = Nimply(row,num_object)

            case _:

                assert False,f"cannot find a move in the probability range with rand:{random_number} and genome:{genome}"
        return ply

    return evolvable

def evolve(genome: dict, slew_rate) -> dict:
    """Return a mutated copy of ``genome`` rescaled so its values sum to 100.

    Every probability is shifted according to one random draw:
    ``+2 * slew_rate`` (10% of the time), ``+slew_rate`` (40%),
    ``-slew_rate`` (40%) or ``-2 * slew_rate`` (10%), then clamped at 0.
    If every probability ends up at 0 the result is a uniform distribution,
    so the returned genome is always usable. ``genome`` itself is not modified
    and the keys are returned unchanged.
    """
    mutated = {}
    for move_number, move_probability in genome.items():
        random_number = random.random()

        if random_number > 0.9:
            delta = 2 * slew_rate
        elif random_number > 0.5:
            delta = slew_rate
        elif random_number > 0.1:
            delta = -slew_rate
        else:
            # mirror of the "> 0.9" branch: a rare, larger step downwards
            delta = -2 * slew_rate

        new_probability = move_probability + delta
        if new_probability < 0:
            new_probability = 0
        mutated[move_number] = new_probability

    total_probability = sum(mutated.values())
    if total_probability == 0:
        # nothing left to rescale: fall back to equal weights instead of
        # dividing by zero
        return {move_number: 100 / len(mutated) for move_number in mutated}

    divisor = total_probability / 100
    return {move_number: probability / divisor for move_number, probability in mutated.items()}
def print_genome(genome: dict) -> str:
    """Render ``genome`` as text, one ``<move label> : <probability>`` line per move.

    Keys are converted with ``int()`` before the lookup, so float keys such as
    ``6.0`` are accepted. Entries whose id is not in 0..7 are skipped.
    """
    labels = {
        0: "(shortest_row, random)",
        1: "(shortest_row, 1)",
        2: "(shortest_row, max)",
        3: "(longest_row, random)",
        4: "(longest_row, 1)",
        5: "(longest_row, max)",
        6: "nim_sum == 0",
        7: "nim_sum != 0",
    }
    lines = []
    for move_n, move_prob in genome.items():
        label = labels.get(int(move_n))
        if label is not None:
            lines.append(f"{label} : {move_prob}\n")
    return "".join(lines)

In [46]:
NUM_MATCHES = 100  # number of games played by one call to evaluate()
NIM_SIZE = 5  # number of rows of the boards created by evaluate() and single_match()


def evaluate(strategy1: Callable , strategy2: Callable) -> float:
    """Play NUM_MATCHES games and return the fraction won by ``strategy1``.

    ``strategy1`` always makes the first move; whoever removes the last
    object wins the game.
    """
    contenders = (strategy1, strategy2)
    victories = 0

    for _ in range(NUM_MATCHES):
        board = Nim(NIM_SIZE)
        turn = 0
        while board:
            board.nimming(contenders[turn](board))
            turn = 1 - turn
        # the turn index was flipped after the final move, so 1 here means
        # that strategy1 (index 0) emptied the board
        victories += int(turn == 1)
    return victories / NUM_MATCHES

def single_match(strategy1: Callable , strategy2: Callable) -> int:
    """Play one game, printing the board after every move.

    ``strategy1`` is player 0 and moves first. Returns the index (0 or 1) of
    the winner, i.e. of the player who removed the last object.
    """
    contenders = (strategy1, strategy2)
    board = Nim(NIM_SIZE)
    print(f"start: {board}")

    turn = 0
    while board:
        board.nimming(contenders[turn](board))
        print(f"player {turn}: {board} - nimsum:{nim_sum(board)} - str:{board.rows_str()}")
        turn = 1 - turn

    # the turn index was flipped after the final move, so the winner is the
    # other player
    winner = 0 if turn == 1 else 1
    print(f"player {winner} won!")
    return winner

In [57]:


# Eight equal weights; a slew rate of 0 leaves them unshifted, so evolve() only rescales them to sum to 100.
strat_1 = evolve(dict(enumerate([100] * 8)), 0)
# Always play move 6 (leave a nim-sum of zero when possible).
strat_2 = dict(enumerate([0, 0, 0, 0, 0, 0, 100, 0]))
# Always play move 7 (leave a non-zero nim-sum when possible).
strat_3 = dict(enumerate([0, 0, 0, 0, 0, 0, 0, 100]))
single_match(make_strategy(strat_1), make_strategy(strat_2))
evaluate(make_strategy(strat_1), make_strategy(strat_3))

start: <1 3 5 7 9>
player 0: <0 3 5 7 9> - nimsum:8 - str: 3 5 7 9
player 1: <0 3 5 7 1> - nimsum:0 - str: 1 3 5 7
player 0: <0 3 5 3 1> - nimsum:4 - str: 1 3 3 5
player 1: <0 3 1 3 1> - nimsum:0 - str: 1 1 3 3
player 0: <0 3 0 3 1> - nimsum:1 - str: 1 3 3
player 1: <0 3 0 3 0> - nimsum:0 - str: 3 3
player 0: <0 0 0 3 0> - nimsum:3 - str: 3
player 1: <0 0 0 0 0> - nimsum:0 - str: 0
player 1 won!


0.79

In [11]:
ITERATION_MAX = 500  # upper bound on the mutation attempts made by evolve_against_one_opponent()
WIN_THRESHOLD = 0.8  # win rate at which evolve_against_one_opponent() stops evolving
def evolve_against_one_opponent(genome_opponent: dict, genome_base: list, learning_rate: float) -> dict:
    """Evolve a population of genomes against a single fixed opponent.

    The population starts as a copy of ``genome_base``. On every iteration a
    randomly chosen member is mutated with ``evolve(..., learning_rate)`` and
    evaluated against the opponent; if it beats at least one member, it
    replaces a randomly chosen member with a lower win rate. Evolution stops
    as soon as the best win rate in the population reaches WIN_THRESHOLD, or
    after ITERATION_MAX iterations.

    Returns:
        ``{"best": (win_rate, genome), "list": population}`` where ``best`` is
        the member with the highest recorded win rate.

    Raises:
        ValueError: if ``genome_base`` is empty.
    """
    if not genome_base:
        raise ValueError("genome_base must contain at least one genome")

    opponent_strategy = make_strategy(genome_opponent)

    # create a base population and measure every member against the opponent
    genome_population = [deepcopy(genome) for genome in genome_base]
    fitness_population = np.array(
        [evaluate(make_strategy(genome), opponent_strategy) for genome in genome_population],
        dtype=float,
    )

    iteration_n = 0
    # checking the population maximum also covers a base population that
    # already satisfies the threshold before any mutation
    while fitness_population.max() < WIN_THRESHOLD and iteration_n < ITERATION_MAX:
        iteration_n = iteration_n + 1
        genome_tmp = evolve(random.choice(genome_population), learning_rate)
        fitness_tmp = evaluate(make_strategy(genome_tmp), opponent_strategy)
        swap_list = [i for i, win_r in enumerate(fitness_population) if win_r < fitness_tmp]

        if swap_list:
            swap = random.choice(swap_list)
            genome_population[swap] = genome_tmp
            fitness_population[swap] = fitness_tmp

    best = fitness_population.argmax()
    return {"best": (fitness_population[best], genome_population[best]), "list": genome_population}

In [44]:
# Initial population: ten genomes with eight random integer weights each.
genome_list = []
for i in range(10):
    genome_tmp = dict(enumerate(random.randint(0, 100) for _ in range(8)))
    # evolve() rescales the weights so that they sum to 100 (after shifting each one by the slew rate of 1)
    genome_list.append(evolve(genome_tmp, 1))

# Opponent that always takes exactly one object from the shortest row.
dumb_opponent = dict(enumerate([0, 100, 0, 0, 0, 0]))
result = evolve_against_one_opponent(dumb_opponent, genome_list, 5)
genome_list = result["list"]

start_genome = result['best'][1]
print(f"against dumb: win rate={result['best'][0]} with genome=\n{print_genome(start_genome)}")

# Repeatedly evolve the population against the best genome of the previous round.
for i in range(5):
    result = evolve_against_one_opponent(result["best"][1], genome_list, 5)
    genome_list = result["list"]
    better_opponent = result["best"][1]
    print(f"against better: win rate={result['best'][0]} with genome=\n{print_genome(better_opponent)}")
print(f"start vs finish: { evaluate(make_strategy(better_opponent),make_strategy(start_genome)) }")

against dumb: win rate=0.88 with genome=
(shortest_row, random) : 23.202614379084967
(shortest_row, 1) : 2.287581699346405
(shortest_row, max) : 5.555555555555555
(longest_row, random) : 17.647058823529413
(longest_row, 1) : 0.6535947712418301
(longest_row, max) : 31.69934640522876
nim_sum == 0 : 15.032679738562091
nim_sum != 0 : 3.9215686274509802

against better: win rate=0.85 with genome=
(shortest_row, random) : 4.2869312774042285
(shortest_row, 1) : 0.0
(shortest_row, max) : 5.770633547010291
(longest_row, random) : 5.191381831764994
(longest_row, 1) : 6.586520216154968
(longest_row, max) : 0.0
nim_sum == 0 : 78.1645331276655
nim_sum != 0 : 0.0

against better: win rate=0.81 with genome=
(shortest_row, random) : 0.8997570062761379
(shortest_row, 1) : 0.0
(shortest_row, max) : 5.693719509268098
(longest_row, random) : 0.0
(longest_row, 1) : 0.0
(longest_row, max) : 0.0
nim_sum == 0 : 92.65312978138977
nim_sum != 0 : 0.7533937030659843

against better: win rate=1.0 with genome=
(sho

In [56]:
#STILL WORK IN PROGRESS
def minmax(board:Nim):
    """Build a lookup table with a cost for every board position of ``board``'s size.

    Starting from the empty board, positions are generated backwards by
    growing one row at a time, up to ``len(board)`` rows of at most
    ``max(board.rows)`` objects each. Every position is stored under a key made
    of its non-zero row sizes in ascending order, each prefixed by a space
    (the empty board is stored as " 0"), with cost 0 when its nim-sum is zero
    and 1 otherwise.

    Prints the board dimensions and the number of stored positions, and
    returns the table as a dict mapping key -> cost.
    """
    max_row = max([k for r,k in enumerate(board.rows)]) #get the max number of item for a row
    max_row_n = len(board)
    print(f"n row: {max_row_n}, max_object: {max_row}")
    minmax_state = dict()
    def nim_sum_list( state: list) -> int:
        """Return the XOR of all the values in ``state`` (which must not be empty)."""
        *_, result = accumulate(state, xor)
        return result

    def all_prev_moves( board_state : tuple ) -> list:
        """Return every position from which ``board_state`` is reachable in one move.

        ``board_state`` is a ``(rows, cost)`` pair. Each predecessor is obtained
        by raising exactly one row (or a newly added empty row, while fewer than
        ``max_row_n`` rows are in use) to a larger size not above ``max_row``.
        The result is a list of ``(sorted non-zero rows, cost)`` pairs.
        """
        tmp_board = deepcopy(board_state) # work on a copy: the rows list is modified below
        cost = tmp_board[1] # not used in this function
        rows = tmp_board[0]
        possible_moves = []
        nonlocal max_row_n #needed only for ipynb
        nonlocal max_row #needed only for ipynb
        new_rows = rows # same list object as `rows`, so the append below is visible through both names
        if len(rows) < max_row_n:
            new_rows.append(0) # spare empty row, so that a predecessor may have one more row in use
        for i,row in enumerate(new_rows):
            if row <= max_row:
                for c in range(row+1,max_row+1):
                    next_move = deepcopy(rows)
                    next_move[i] = c
                    # cost 1 for a non-zero nim-sum, 0 for a nim-sum of zero
                    if nim_sum_list(next_move)!=0 : next_cost = 1
                    else: next_cost = 0
                    # canonical form: drop the empty rows and sort
                    tmp_list = sorted(filter(lambda c : c!=0,next_move))
                    if tmp_list : 
                        tmp_list = (tmp_list,next_cost)
                        #print(f"adding to all_prev_moves list:{tmp_list}")
                        possible_moves.append(tmp_list) 
        return possible_moves

    def update_state( move: tuple )-> bool:
        """Record ``move`` (a ``(rows, cost)`` pair) in ``minmax_state``.

        Used as a ``filter`` predicate: despite the ``bool`` annotation it
        returns the move itself (truthy) when the position was new or its
        stored cost was lowered, and ``None`` (falsy) otherwise.
        """
        nonlocal minmax_state
        #print(f"update_state with move {move}")
        row = move[0]
        cost = move[1]
        if cost < 0: return
        row_key = ''.join(" "+str(_) for _ in row)
        if row_key in minmax_state.keys():
            if cost < minmax_state[row_key]:
                minmax_state[row_key] = cost
                #print(f"update state {row_key} with cost: {cost}")
                return move
        else:
            #print(f"add state {row_key} with cost: {cost}")
            minmax_state[row_key] = cost
            return move
        return

    next_moves = []
    z= 0
    # start from the empty board, stored with cost 0
    first_move = [z]
    first_cost = z
    next_moves = [ (first_move, first_cost) ]
    #print(f"first move: {next_moves}")
    # keep expanding until a pass adds no new (or improved) position
    while next_moves:
        #print(f"next_moves: {next_moves}")
        next_moves = list(filter(update_state,next_moves)) #add state to minmax, keep only usefull moves
        #print(f"next_moves after_filter: {next_moves}")
        tmp_moves = next_moves # the loop below rebinds next_moves to new lists, so this list is not changed while iterated
        for move in tmp_moves : #for each move keeped, compute all the possible next moves
            if move:
                
                #print(f"move: {move}")
                tmp = all_prev_moves(move)
                #print(f"next_moves before add: {next_moves}")
                #print(f"move to add: {tmp}")
                #tmp.append(next_moves)
                #print(f"tmp: {tmp}")
                next_moves = next_moves + tmp
                #print(f"next_moves after_add: {next_moves}")
    print(f"total state:{len(minmax_state)}")
    #print(minmax_state)
    return minmax_state
    
def minmax_make_strategy(minmax_state: dict) -> Callable:
    """Build a strategy that plays according to the cost table ``minmax_state``.

    ``minmax_state`` maps a position key, in the format produced by
    ``Nim.rows_str()``, to a cost. The returned strategy tries every legal
    move and plays the one whose resulting position has the lowest cost; among
    equally good moves the first one generated is kept. A position missing
    from the table raises ``KeyError``.
    """

    def strategy(board: Nim) -> Nimply:
        def with_cost(move: tuple) -> tuple:
            # play the move on a copy of the board and look up the position reached
            after_move = deepcopy(board)
            after_move.nimming(move)
            return (move[0], move[1], minmax_state[after_move.rows_str()])

        all_moves = [
            (r, o)
            for r, c in enumerate(board.rows)
            for o in range(1, c + 1)
            if board.k is None or o <= board.k
        ]
        scored_moves = [with_cost(move) for move in all_moves]
        best_move = min(scored_moves, key=lambda entry: entry[2])
        return Nimply(best_move[0], best_move[1])

    return strategy
        
    
# Strategy that always plays move 6 (leave a nim-sum of zero when possible).
nimsum_strategy = make_strategy(dict(enumerate([0, 0, 0, 0, 0, 0, 100, 0])))
test_board = Nim(5)
print(test_board)
# Build the cost table for boards of this size and wrap it into a strategy.
minmax_strategy = minmax_make_strategy(minmax(test_board))

# Let the table-driven strategy play against itself.
single_match(minmax_strategy, minmax_strategy)




<1 3 5 7 9>
n row: 5, max_object: 9
total state:2002
start: <1 3 5 7 9>
player 0: <1 3 5 7 0> - nimsum:0 - str: 1 3 5 7
player 1: <0 3 5 7 0> - nimsum:1 - str: 3 5 7
player 0: <0 2 5 7 0> - nimsum:0 - str: 2 5 7
player 1: <0 1 5 7 0> - nimsum:3 - str: 1 5 7
player 0: <0 1 5 4 0> - nimsum:0 - str: 1 4 5
player 1: <0 0 5 4 0> - nimsum:1 - str: 4 5
player 0: <0 0 4 4 0> - nimsum:0 - str: 4 4
player 1: <0 0 3 4 0> - nimsum:7 - str: 3 4
player 0: <0 0 3 3 0> - nimsum:0 - str: 3 3
player 1: <0 0 2 3 0> - nimsum:1 - str: 2 3
player 0: <0 0 2 2 0> - nimsum:0 - str: 2 2
player 1: <0 0 1 2 0> - nimsum:3 - str: 1 2
player 0: <0 0 1 1 0> - nimsum:0 - str: 1 1
player 1: <0 0 0 1 0> - nimsum:1 - str: 1
player 0: <0 0 0 0 0> - nimsum:0 - str: 0
player 0 won!


0