## IMPORT

In [86]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor
import numpy as np

## DEFINITION

In [87]:
Nimply = namedtuple("Nimply", "row, num_objects")
#this is the "turn" that each strategy use. row is the index of the row i want to take elements from, num_objects the number of objects to take

In [89]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    @property
    def k(self) -> int:
        return self._k

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [91]:
def nim_sum(state: Nim) -> int:
    *_, result = accumulate(state.rows, xor)
    return result

def cook_status(state: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = [
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1) if state.k is None or o <= state.k
    ]
    cooked["active_rows_number"] = sum(o > 0 for o in state.rows)
    cooked["shortest_row"] = min((x for x in enumerate(state.rows) if x[1] > 0), key=lambda y: y[1])[0]
    cooked["longest_row"] = max((x for x in enumerate(state.rows)), key=lambda y: y[1])[0]
    cooked["random_row"] = random.choice( [ r for r , c in enumerate(state.rows) if c > 0 ] )
    cooked["nim_sum"] = nim_sum(state)

    brute_force = list()
    for m in cooked["possible_moves"]:
        tmp = deepcopy(state)
        tmp.nimming(m)
        brute_force.append((m, nim_sum(tmp)))
    cooked["brute_force"] = brute_force

    return cooked

## HARD CODED STRATEGY

In [92]:
def pure_random(state: Nim) -> Nimply:
    row = random.choice( [ r for r , c in enumerate(state.rows) if c > 0 ] )
    num_objects = random.randint(1,state.rows[row])
    return Nimply(row,num_objects)



In [6]:
def greedy_untill_less_than_two_rows(state: Nim) -> Nimply:
    rows_av = [(r,c) for r , c in enumerate(state.rows) if c > 0]
    match len(rows_av):
        case 1:
            (row,num_objects) = rows_av[0] #take the last available row, and all the object in it
            return Nimply(row,num_objects) #you have just won!

        case 2:
            (row,_) = max(rows_av, key=lambda tup:tup[1]) #take the row with the greater number of elements
            return Nimply(row,1) #take only one element

        case _:
            (row,num_objects) = rows_av[0] #take the first available row and all the object in it
            return Nimply(row,num_objects)
    


## EVOLVABLE STRATEGY

In [93]:
def make_strategy(genome: dict) -> Callable:
    def check_function_create(random_number)-> Callable:
            probability_range=0
            def check_function(probability):
                nonlocal probability_range
                if probability == 0: #check to avoid calling the move with probability = 0 when confronting with random number = 0
                    return False
                if (probability+probability_range) >= random_number:
                    return True
                probability_range= probability_range + probability
                return False
            return check_function
    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)
#what is p? a vector of probability?
#or better, a vector of interval
#if random is in that interval, than that move is choosen
        #probability_vector = genome["p"]
        random_number = random.random()*100
        check_function = check_function_create( random_number )
        move = next(move_number for  move_number , move_probability in genome.items() if check_function(move_probability))
        match move:
            case 0:
                ply = Nimply(data["shortest_row"], random.randint(1, state.rows[data["shortest_row"]]))
            case 1:
                ply = Nimply(data["shortest_row"], 1)
            case 2:
                ply = Nimply(data["shortest_row"], state.rows[data["shortest_row"]])
            case 3:
                ply = Nimply(data["longest_row"], random.randint(1, state.rows[data["longest_row"]]))
            case 4:
                ply = Nimply(data["longest_row"], 1)
            case 5:
                ply = Nimply(data["longest_row"], state.rows[data["longest_row"]])

            case _:

                assert False,f"cannot find a move in the probability range with rand:{random_number} and genome:{genome}"
        return ply

    return evolvable

def evolve(genome: dict, slew_rate) -> dict:
    new_genome = deepcopy(genome)
    total_probability=0
    
    for  move_number , move_probability in new_genome.items():
        random_number = random.random()
        
        if random_number > 0.9:
            new_genome[move_number] = new_genome[move_number]+ (2*slew_rate)
        elif random_number > 0.5:
            new_genome[move_number] = new_genome[move_number]+ (slew_rate)
        elif random_number >0.1:
            new_genome[move_number] = new_genome[move_number]- (slew_rate)
        else:
            new_genome[move_number] = new_genome[move_number]- (slew_rate)
        if new_genome[move_number] < 0 :
            new_genome[move_number] = 0
        
        total_probability = total_probability + new_genome[move_number]
    divisor = total_probability / 100
    result = dict(np.apply_along_axis(lambda val: [val[0] , val[1] / divisor], 1, np.array(list(new_genome.items()))))
    return result


{0.0: 22.0, 1.0: 28.0, 2.0: 28.0, 3.0: 7.0, 4.0: 10.0, 5.0: 5.0}

In [94]:
NUM_MATCHES = 100
NIM_SIZE = 10


def evaluate(strategy1: Callable , strategy2: Callable) -> float:
    opponent = (strategy1, strategy2)
    won = 0

    for m in range(NUM_MATCHES):
        nim = Nim(NIM_SIZE)
        player = 0
        while nim:
            ply = opponent[player](nim)
            nim.nimming(ply)
            player = 1 - player
        if player == 1:
            won += 1
    return won / NUM_MATCHES

In [9]:


strat_1 ={ k: v for  k, v in enumerate([25,25,25,10,7,8]) }
strat_2 ={ k: v for  k, v in enumerate([25,25,25,10,7,8]) }
evaluate(make_strategy(strat_1),make_strategy(strat_2))

0.45

In [81]:
POPULATION_SIZE = 10
POPULATION_SURVIVORS = 5
ITERATION_MAX = 25
WIN_THRESHOLD = 0.8
adapt_rate = 3

#NOT USED!!!
def evolve_strategy(start_genome: dict) -> Callable:
    base_strategy = make_strategy(start_genome)
    fitness=0
    iteration_n = 0
    genome_top = []
    strategy_top = []
    genome_top.append(deepcopy(start_genome))
    strategy_top.append(deepcopy(base_strategy))
    #for i in range(POPULATION_SURVIVORS):
    #    genome_top.append(deepcopy(start_genome))
    #    strategy_top.append(deepcopy(base_strategy))
    genome_best = start_genome
    while fitness < WIN_THRESHOLD and iteration_n < ITERATION_MAX:
        iteration_n = iteration_n + 1 
        genome_pop = []
        strategy_pop = []
        for i in range(POPULATION_SIZE):
            genome_tmp = random.choice(genome_top)
            genome_pop.append(evolve(genome_tmp,adapt_rate))
            strategy_pop.append(make_strategy(genome_pop[i]))
        genome_fitness = np.zeros(POPULATION_SIZE)
        for i in range(POPULATION_SIZE):
            for j in range(POPULATION_SIZE):
                if j != i:
                    if evaluate(strategy_pop[i],strategy_pop[j]) > 0.5:
                        genome_fitness[i] = genome_fitness[i]+1

        best = genome_fitness.argmax()
        genome_best = genome_pop[best]
        strategy_best = strategy_pop[best]
        if len(genome_top) < POPULATION_SURVIVORS:
            genome_top.append(genome_best)
            strategy_top.append(strategy_best)
        else:
            new_genome_result = np.zeros(POPULATION_SURVIVORS)
            is_better = False
            for i in range(POPULATION_SURVIVORS):
                new_genome_result[i]=evaluate(strategy_best,strategy_top[i])
                if new_genome_result[i] > 0.5 : 
                    is_better=True
            if is_better:
                replace = new_genome_result.argmax()
                genome_top[replace] = genome_best
                strategy_top[replace] = strategy_best
        fitness = evaluate(strategy_best,base_strategy)
        #print(f"win rate:{fitness}, genome: {genome_best}")

    genome_fitness = np.zeros(POPULATION_SURVIVORS)
    for i in range(POPULATION_SIZE):
            genome_fitness[i] =evaluate(strategy_top[i],base_strategy)
    best = genome_fitness.argmax()
    fitness = genome_fitness[best]
    genome_best = genome_top[best]
    print(f"FINAL win rate:{fitness}, genome: {genome_best}")



In [136]:
ITERATION_MAX = 500
WIN_THRESHOLD = 0.8
def evolve_against_one_opponent(genome_opponent: dict, genome_base: list, learning_rate: float) -> dict:
    opponent_strategy = make_strategy(genome_opponent)
    population_size = len(genome_base)
    genome_population = []
    strategy_population = []
    fitness_population = np.zeros(population_size)
    #create a base population
    for i in range(population_size):
        genome_tmp = random.choice(genome_base)
        genome_population.append(deepcopy(genome_base[i]))
        strategy_population.append(make_strategy(genome_population[i]))
        fitness_population[i] = evaluate(strategy_population[i],opponent_strategy)
    
    fitness=0
    iteration_n = 0
    while fitness < WIN_THRESHOLD and iteration_n < ITERATION_MAX:
        iteration_n = iteration_n + 1 
        genome_tmp = evolve(random.choice(genome_population), learning_rate)
        strategy_tmp = make_strategy(genome_tmp)
        fitness_tmp = evaluate(strategy_tmp,opponent_strategy)
        swap_list = [i for i,win_r in enumerate(list(fitness_population)) if win_r < fitness_tmp]
        
        if swap_list:
            swap = random.choice(swap_list)
            genome_population[swap] = genome_tmp
            strategy_population[swap] = strategy_tmp
            fitness_population[swap] = fitness_tmp
            fitness = fitness_tmp
    best = fitness_population.argmax()
    return {"best":(fitness_population[best], genome_population[best]) , "list": genome_population}

In [137]:
genome_list = []
for i in range(10):
    genome_tmp = {k: v for  k, v in enumerate([random.randint(0,100),random.randint(0,100),random.randint(0,100),random.randint(0,100),random.randint(0,100),random.randint(0,100)])}
    genome_list.append(evolve(genome_tmp,1)) #to get the total probability back to 100%
dumb_opponent = {k: v for  k, v in enumerate([0,100,0,0,0,0])} #take always only one from the shortest row
result = evolve_against_one_opponent(dumb_opponent,genome_list,5)
genome_list = result["list"]

print(f"against dumb: win rate={result['best'][0]} with genome={result['best'][1]}")
start_genome = result['best'][1]
for i in range(5):
    better_opponent = result["best"][1]
    result = evolve_against_one_opponent(better_opponent,genome_list,5)
    genome_list = result["list"]
    better_opponent = result["best"][1]
    print(f"against better: win rate={result['best'][0]} with genome={better_opponent}")
print(f"start vs finish: { evaluate(make_strategy(better_opponent),make_strategy(start_genome)) }")

against dumb: win rate=0.91 with genome={0.0: 3.1141868512110724, 1.0: 25.259515570934255, 2.0: 25.951557093425606, 3.0: 14.532871972318338, 4.0: 12.45674740484429, 5.0: 18.685121107266436}
against better: win rate=0.74 with genome={0.0: 0.0, 1.0: 7.373593843173378, 2.0: 28.895989853752965, 3.0: 4.545454545454545, 4.0: 0.0, 5.0: 59.18496175761911}
against better: win rate=0.59 with genome={0.0: 9.315385724551284, 1.0: 0.0, 2.0: 51.737426290467695, 3.0: 0.0, 4.0: 13.460932536116694, 5.0: 25.486255448864327}
against better: win rate=0.64 with genome={0.0: 15.994248468405104, 1.0: 3.999999999999999, 2.0: 39.61959132661733, 3.0: 7.68321738490266, 4.0: 0.0, 5.0: 32.70294282007489}
against better: win rate=0.65 with genome={0.0: 4.226343985975054, 1.0: 4.0612854767747075, 2.0: 47.07311389511988, 3.0: 16.889145492420397, 4.0: 0.0, 5.0: 27.750111149709962}
against better: win rate=0.67 with genome={0.0: 4.166666666666667, 1.0: 0.0, 2.0: 45.056575537144596, 3.0: 16.32876160086293, 4.0: 0.0, 5.0