In [1]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy

Nimply = namedtuple(typename="Nimply", field_names="row, num_objects")


class Nim:
    """Board for a game of Nim: a list of rows, each holding a count of objects.

    Row ``i`` starts with ``2 * i + 1`` objects. ``k`` is an optional cap on how
    many objects a single move actually removes (``None`` means no cap).
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [2 * index + 1 for index in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Truthy while the rows still hold a positive total of objects."""
        remaining = sum(self._rows)
        return remaining > 0

    def __str__(self):
        """Render the row sizes as ``<a b c ...>``."""
        return f"<{' '.join(map(str, self._rows))}>"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Remove objects from one row as described by ``ply``.

        ``ply`` unpacks to ``(row, num_objects)``. The request must be positive
        and must not exceed what the row holds, otherwise an ``AssertionError``
        is raised.
        """
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert num_objects > 0
        # A request above the cap is not rejected: it is reduced to the cap.
        removed = num_objects if self._k is None else min(num_objects, self._k)
        self._rows[row] -= removed



class Game():
    """One match of Nim between two move-selecting callables.

    ``player`` and ``opponent`` are each called with the board and must return
    a ply accepted by ``Nim.nimming``. ``first_move`` tells whether ``player``
    moves first.
    """

    def __init__(self, num_heaps: int, first_move: bool, player, opponent, k=None):
        self.nim = Nim(num_heaps, k)
        self.player = player
        self.opponent = opponent
        self.player_turn = first_move

    def make_move(self):
        """Let whoever is on turn pick a ply, apply it, and pass the turn."""
        if self.player_turn:
            mover = self.player
        else:
            mover = self.opponent
        chosen_ply = mover(self.nim)
        self.nim.nimming(chosen_ply)
        self.player_turn = not self.player_turn

    def play(self) -> bool:
        """Play until the board is empty and report the outcome for ``player``.

        The turn flag is flipped after every move, so when the loop ends it
        points at the side that did NOT remove the last object(s). The return
        value is therefore True exactly when the opponent made the final move.
        """
        while self.nim:
            self.make_move()
        return self.player_turn

In [13]:
from dataclasses import dataclass
from random import randint
from numpy.random import choice, random
from itertools import product
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


# A move: which row to take from and how many objects to take.
Nimply = namedtuple(typename="Nimply", field_names="row, num_objects")

import numpy as np

def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row, then a random number of objects from it."""
    non_empty = [index for index, size in enumerate(state.rows) if size > 0]
    row = random.choice(non_empty)
    return Nimply(row, random.randint(1, state.rows[row]))

def gabriele(state: Nim) -> Nimply:
    """Take every object from the non-empty row with the smallest index."""
    non_empty = ((index, size) for index, size in enumerate(state.rows) if size > 0)
    # Maximising the negated index selects the lowest-numbered row; taking
    # `size` objects is the largest move that row allows.
    row, size = max(non_empty, key=lambda entry: -entry[0])
    return Nimply(row, size)


def calculate_nim_sum(state: Nim) -> int:
    """Return the bitwise XOR of all row sizes in ``state.rows`` (0 if there are none)."""
    xor_total = 0
    for heap_size in state.rows:
        xor_total = xor_total ^ heap_size
    return xor_total

def analize(raw: Nim) -> dict:
    """Map every legal ply on ``raw`` to the nim-sum of the board it produces.

    Returns ``{"possible_moves": {ply: nim_sum_after_ply, ...}}``. Each ply is
    tried on a deep copy, so ``raw`` itself is left untouched.
    """
    outcomes = dict()
    for row_index, size in enumerate(raw.rows):
        for taken in range(1, size + 1):
            candidate = Nimply(row_index, taken)
            board = deepcopy(raw)
            board.nimming(candidate)
            outcomes[candidate] = calculate_nim_sum(board)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Pick a random ply among those that leave a non-zero nim-sum.

    Every legal ply is scored with ``analize``; if no ply leaves a non-zero
    nim-sum, the choice is made among all legal plies instead.

    The full analysis is reported through ``logging.debug`` only (the former
    unconditional ``print`` of the same data on every call was removed).
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    # NOTE(review): this keeps plies whose resulting nim-sum is NOT zero. The
    # textbook Nim strategy aims for a zero nim-sum, so this filter looks
    # inverted for a function called "optimal" -- confirm it is intentional
    # before relying on this as a strong opponent.
    spicy_moves = [ply for ply, ns in outcomes.items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(outcomes.keys())
    return random.choice(spicy_moves)

def expert_system(state: Nim) -> Nimply:
    """Rule-based move selection; the first matching rule decides the ply.

    1. One non-empty row: leave a single object in it (take 1 if it holds 1).
    2. Two non-empty rows and some row holds exactly 1: empty the largest row.
    3. Two non-empty rows otherwise: shrink the larger one to match the smaller
       (take 1 from the second if they are already equal).
    4. Nim-sum is zero: take 1 object from the largest row.
    5. Otherwise: on the first row where it is possible, remove enough objects
       to bring the nim-sum to zero.
    6. Fallback: take 1 object from the first non-empty row.
    """
    nim_sum = calculate_nim_sum(state)
    heaps = state.rows
    alive = [index for index, size in enumerate(heaps) if size > 0]

    # Rule 1: a single row is left.
    if len(alive) == 1:
        only = alive[0]
        return Nimply(only, max(heaps[only] - 1, 1))

    if len(alive) == 2:
        # Rule 2: one of the two rows is a singleton -> wipe out the largest row.
        if 1 in heaps:
            biggest = max(heaps)
            return Nimply(heaps.index(biggest), biggest)

        # Rule 3: level the two rows.
        first, second = alive
        if heaps[first] > heaps[second]:
            return Nimply(first, max(heaps[first] - heaps[second], 1))
        return Nimply(second, max(heaps[second] - heaps[first], 1))

    # Rule 4: nothing to gain from the nim-sum; nibble at the largest row.
    if nim_sum == 0:
        biggest = max(heaps)
        return Nimply(heaps.index(biggest), 1)

    # Rule 5: first row that can be reduced to (nim_sum XOR size).
    for index, size in enumerate(heaps):
        target = nim_sum ^ size
        if size > 0 and target < size:
            return Nimply(index, size - target)

    # Rule 6: deterministic fallback.
    for index, size in enumerate(heaps):
        if size > 0:
            return Nimply(index, 1)


# Registry of the available move-selection policies, keyed by name.
# The insertion order matters: weight vectors are matched to these entries
# by position.
strategies = {
    "random": pure_random,
    "gabriele": gabriele,
    "optimal": optimal,
    "expert_system": expert_system,
}

class Evolved_Strategy():
    """A weighted mixture of the policies registered in ``strategies``.

    ``weights`` holds one number per registered policy (matched by position)
    and ``sigma`` is the standard deviation of the Gaussian noise used by
    ``mutate``. For each game one policy is drawn at random with the
    probabilities produced by ``normalize_weights``.
    """

    def __init__(self, sigma, weights):
        self.sigma = sigma
        self.weights = weights
        self.strategies = strategies

    def normalize_weights(self, weights):
        """Turn ``weights`` into a list of probabilities that sums to 1.

        The weights are shifted so the smallest becomes 0 and then divided by
        their sum; the entry with the lowest weight therefore gets probability
        0. When all weights are equal the shifted sum is 0, and a uniform
        distribution is returned instead of dividing by zero.
        """
        min_weight = min(weights)
        shifted_weights = [weight - min_weight for weight in weights]
        total = sum(shifted_weights)

        if total == 0:
            return [1 / len(shifted_weights)] * len(shifted_weights)

        return [weight / total for weight in shifted_weights]

    def mutate(self):
        """Return a new individual whose weights are this one's plus N(0, sigma) noise."""
        noise = np.random.normal(loc=0, scale=self.sigma, size=len(self.strategies))
        return Evolved_Strategy(self.sigma, noise + self.weights)

    def _win_rate(self, num_heaps, num_games, opponent_for_game):
        """Play ``num_games`` games and return the fraction counted as wins.

        ``opponent_for_game`` maps the 0-based game index to the opponent
        policy for that game. The mixture moves first in the first game and
        the first move alternates afterwards. A game counts as a win when
        ``Game.play()`` returns a truthy value.
        """
        if num_games < 1:
            raise ValueError("num_games must be at least 1")

        candidates = list(self.strategies.values())
        # The weights do not change during evaluation, so normalise them once.
        probabilities = self.normalize_weights(self.weights)

        player_wins = 0
        play_first = True
        for game_index in range(num_games):
            player = choice(candidates, p=probabilities)
            game = Game(num_heaps, play_first, player, opponent_for_game(game_index))
            if game.play():
                player_wins += 1
            play_first = not play_first

        return player_wins / num_games

    def fitness(self, num_heaps = 4, num_games = 100):
        """Win rate over ``num_games`` games against the ``gabriele`` policy."""
        return self._win_rate(num_heaps, num_games, lambda _game_index: gabriele)

    def fitness_all(self, num_heaps = 5, num_games = 100):
        """Win rate over ``num_games`` games against every registered policy in turn.

        The games are split into consecutive blocks of
        ``num_games // len(opponents)`` games, one block per opponent. Games
        left over when the split is not exact are played against the last
        opponent (the index is clamped instead of running past the list).
        """
        opponents = list(self.strategies.values())
        games_per_opponent = max(1, num_games // len(opponents))
        last_index = len(opponents) - 1

        def opponent_for_game(game_index):
            return opponents[min(game_index // games_per_opponent, last_index)]

        return self._win_rate(num_heaps, num_games, opponent_for_game)

In [None]:
def train_EA(λ,steps,sigma, adaptive =False):
    """Evolve strategy weights with a (1 + λ) evolution strategy.

    Parameters:
        λ: number of offspring generated per generation.
        steps: total offspring budget; ``steps // λ`` generations are run.
        sigma: initial standard deviation of the mutation noise.
        adaptive: when True, sigma is rescaled periodically: divided by 1.1 if
            fewer than 1/5 of the recent offspring beat the parent, multiplied
            by 1.1 if more than 1/5 did.

    Returns:
        The weight vector of the best individual found.

    Progress and the final result are printed to stdout.
    """
    generations = steps // λ
    # Sigma is revisited every `adaptive_rate` generations, i.e. about 100 times
    # over the whole run. Clamp to 1 so that runs shorter than 100 generations
    # do not end up with a modulo-by-zero below.
    adaptive_rate = max(1, generations // 100)
    stats = [0, 0]  # [offspring evaluated, offspring that beat the parent]

    # One weight per registered strategy, so the vector always matches the
    # noise vector produced by Evolved_Strategy.mutate().
    initial_weights = np.random.random(len(strategies))

    initial_solution = Evolved_Strategy(sigma, initial_weights)
    best_solution, best_fit = initial_solution, initial_solution.fitness()
    print(best_solution.weights)
    for step in range(generations):

        parent = best_solution
        offspring = [parent.mutate() for _ in range(λ)]
        evals = np.array([x.fitness() for x in offspring])
        stats[0] += λ
        # NOTE(review): the parent is re-evaluated here (fitness is stochastic),
        # so the success count uses a fresh sample while the replacement test
        # below uses the stored best_fit -- confirm this asymmetry is intended.
        stats[1] += sum(evals > parent.fitness())
        best_index = np.argmax(evals)
        solution = offspring[best_index]
        sol_fit = evals[best_index]

        if sol_fit > best_fit:
            best_solution = solution
            best_fit = sol_fit

        if adaptive and (step + 1) % adaptive_rate == 0:
            success_rate = stats[1] / stats[0]
            if success_rate < 1 / 5:
                best_solution.sigma /= 1.1
            elif success_rate > 1 / 5:
                best_solution.sigma *= 1.1
            stats = [0, 0]

    print("Final weights: ")
    print(best_solution.weights, best_fit)
    print("Weights normalized as probabilities: ")
    print(best_solution.normalize_weights(best_solution.weights))
    print("Final value of sigma: ")
    print(best_solution.sigma)
    return best_solution.weights

In [None]:
# Run the evolutionary search: 10 offspring per generation, a budget of 10000
# offspring evaluations, initial sigma 0.01, with sigma adaptation enabled.
best_weights = train_EA(λ=10, steps=10000, sigma=0.01, adaptive=True)

In [None]:
def normalize_weights(weights, prob=True):
    """Shift ``weights`` so the smallest is 0 and optionally scale them to sum to 1.

    Parameters:
        weights: non-empty sequence of numbers.
        prob: when False, return the shifted (non-negative) weights as is;
            when True, divide them by their sum to obtain probabilities.

    Returns:
        A list with one entry per input weight. With ``prob=True`` and all
        weights equal (shifted sum is 0) a uniform distribution is returned
        instead of dividing by zero.
    """
    min_weight = min(weights)
    shifted_weights = [weight - min_weight for weight in weights]

    if not prob:
        return shifted_weights

    total = sum(shifted_weights)
    if total == 0:
        return [1 / len(shifted_weights)] * len(shifted_weights)

    return [weight / total for weight in shifted_weights]

In [None]:
def EA(weights, nim : Nim):
    """Choose a ply by weighted vote among all registered strategies.

    Every strategy proposes a ply for ``nim``; each proposal collects the
    shifted (non-negative) weight of the strategy that made it, and identical
    proposals pool their weights. The ply with the largest total is returned
    (the earliest proposed one in case of a tie).
    """
    vote_weights = normalize_weights(weights, False)
    tally = {}
    for index, policy in enumerate(strategies.values()):
        proposal = policy(nim)
        tally[proposal] = tally.get(proposal, 0) + vote_weights[index]
    return max(tally, key=tally.get)

In [None]:
def test_EA(weights):
    """Play 50 games of the weighted-vote agent against ``optimal`` and print the tally.

    Player 1 is the opponent and player 0 is the evolved agent. Player 1 moves
    first in the first game; after that the side to move simply carries over
    from the end of the previous game. The side that is on turn when the board
    becomes empty (i.e. the one that did not make the last move) is counted as
    the winner. Sets the root logger level to INFO as a side effect.
    """
    logging.getLogger().setLevel(logging.INFO)

    opponent = optimal
    use_small_board = False
    player = 1
    won = {"0": 0, "1": 0}
    for _ in range(50):
        nim = Nim(4) if use_small_board else Nim(5)
        logging.info(f"init : {nim}")
        print(nim.rows)
        while nim:
            ply = opponent(nim) if player else EA(weights, nim)
            logging.info(f"ply: player {player} plays {ply}")
            nim.nimming(ply)
            logging.info(f"status: {nim}")
            player = 1 - player
        won[str(player)] += 1
        logging.info(f"status: Player {player} won!")
    print("Winning games : ",won)

In [None]:
# Evaluate the evolved weights in head-to-head games.
test_EA(weights=best_weights)