In [1]:
import logging
from collections import namedtuple
import random
from numpy.random import choice
import functools

In [2]:
Nimply = namedtuple("Nimply", "row, num_objects")


class Nim:
    """Nim board whose i-th row starts with ``2 * i + 1`` objects.

    ``k``, when given, caps how many objects a single move may remove.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        # Odd numbers 1, 3, 5, ... one per row.
        self._rows = list(range(1, 2 * num_rows, 2))
        self._k = k

    def __bool__(self):
        """A board is truthy while the rows still sum to more than zero."""
        return 0 < sum(self._rows)

    def __str__(self):
        """Render the rows as ``<a b c>``."""
        return f"<{' '.join(map(str, self._rows))}>"

    @property
    def rows(self) -> tuple:
        """Immutable snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply ``ply`` by removing ``num_objects`` from ``row``.

        Asserts that the row holds enough objects and that the optional
        per-move cap ``k`` is respected.
        """
        row, num_objects = ply
        available = self._rows[row]
        assert available >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] = available - num_objects

In [3]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row, then a random count between 1 and its size."""
    heaps = state.rows
    non_empty = [index for index, count in enumerate(heaps) if count > 0]
    row = random.choice(non_empty)
    return Nimply(row, random.randint(1, heaps[row]))

class EvRules:
    """Per-state move table that is reinforced by game outcomes.

    Every known state maps to a tuple of rules
    ``(row, num_objects, fitness, times_played)``.  ``pickmove`` chooses a
    rule with probability proportional to its shifted fitness and records
    the choice; ``evaluate_game`` then rewards or penalises the recorded
    choices.  States are used as dictionary keys, so they must be hashable
    (e.g. a tuple of row sizes).
    """

    # Maximum number of rules created for a state / swapped in at once.
    _MAX_RULES_PER_STATE = 5

    def __init__(self,rows):
        self._rules=dict()
        self._game=[]
        self._num_rows_=rows

    def evaluate_game(self,won):
        """Update every rule played since the last evaluation.

        A played rule gains one fitness point on a win and loses one on a
        loss; its play counter always increases.  Each touched state is
        re-sorted by descending fitness and the game history is cleared.
        """
        delta = 1 if won else -1
        for state, move in self._game:
            updated = [
                (rule[0], rule[1], rule[2] + delta, rule[3] + 1) if rule == move else rule
                for rule in self._rules[state]
            ]
            self._rules[state] = tuple(sorted(updated, key=lambda a: a[2], reverse=True))
        self._game = []

    def pickmove(self,state):
        """Return a ``Nimply`` for ``state`` and remember it for ``evaluate_game``.

        Unknown states get up to five randomly sampled rules.  Known states
        first have their bad rules (negative fitness after more than two
        plays) swapped for untried moves.

        Raises:
            ValueError: if ``state`` offers no move at all.
        """
        if state not in self._rules:
            moves = self.__possiblemoves(state)
            sample_size = min(self._MAX_RULES_PER_STATE, len(moves))
            self._rules[state] = tuple(random.sample(moves, sample_size))
        else:
            self._replace_bad_rules(state)

        rules = self._rules[state]
        if not rules:
            raise ValueError(f"no legal moves available in state {state}")

        # Shift fitness so the worst rule still gets weight 1, then normalise.
        minfit = min(rule[2] for rule in rules)
        raw_weights = [rule[2] - minfit + 1 for rule in rules]
        total = sum(raw_weights)
        weights = [weight / total for weight in raw_weights]
        picked_move = rules[int(choice(len(weights), p=weights))]
        self._game.append((state, picked_move))
        return Nimply(picked_move[0], picked_move[1])

    def _replace_bad_rules(self, state):
        """Swap bad rules of ``state`` for moves that have not been tried yet.

        Only as many rules are dropped as replacements are available, and
        the dropped ones are the bad rules with the lowest fitness, so the
        rule set never shrinks and rules that are not bad are kept.
        """
        current = self._rules[state]
        bad = [rule for rule in current if rule[2] < 0 and rule[3] > 2]
        if not bad:
            return
        candidates = self.__newpossiblemoves(state)
        swap_count = min(len(bad), len(candidates), self._MAX_RULES_PER_STATE)
        if swap_count == 0:
            return
        fresh = random.sample(candidates, swap_count)
        doomed = set(sorted(bad, key=lambda a: a[2])[:swap_count])
        kept = [rule for rule in current if rule not in doomed]
        self._rules[state] = tuple(sorted(kept + fresh, key=lambda a: a[2], reverse=True))

    def __possiblemoves(self,state):
        """List every move ``(row, take, 0, 0)`` with ``1 <= take <= state[row]``."""
        return [(row,toTake+1,0,0) for row in range(self._num_rows_) for toTake in range(state[row])]

    def __newpossiblemoves(self,state):
        """List the possible moves of ``state`` that have no rule yet."""
        known = {(rule[0], rule[1]) for rule in self._rules[state]}
        return [move for move in self.__possiblemoves(state) if (move[0], move[1]) not in known]

    def __lenpossiblemoves(self,state):
        """Return the total number of objects in ``state``."""
        return sum(state)






In [4]:
class EvAlgRules:
    """Evolve a probability mix over a fixed list of strategies.

    A genome is a tuple with one probability per strategy.  Its fitness is
    the fraction of games won against ``pure_random`` when each move is
    delegated to a strategy drawn according to the genome.
    """

    def __init__(self,rows,strategies,*,population_size=10,offspring_size=30,games_per_genome=100):
        """Create the initial population of uniform genomes and evaluate it.

        Args:
            rows: number of rows of the boards used to measure fitness.
            strategies: callables taking a board and returning a move.
            population_size: genomes kept after each generation.
            offspring_size: genomes generated per generation.
            games_per_genome: games played for every fitness evaluation.
        """
        self._population_size_=population_size
        self._offspring_size_=offspring_size
        self._games_per_genome_=games_per_genome
        self._strategies=strategies
        self._numstrats=len(strategies)
        self._offspring=[]
        self._num_rows_=rows
        self._genomes=[tuple(1/self._numstrats for _ in strategies) for _ in range(self._population_size_)]
        self._genomes_visited_already_=set(self._genomes)
        self._population=sorted(
            [(g,self._fitness(g)) for g in self._genomes],
            key=lambda a: a[1],
            reverse=True,
        )

    def _mutate(self,genome):
        """Move a random amount of probability between two distinct strategies.

        The amount is at most the smaller of the two probabilities, so no
        entry becomes negative and the total is preserved.
        """
        first, second = (int(i) for i in choice(self._numstrats, 2, replace=False))
        change = random.uniform(0, min(genome[first], genome[second]))
        gainer, loser = random.choice([(first, second), (second, first)])
        mutated = list(genome)
        mutated[gainer] += change
        mutated[loser] -= change
        return tuple(mutated)

    def get_best_player(self):
        """Return the genome at the top of the current population."""
        return self._population[0][0]

    def pick_move(self,nim,genome):
        """Draw a strategy according to ``genome`` and return its move for ``nim``."""
        strategy_index = int(choice(self._numstrats, p=genome))
        return self._strategies[strategy_index](nim)

    def _fitness(self,genome):
        """Return the fraction of games ``genome`` wins against ``pure_random``.

        Every game is played on a fresh board with ``self._num_rows_`` rows;
        player 0 always moves first and the genome is randomly assigned to
        be player 0 or player 1.  Whoever makes the last move wins.
        """
        wins=0
        for _ in range(self._games_per_genome_):
            starting=random.choice([True,False])
            my_id = 0 if starting else 1
            nim=Nim(self._num_rows_)
            # Reset the turn counter so `starting` really decides who opens.
            player=0
            while nim:
                if player==my_id:
                    ply=self.pick_move(nim,genome)
                else:
                    ply=pure_random(nim)
                nim.nimming(ply)
                player=1-player
            # `player` was toggled after the final move, so 1-player moved last.
            if 1-player==my_id:
                wins+=1
        return wins/self._games_per_genome_

    def __str__(self):
        return f"The best player right now won {self._population[0][1]*self._games_per_genome_} against the pure random bot!"

    def evolve(self,gens):
        """Run ``gens`` generations of mutation and truncation selection.

        Each generation creates ``_offspring_size_`` genomes that were never
        seen before, re-evaluates the current population, and keeps the
        ``_population_size_`` fittest of both groups.
        """
        print("At beginning the ev was",self)
        for gen in range(gens):
            for _ in range(self._offspring_size_):
                new_genome=None
                while new_genome is None or new_genome in self._genomes_visited_already_:
                    new_genome=self._mutate(random.choice(self._population)[0])
                self._offspring.append((new_genome,self._fitness(new_genome)))
                self._genomes_visited_already_.add(new_genome)
            # Fitness is noisy, so survivors are re-evaluated every generation.
            survivors=[(genome,self._fitness(genome)) for genome,_ in self._population]
            ranked=sorted(survivors+self._offspring,key=lambda a: a[1],reverse=True)
            self._population=ranked[:self._population_size_]
            self._offspring=[]
            print(f"Finished gen {gen}, now ev is",self)

In [5]:
def gabriele(state: Nim) -> Nimply:
    """Empty the non-empty row with the lowest index."""
    non_empty = ((row, count) for row, count in enumerate(state.rows) if count > 0)
    # Largest negated index == smallest row index; taking `count` clears the row.
    row, count = max(non_empty, key=lambda pair: -pair[0])
    return Nimply(row, count)

def _nimsum(state):
    """Return the XOR of all row sizes in ``state`` (0 for an empty sequence)."""
    # Seeding the fold with 0 (the XOR identity) keeps empty boards from raising.
    return functools.reduce(lambda acc, heap: acc ^ heap, state, 0)

def optimal(state: Nim) -> Nimply:
    """Play the nim-sum strategy: leave the opponent a board whose nim-sum is 0.

    When the nim-sum is already 0 no such move exists and a random move is
    returned instead.
    """
    heaps = state.rows
    nimsum = _nimsum(heaps)
    if nimsum == 0:
        return pure_random(state)
    for row in reversed(range(len(heaps))):
        target = heaps[row] ^ nimsum
        # Only a row that would shrink is usable; comparing against the
        # nim-sum instead of the row size could yield a negative removal.
        if target < heaps[row]:
            return Nimply(row, heaps[row] - target)
    # Defensive fallback; a non-zero nim-sum over non-negative rows always
    # has at least one row that satisfies the test above.
    return pure_random(state)


# Evolve a strategy mix over the three hand-written players on an 11-row
# board for 12 generations, then report the best genome's score.
mp = EvAlgRules(rows=11, strategies=[gabriele, pure_random, optimal])
mp.evolve(gens=12)
print(mp)

At beginning the ev was The best player right now won 79.0 against the pure random bot!
Finished gen 0, now ev is The best player right now won 80.0 against the pure random bot!
Finished gen 1, now ev is The best player right now won 94.0 against the pure random bot!
Finished gen 2, now ev is The best player right now won 92.0 against the pure random bot!
Finished gen 3, now ev is The best player right now won 97.0 against the pure random bot!
Finished gen 4, now ev is The best player right now won 97.0 against the pure random bot!
Finished gen 5, now ev is The best player right now won 98.0 against the pure random bot!
Finished gen 6, now ev is The best player right now won 98.0 against the pure random bot!
Finished gen 7, now ev is The best player right now won 98.0 against the pure random bot!
Finished gen 8, now ev is The best player right now won 99.0 against the pure random bot!
Finished gen 9, now ev is The best player right now won 98.0 against the pure random bot!
Finished gen

In [6]:
# Genome (one probability per strategy) at the top of the evolved population.
mpev=mp.get_best_player()
print(mpev)

(0.0018902538645309175, 0.005493568103918234, 0.9926161780315508)


In [7]:
#myPlayer=EvRules(11)
#wins=0


In [8]:
# Play NUM_GAMES games between the evolved mix (`mpev`) and `gabriele`,
# counting how many the evolved player wins.
wins=0
logging.getLogger().setLevel(logging.DEBUG)
starting=random.choice([True,False])
player=0
NUM_GAMES=100
for _ in range(NUM_GAMES):
    nim=Nim(11)
    # Player 0 always opens; without this reset the opener of a game would
    # depend on who made the last move of the previous one.
    player=0
    my_id=0 if starting else 1
    logging.debug(f"In this game I'm player #{0 if starting else 1}")
    while nim:
        if player==my_id:
            ply=mp.pick_move(nim,mpev)
        else:
            ply=gabriele(nim)
        nim.nimming(ply)
        player=1-player
    # `player` was toggled after the final move, so 1-player moved last and won.
    winner=1-player
    won=winner==my_id
    if won:
        wins+=1
    starting=random.choice([True,False])

logging.debug(f"After {NUM_GAMES} my player won {wins} games!")



DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #1
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #0
DEBUG:root:In this game I'm player #1
DEBUG:root:I

In [None]:
def getNimRows(nim):
    """Return the board's internal row list (the live object, not a copy)."""
    rows = nim._rows
    return rows
def getNimElsAtRow(nim,row):
    """Return the number of objects stored at index ``row`` of the board."""
    counts = nim._rows
    return counts[row]

def removeAllFromMax(nim):
    """Return ``(row, count)`` that empties the first row holding the most objects."""
    fullest_row, fullest_count = max(enumerate(nim._rows), key=lambda pair: pair[1])
    return (fullest_row, fullest_count)

def removeOneFromRandom(nim):
    """Return a move taking a single object from a randomly chosen non-empty row."""
    candidates = [index for index, count in enumerate(nim._rows) if count > 0]
    return (random.choice(candidates), 1)

def removeAllFromMin(nim):
    """Return ``(row, count)`` that empties the first non-empty row with the fewest objects."""
    active = ((index, count) for index, count in enumerate(nim._rows) if count > 0)
    smallest_row, smallest_count = min(active, key=lambda pair: pair[1])
    return (smallest_row, smallest_count)

def removeEvenFromRandom(nim):
    """Return a move taking an even number of objects from a random row.

    Only rows holding at least two objects qualify; when there is none the
    move is delegated to ``pure_random``.
    """
    rows = nim._rows
    candidates = [index for index, count in enumerate(rows) if count > 1]
    if not candidates:
        return pure_random(nim)
    row = random.choice(candidates)
    # 2, 4, ... up to the row size (the previous filter selected odd counts).
    even = random.choice(range(2, rows[row] + 1, 2))
    return (row, even)

def removeOddFromRandom(nim):
    """Return a move taking an odd number of objects from a random non-empty row."""
    rows = nim._rows
    row = random.choice([index for index, count in enumerate(rows) if count > 0])
    # 1, 3, ... up to the row size; never empty because the row holds >= 1
    # (the previous filter selected even counts and failed on 1-object rows).
    odd = random.choice(range(1, rows[row] + 1, 2))
    return (row, odd)

def removeRemainingInRowToOtherRow(nim):
    """Pick two random non-empty rows and take the smaller one's size from the larger.

    With fewer than two non-empty rows the move comes from ``removeAllFromMax``.
    """
    rows = nim._rows
    active = [index for index, count in enumerate(rows) if count > 0]
    if len(active) <= 1:
        return removeAllFromMax(nim)
    uniform = [1 / len(active)] * len(active)
    first, second = choice(active, 2, p=uniform, replace=False)
    if rows[first] < rows[second]:
        return (second, rows[first])
    return (first, rows[second])

"""def cook_status(state: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = [
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1) if state.k is None or o <= state.k
    ]
    cooked["active_rows_number"] = sum(o > 0 for o in state.rows)
    cooked["shortest_row"] = min((x for x in enumerate(state.rows) if x[1] > 0), key=lambda y: y[1])[0]
    cooked["longest_row"] = max((x for x in enumerate(state.rows)), key=lambda y: y[1])[0]
    cooked["nim_sum"] = nim_sum(state)

    brute_force = list()
    for m in cooked["possible_moves"]:
        tmp = deepcopy(state)
        tmp.nimming(m)
        brute_force.append((m, nim_sum(tmp)))
    cooked["brute_force"] = brute_force

    return cooked"""

class Policy:
    """Ordered list of rules; the first rule that holds decides the move."""

    def __init__(self) -> None:
        self.rules = []

    def pickmove(self, nim):
        """Return the action of the first truthy rule, else a ``pure_random`` move.

        Each rule is pointed at ``nim`` before it is tested.
        """
        for candidate in self.rules:
            candidate.setNim(nim)
            if not candidate:
                continue
            return candidate.action()
        return pure_random(nim)

def getRandomRow(nim):
    """Return a random row index, empty rows included."""
    indices = list(range(len(nim._rows)))
    return random.choice(indices)

def getActiveRows(nim):
    """Return how many rows still hold at least one object."""
    return len([count for count in nim._rows if count > 0])

def getElsInMaxRow(nim):
    """Return the size of the largest row."""
    rows = nim._rows
    return max(rows)

def getElsInMinActiveRow(nim):
    """Return the size of the smallest non-empty row, or 0 when every row is empty."""
    # Empty rows are skipped; otherwise the result is 0 whenever any row is empty.
    active = [count for count in nim._rows if count > 0]
    return min(active) if active else 0

def getOddActiveRows(nim):
    """Return how many non-empty rows hold an odd number of objects."""
    # The previous test, (2*n+1) % 2, is 1 for every n, so the count was always 0.
    return sum(1 for count in nim._rows if count > 0 and count % 2 == 1)

def getEvenActiveRows(nim):
    """Return how many non-empty rows hold an even number of objects."""
    # The previous test, n % 2, is truthy for odd sizes and so counted odd rows.
    return sum(1 for count in nim._rows if count > 0 and count % 2 == 0)

def getAvgActiveRows(nim):
    """Return the mean size of the non-empty rows.

    Raises ZeroDivisionError when no row holds any object.
    """
    rows = nim._rows
    active_count = len([count for count in rows if count > 0])
    return sum(rows) / active_count

class Rule:
    """A condition tree paired with the action to take when the condition holds.

    The initial condition is ``getActiveRows(nim) < 2`` and the action is
    drawn at random from ``self.actions``.
    """

    def __init__(self,nim):
        self.nim=nim
        # Operators and value providers available to condition trees.
        self.ops=["mul","add","sub","div","not","xor","or","and","eq","ne","lt","le","gt","ge","val"]
        self.possiblevalues=[getRandomRow,getActiveRows,getElsInMaxRow,getElsInMinActiveRow,getOddActiveRows,getEvenActiveRows,getAvgActiveRows]
        self.node=Node(nim,None,"lt",None,None)
        firstChild=Node(nim,self.node,None,None,getActiveRows)
        secondChild=Node(nim,self.node,None,2,None)
        self.node.addChild(firstChild)
        self.node.addChild(secondChild)
        self.actions=[removeAllFromMax,removeOneFromRandom,removeAllFromMin,removeEvenFromRandom,removeOddFromRandom,removeRemainingInRowToOtherRow]
        self.act=random.choice(self.actions)

    def setNim(self,nim):
        """Point the rule and its whole condition tree at ``nim``."""
        self.nim=nim
        self.node.setNim(nim)

    def __bool__(self):
        """Return whether the condition tree holds for the current board."""
        # __bool__ must return a real bool; a tree rooted at an arithmetic
        # operator evaluates to a number, which would raise TypeError.
        return bool(self.node.eval())

    def action(self):
        """Apply the rule's action to the current board and return its move."""
        return self.act(self.nim)

class Node:
    """Node of a rule's expression tree.

    A node built with an ``op`` is an inner node that combines the values of
    its children.  A node built without one is a leaf that yields either a
    constant (``value``) or the result of calling ``func`` on the board.
    """

    # Operators folded left to right over all children.
    _FOLDS = {
        "mul": lambda a, b: a * b,
        "sub": lambda a, b: a - b,
        "div": lambda a, b: a / b,
        "xor": lambda a, b: a ^ b,
        "or": lambda a, b: a or b,
        "and": lambda a, b: a and b,
    }
    # Operators applied to the first two children only.
    _COMPARISONS = {
        "eq": lambda a, b: a == b,
        "ne": lambda a, b: a != b,
        "lt": lambda a, b: a < b,
        "le": lambda a, b: a <= b,
        "gt": lambda a, b: a > b,
        "ge": lambda a, b: a >= b,
    }

    def __init__(self,nim,parent,op,value,func) -> None:
        """Create a node.

        Args:
            nim: board the node is evaluated against.
            parent: parent node, or None for the root.
            op: operator name for an inner node; falsy for a leaf.
            value: constant of a leaf; None means "use ``func``".
            func: callable taking the board, used when ``value`` is None.
        """
        self.parent=parent
        self.nim=nim
        self.childs=[]
        # Every node carries all four attributes so eval never hits a missing one.
        self.op=None
        self.val=None
        self.funcval=None
        if op:
            self.op=op
            self.leaf=False
        else:
            self.leaf=True
            # `is not None` keeps 0 usable as a constant.
            if value is not None:
                self.val=value
            else:
                self.funcval=func

    def setNim(self,nim):
        """Point this node and all of its descendants at ``nim``."""
        self.nim=nim
        if not self.leaf:
            for child in self.childs:
                child.setNim(nim)

    def addChild(self,a):
        """Append ``a`` as the last child."""
        self.childs.append(a)

    def eval(self):
        """Evaluate the subtree rooted here against the current board.

        Leaves return their constant or the result of their function.
        Inner nodes evaluate every child first; an operator that is not
        recognised yields None.
        """
        if self.leaf:
            if self.funcval is not None:
                return self.funcval(self.nim)
            return self.val
        evals=[child.eval() for child in self.childs]
        if self.op=="add":
            return sum(evals)
        if self.op=="not":
            return not evals[0]
        if self.op in self._FOLDS:
            return functools.reduce(self._FOLDS[self.op],evals)
        if self.op in self._COMPARISONS:
            return self._COMPARISONS[self.op](evals[0],evals[1])
        return None