In [268]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor

In [269]:
# A candidate solution: its genome (a pair of genes) and the fitness measured for it.
Individual = namedtuple("Individual", ["genome", "fitness"])
# A single move: the row index and how many objects to remove from that row.
Nimply = namedtuple("Nimply", "row, num_objects")

# Number of individuals kept after each generation (and size of the initial population).
POPULATION_SIZE = 20
# Number of children generated per generation.
OFFSPRING_SIZE = 20
# Number of generations evolved every time my_move() is called.
NUM_GENERATIONS = 5
# NOTE(review): not referenced by any of the visible cells.
PROBLEM_SIZE = 5
# Default number of individuals drawn for each tournament selection.
TOURNAMENT_SIZE =2
# Probability that a child is produced by mutation rather than by cross-over.
GENETIC_OPERATOR_RANDOMNESS = 0.3
# NOTE(review): not referenced at module level by any of the visible cells.
num_rows=5

In [270]:
class Nim:
    """Mutable Nim board.

    Row ``i`` starts with ``2 * i + 1`` objects (1, 3, 5, ...). An optional
    limit ``k`` caps how many objects a single move may remove.
    """

    def __init__(self, num_rows: int, k: int = None):
        # Odd numbers 1, 3, ..., 2 * num_rows - 1.
        self._rows = list(range(1, 2 * num_rows, 2))
        self._k = k

    def __bool__(self):
        """A board is truthy while objects remain on it."""
        remaining = sum(self._rows)
        return remaining > 0

    def __str__(self):
        """Render the row sizes as ``<a b c ...>``."""
        return f"<{' '.join(map(str, self._rows))}>"

    @property
    def rows(self) -> tuple:
        """Immutable snapshot of the current row sizes."""
        return tuple(self._rows)

    @property
    def k(self):
        """Maximum number of objects per move, or ``None`` when unlimited."""
        return self._k

    def nimming(self, ply: Nimply):
        """Apply ``ply`` (row, num_objects) to the board in place.

        Raises AssertionError when the row holds fewer objects than requested
        or when the request exceeds the per-move limit ``k``.
        """
        row, num_objects = ply
        available = self._rows[row]
        assert available >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] = available - num_objects



In [271]:
"""remove from a random row a random number of objects"""
def pure_random(state: Nim):
    """Return a random legal move.

    A non-empty row is chosen at random, then a random number of objects
    between 1 and the row's size is taken. When the board has a per-move
    limit (``state.k`` is not ``None``) the amount is capped at that limit,
    so the returned move is always accepted by ``Nim.nimming``.

    Raises IndexError (from ``random.choice``) if every row is empty.
    """
    non_empty_rows = [r for r, c in enumerate(state.rows) if c > 0]
    row = random.choice(non_empty_rows)
    # Without a limit the whole row may be taken; otherwise never exceed k.
    most = state.rows[row] if state.k is None else min(state.rows[row], state.k)
    num_objects = random.randint(1, most)
    return Nimply(row, num_objects)

"""Pick always the maximum possible number of the lowest row"""
def dump_PCI_max_lowest(state: Nim):
    """Take as many objects as allowed from the lowest-index non-empty row.

    Moves larger than the board's per-move limit ``state.k`` (when set) are
    excluded, mirroring the filter used by ``cook_status``, so the returned
    move is always accepted by ``Nim.nimming``.

    Raises ValueError (from ``max``) when no move is available.
    """
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, c + 1)
        if state.k is None or o <= state.k
    ]
    # Smallest row index first (negated), then the largest amount.
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))

"""Pick always the minimum possible number of the lowest row"""
def dumb_PCI_min_lowest(state: Nim):
    """Take the smallest possible amount from the lowest-index non-empty row.

    Moves larger than the board's per-move limit ``state.k`` (when set) are
    excluded, mirroring the filter used by ``cook_status``.

    Raises ValueError (from ``max``) when no move is available.
    """
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, c + 1)
        if state.k is None or o <= state.k
    ]
    # Both components are negated: smallest row index, then smallest amount.
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], -m[1])))

"""Pick always the maximum possible number of the longest row"""
def dumb_PCI_max_longest(state: Nim):
    """Take as many objects as allowed from the highest-index non-empty row.

    The row is selected by index, not by its current size. Moves larger than
    the board's per-move limit ``state.k`` (when set) are excluded, mirroring
    the filter used by ``cook_status``, so the returned move is always
    accepted by ``Nim.nimming``.

    Raises ValueError (from ``max``) when no move is available.
    """
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, c + 1)
        if state.k is None or o <= state.k
    ]
    # Largest row index first, then the largest amount.
    return Nimply(*max(possible_moves, key=lambda m: (m[0], m[1])))

"""Pick always the minimum possible number of the longest row"""
def dump_PCI_min_longest(state: Nim):
    """Take the smallest possible amount from the highest-index non-empty row.

    The row is selected by index, not by its current size. Moves larger than
    the board's per-move limit ``state.k`` (when set) are excluded, mirroring
    the filter used by ``cook_status``.

    Raises ValueError (from ``max``) when no move is available.
    """
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, c + 1)
        if state.k is None or o <= state.k
    ]
    # Largest row index first, then the smallest amount (negated).
    return Nimply(*max(possible_moves, key=lambda m: (m[0], -m[1])))

"""optimal strategy"""
def nim_sum(state: Nim):
    """Return the nim-sum of the board: the bitwise XOR of all row sizes.

    A board with no rows yields 0 (the XOR identity) instead of raising, so
    the function is safe to call on any board.
    """
    result = 0
    for count in state.rows:
        result = xor(result, count)
    return result

def cook_status(state: Nim):
    """Build a dictionary of facts derived from ``state``.

    Keys:
        possible_moves     -- every (row, amount) allowed by the per-move limit k
        active_rows_number -- how many rows still hold objects
        shortest_row       -- index of the smallest non-empty row
        longest_row        -- index of the largest row
        nim_sum            -- nim-sum of the current board
        brute_force        -- (move, nim-sum after that move) for each possible move

    Raises ValueError (from ``min``) when no row holds any object.
    """
    rows = state.rows
    limit = state.k

    moves = [
        (row, take)
        for row, count in enumerate(rows)
        for take in range(1, count + 1)
        if limit is None or take <= limit
    ]
    non_empty = [(row, count) for row, count in enumerate(rows) if count > 0]

    def _nim_sum_after(move):
        # Play the move on a copy so the caller's board is left untouched.
        board = deepcopy(state)
        board.nimming(move)
        return nim_sum(board)

    return {
        "possible_moves": moves,
        "active_rows_number": len(non_empty),
        "shortest_row": min(non_empty, key=lambda pair: pair[1])[0],
        "longest_row": max(enumerate(rows), key=lambda pair: pair[1])[0],
        "nim_sum": nim_sum(state),
        "brute_force": [(move, _nim_sum_after(move)) for move in moves],
    }

def optimal_startegy(state: Nim):
    """Return the first move that leaves a nim-sum of 0, else a random move.

    The random fallback is drawn before the search, whether or not it ends
    up being used. The result is the ``(row, amount)`` tuple produced by
    ``cook_status``.
    """
    outcomes = cook_status(state)["brute_force"]
    fallback = random.choice(outcomes)
    for outcome in outcomes:
        move, resulting_nim_sum = outcome[0], outcome[1]
        if resulting_nim_sum == 0:
            return move
    return fallback[0]

"""the agent manually can play without fixed rules"""
def manual(state:Nim):
    """Ask the user for a move on standard input and return it as a Nimply.

    The row is entered 1-based and converted to a 0-based index. Each prompt
    is repeated until the answer is an integer that designates a non-empty
    row, respectively an amount between 1 and the size of that row.
    """
    piles = state._rows

    # Step 1: choose the row.
    row = None
    while row is None:
        try:
            chosen = int(input("Enter the number of row(from 1 up):"))
        except ValueError:
            print("Invalid input. Try again.")
            continue
        if 0 < chosen <= len(piles) and piles[chosen - 1] != 0:
            row = chosen - 1
        else:
            print("No objects in this row. Try again.")

    # Step 2: choose how many objects to remove from that row.
    while True:
        try:
            num_objects = int(input("Enter the number of objects to take:"))
        except ValueError:
            print("Invalid input. Try again.")
            continue
        if num_objects > 0 and piles[row] - num_objects >= 0:
            return Nimply(row, num_objects)
        print("You cannot take this number of objects, be serious. Try again.")

"""evolutionary rules"""
def evolvable(state: Nim, genome: tuple):
    """Choose a move by mapping the two genes onto one of four fixed rules.

    ``genome[0]`` (alpha) selects the row: above its threshold the
    highest-index non-empty row is used, otherwise the lowest-index one.
    ``genome[1]`` (beta) selects the amount: above its threshold the maximum
    is taken, otherwise the minimum.

    A gene exactly equal to its threshold is treated as "not above". Such
    values do occur (averaging genes 0 and 1 in cross-over gives 0.5), and
    without this rule no branch would match and a null move ``Nimply(0, 0)``
    would be returned.
    """
    threshold_alpha = 0.5
    threshold_beta = 0.5

    use_highest_row = genome[0] > threshold_alpha
    take_maximum = genome[1] > threshold_beta

    if use_highest_row:
        rule = dumb_PCI_max_longest if take_maximum else dump_PCI_min_longest
    else:
        rule = dump_PCI_max_lowest if take_maximum else dumb_PCI_min_lowest

    return rule(state)

In [272]:
"""evaluation of the strategy used"""
# Number of games played by evaluate() to estimate the fitness of one genome.
NUM_MATCHES = 10
# Number of rows of the board created for each of those games.
NIM_SIZE = 5

def evaluate(nim: Nim, genome: tuple):
    """Estimate the win rate of ``genome`` against the random player.

    NUM_MATCHES games are played, each on a fresh ``Nim(NIM_SIZE)`` board;
    the ``nim`` argument itself is never read. The evolved player (player 0)
    always moves first, and whoever makes the last move wins.

    Returns the fraction of games won by the evolved player.
    """
    victories = 0

    for _ in range(NUM_MATCHES):
        board = Nim(NIM_SIZE)
        turn = 0
        while board:
            move = evolvable(board, genome) if turn == 0 else pure_random(board)
            board.nimming(move)
            turn = 1 - turn
        # `turn` was flipped after the final move, so 1 means player 0 moved last.
        victories += 1 if turn == 1 else 0

    return victories / NUM_MATCHES

In [273]:

def tournament(population, tournament_size=TOURNAMENT_SIZE):
    """Draw ``tournament_size`` individuals (with replacement) and return the fittest."""
    contenders = random.choices(population, k=tournament_size)
    return max(contenders, key=lambda contender: contender.fitness)

"""take 2 genome and avg their alpha and beta"""
def cross_over(g1, g2):
    """Return the child genome whose alpha and beta are the parents' averages."""
    return tuple((g1[gene] + g2[gene]) / 2 for gene in range(2))

"""change slightly alpha or beta inside a genome"""
def mutation(g):
    """Return a copy of genome ``g`` with exactly one gene shifted by 0.1.

    One gene (alpha or beta) and one direction (add or subtract) are drawn
    at random. If the step in the drawn direction would reach or cross the
    boundary of the open interval (0, 1), the step is applied in the
    opposite direction instead. The other gene is returned unchanged.

    Parameters:
        g -- genome as an indexable pair ``(alpha, beta)``

    Returns:
        A new ``(alpha, beta)`` tuple.
    """
    quantity = 0.1
    genes = [g[0], g[1]]

    mutate_alpha = random.randint(0, 1) == 1  # which gene to change
    increase = random.randint(0, 1) == 1      # whether to add or subtract
    position = 0 if mutate_alpha else 1

    if increase:
        # Reflect downwards when adding would reach or exceed 1.
        delta = quantity if genes[position] + quantity < 1 else -quantity
    else:
        # Reflect upwards when subtracting would reach or go below 0.
        delta = -quantity if genes[position] - quantity > 0 else quantity

    genes[position] += delta
    return (genes[0], genes[1])
        
def compute_fitness(nim,genome):
    """Fitness of ``genome``: the value returned by ``evaluate`` for it."""
    fitness = evaluate(nim, genome)
    return fitness

In [274]:
def my_move(nim):
    """Evolve the global population, then play the best genome on ``nim``.

    For NUM_GENERATIONS generations, OFFSPRING_SIZE children are created
    (by mutation with probability GENETIC_OPERATOR_RANDOMNESS, otherwise by
    cross-over), scored, merged into the population, de-duplicated and cut
    back to the POPULATION_SIZE fittest individuals. The module-level
    ``population`` is updated as a side effect.

    Returns the move chosen by ``evolvable`` for the top-ranked genome.
    """
    global population
    for _generation in range(NUM_GENERATIONS):
        offspring = []
        for _child in range(OFFSPRING_SIZE):
            use_mutation = random.random() < GENETIC_OPERATOR_RANDOMNESS
            if use_mutation:
                parent = tournament(population)
                child_genome = mutation(parent.genome)
            else:
                first_parent = tournament(population)
                second_parent = tournament(population)
                child_genome = cross_over(first_parent.genome, second_parent.genome)
            child_fitness = compute_fitness(nim, child_genome)
            offspring.append(Individual(child_genome, child_fitness))

        population.extend(offspring)
        # The set removes duplicate individuals before ranking by fitness.
        ranked = sorted(set(population), key=lambda individual: individual[1], reverse=True)
        population = ranked[:POPULATION_SIZE]

    best_genome = population[0][0]
    return evolvable(nim, best_genome)

In [275]:
# Let the root logger emit DEBUG (and higher) records.
logging.getLogger().setLevel(logging.DEBUG)

# Board used for the actual game.
nim = Nim(5)
# Module-level population: my_move() declares it `global` and rebinds it.
population = list()
while len(population)<POPULATION_SIZE:
    # randint(0, 1) returns the integer 0 or 1, so every initial gene is 0 or 1.
    alpha = random.randint(0, 1)
    beta = random.randint(0, 1)
    genome = (alpha,beta)
    population.append(Individual(genome, compute_fitness(nim, genome)))


logging.debug(f"status: Initial board  -> {nim}")
# Player 0 is the evolved agent, player 1 the random opponent.
player = 0
while nim:
    if player == 0:
        ply = my_move(nim)
        print(f"my move: {ply}")
    else:
        ply = pure_random(nim)
        print(f"opponent move: {ply}")

    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    player = 1 - player
# `player` was flipped after the final move, so the last mover (the winner) is 1 - player.
winner = 1 - player
logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9>
DEBUG:root:status: After player 0 -> <0 3 5 7 9>
DEBUG:root:status: After player 1 -> <0 3 5 7 5>
DEBUG:root:status: After player 0 -> <0 0 5 7 5>
DEBUG:root:status: After player 1 -> <0 0 5 3 5>
DEBUG:root:status: After player 0 -> <0 0 0 3 5>
DEBUG:root:status: After player 1 -> <0 0 0 3 2>
DEBUG:root:status: After player 0 -> <0 0 0 0 2>
DEBUG:root:status: After player 1 -> <0 0 0 0 0>
INFO:root:status: Player 1 won!


[Individual(genome=(0.0, 0.95), fitness=1.0), Individual(genome=(0.0, 1.0), fitness=1.0), Individual(genome=(0.25, 0.75), fitness=0.9), Individual(genome=(0.15, 0.65), fitness=0.9), Individual(genome=(0.0, 0.95), fitness=0.9), Individual(genome=(0, 1), fitness=0.9), Individual(genome=(0.0625, 0.8625), fitness=0.9), Individual(genome=(0.0, 0.975), fitness=0.9), Individual(genome=(1, 1), fitness=0.8), Individual(genome=(0.0, 0.975), fitness=0.8), Individual(genome=(0.15625, 0.81875), fitness=0.8), Individual(genome=(0.075, 0.825), fitness=0.8), Individual(genome=(0, 0), fitness=0.8), Individual(genome=(0.125, 0.825), fitness=0.8), Individual(genome=(0, 1), fitness=0.8), Individual(genome=(0.03125, 0.9312499999999999), fitness=0.8), Individual(genome=(0.125, 0.875), fitness=0.8), Individual(genome=(0.03125, 0.91875), fitness=0.8), Individual(genome=(0.0625, 0.8875), fitness=0.8), Individual(genome=(0.125, 0.85), fitness=0.8)]
my move: Nimply(row=0, num_objects=1)
opponent move: Nimply(row