## Task

Write agents able to play Nim, with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a. subtraction game).

The goal of the game is to avoid taking the last object.

Task2.1: An agent using fixed rules based on nim-sum (i.e., an expert system)  
Task2.2: An agent using evolved rules using ES

In [None]:
from collections import namedtuple
from pprint import pprint, pformat
from copy import deepcopy
import numpy as np
import logging
import random

## The Nim and Nimply classes

In [None]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")

In [None]:
class Nim:
    """Mutable state of a Nim game.

    The board is a list of rows; row ``i`` starts with ``2 * i + 1`` objects.
    When ``k`` is not ``None`` a single move may remove at most ``k`` objects.
    """

    def __init__(self, num_rows: int, k: "int | None" = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply ``ply`` to the board in place.

        Args:
            ply: a ``(row, num_objects)`` pair.

        Raises:
            AssertionError: if the ply removes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A move has to take something: without this check a zero ply is a
        # silent pass and a negative ply would ADD objects to the row.
        assert num_objects >= 1, "a move must remove at least one object"
        assert self._rows[row] >= num_objects, "not enough objects in the row"
        assert self._k is None or num_objects <= self._k, "move exceeds the bound k"
        self._rows[row] -= num_objects

## Strategies

In [None]:
def pure_random(state: Nim) -> Nimply:
    """Return a random legal move.

    A non-empty row is picked uniformly, then a random number of objects
    between 1 and the most that may be taken from it.

    Raises:
        IndexError: if every row is empty (there is nothing to choose from).
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # Honour the per-move bound k; a larger amount is rejected by Nim.nimming.
    limit = state.rows[row]
    if state._k is not None:
        limit = min(limit, state._k)
    num_objects = random.randint(1, limit)
    return Nimply(row, num_objects)

In [None]:
def gabriele(state: Nim) -> Nimply:
    """Take as many objects as allowed from the first non-empty row.

    "As many as allowed" is the whole row, capped by the per-move bound k
    when the game has one.

    Raises:
        ValueError: if no move is possible (every row is empty).
    """
    k = state._k
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        # Never enumerate more than k objects: Nim.nimming rejects such plies.
        for o in range(1, (c if k is None else min(c, k)) + 1)
    ]
    # Smallest row index first, then the largest amount within that row.
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))

In [None]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    # NOTE(review): unfinished placeholder - `genome` is never read and the
    # function falls off the end, so it returns None rather than a Nimply.
    genome = {"love_small": 0.5}

In [None]:
def nim_sum(state: Nim) -> int:
    """Return the nim-sum (bitwise XOR) of all row sizes in ``state``.

    Plain integer XOR replaces the former string/array round trip, so the
    result is also defined for an empty board (0) and for row sizes that do
    not fit in 32 bits.
    """
    total = 0
    for count in state.rows:
        total ^= count
    return total


def analize(raw: Nim) -> dict:
    """Map every legal ply from ``raw`` to the nim-sum of the state it produces.

    Returns:
        A dict with a single key ``"possible_moves"`` whose value maps each
        ``Nimply`` to the nim-sum of the board after that ply.
    """
    k = raw._k
    cooked = dict()
    cooked["possible_moves"] = dict()
    for r, c in enumerate(raw.rows):
        # Plies above the bound k are illegal and make Nim.nimming raise.
        limit = c if k is None else min(c, k)
        for o in range(1, limit + 1):
            ply = Nimply(r, o)
            # Try the ply on a copy so the caller's board is left untouched.
            tmp = deepcopy(raw)
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    """Pick a winning move for "last object loses" Nim when one exists.

    A move is winning when it leaves the opponent in a losing position:
    * if at least one row still holds two or more objects afterwards, the
      resulting nim-sum must be zero;
    * otherwise (only rows of size 0 or 1 remain) an odd number of
      single-object rows must remain, so the opponent takes the last one.

    When no such move exists a random legal move is returned.

    NOTE(review): this is the theory for unbounded moves; with a bound k the
    same rule is only a heuristic - confirm before relying on it.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    rows = state.rows
    spicy_moves = []
    for ply, ns in analysis["possible_moves"].items():
        remaining = list(rows)
        remaining[ply.row] -= ply.num_objects
        if max(remaining) <= 1:
            # Endgame: parity of the single-object rows decides the game.
            winning = sum(remaining) % 2 == 1
        else:
            # The original test was inverted (ns != 0), which hands the
            # opponent a winning position.
            winning = ns == 0
        if winning:
            spicy_moves.append(ply)
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply

## Fixed rules based agent

In [None]:
def nim_sum_agent(state: Nim) -> Nimply:
    """Expert-system agent based on the nim-sum, for "last object loses" Nim.

    Rules, in order:
    1. Endgame - exactly one row holds more than one object: shrink it to 0
       or 1 so that an odd number of single-object rows is left.
    2. Otherwise, if the nim-sum is non-zero, play the move that makes it zero.
    3. Otherwise no winning move exists: fall back to ``gabriele``.

    Moves larger than the bound k (when set) are never returned.

    NOTE(review): rules 1-2 are the theory for unbounded moves; with a bound
    k they are only a heuristic - confirm before relying on it.
    """
    rows = state.rows
    k = state._k

    def allowed(amount):
        return amount >= 1 and (k is None or amount <= k)

    # Rule 1: the endgame differs from normal-play Nim.
    big_rows = [row for row, count in enumerate(rows) if count > 1]
    if len(big_rows) == 1:
        row = big_rows[0]
        singles = sum(1 for count in rows if count == 1)
        # Keep one object when that makes the number of single rows odd.
        keep = 1 if singles % 2 == 0 else 0
        if allowed(rows[row] - keep):
            return Nimply(row, rows[row] - keep)

    # Rule 2: reduce a row to `count XOR nim_sum`, which zeroes the nim-sum.
    # The original compared `nim_sum ^ count` with `nim_sum`, which differs
    # for every non-empty row, so it always emptied the first such row.
    current_nim_sum = nim_sum(state)
    if current_nim_sum != 0:
        for row, count in enumerate(rows):
            target = count ^ current_nim_sum
            if target < count and allowed(count - target):
                return Nimply(row, count - target)

    # Rule 3: nothing better available.
    ply = gabriele(state)
    if k is not None and ply.num_objects > k:
        ply = Nimply(ply.row, k)
    return ply

## Evolved rules based agent

### create_random_population()

In [None]:
def create_random_population(
    nim: Nim,
    num_individuals = 50
):
    """Build the initial population of ``num_individuals`` random plies.

    Plies are drawn only from rows that still hold objects and never exceed
    the row size or the bound k, so every individual is a legal move. A
    non-positive k is treated as "no bound", as the original sampling did.

    If the board is already empty no legal ply exists; one-object plies on
    random rows are returned as placeholders (``is_valid_play`` rejects them).
    """
    k = nim._k
    rows = nim.rows
    playable_rows = [row for row, count in enumerate(rows) if count > 0]

    population = []
    for _ in range(num_individuals):
        if playable_rows:
            row = random.choice(playable_rows)
            maximum = rows[row] if k is None or k <= 0 else min(rows[row], k)
        else:
            row = random.randint(0, len(rows) - 1)
            maximum = 1
        num_objects = random.randint(1, maximum)
        population.append(Nimply(row, num_objects))

    return population

### evaluate_individual()

In [None]:
def find_max(
    nim: Nim
):
    """Return the size of the largest row, or 0 for a board with no rows."""
    # The explicit 0 keeps the original floor; no local shadows builtin max.
    return max((0, *nim.rows))

In [None]:
def count_elements(
    nim: Nim
):
    """Return the total number of objects left on the board."""
    return sum(nim.rows)

In [None]:
def nim_addition_zero(
    nim: Nim
):
    """Score a position: 10000 if its nim-sum is zero, otherwise 0.

    The nim-sum is the bitwise XOR of the row sizes. A board with no rows has
    nim-sum 0 and therefore scores 10000 (the string-based version raised).
    """
    nim_sum_value = 0
    for count in nim.rows:
        nim_sum_value ^= count

    return 10000 if nim_sum_value == 0 else 0

In [None]:
def evaluate_individual(
    nim: Nim,
    ply
):
    """Return the fitness of ``ply`` played on ``nim``.

    Returns:
        -1 for an illegal ply, otherwise the score ``nim_addition_zero``
        gives to the board reached by playing ``ply`` on a copy of ``nim``.
    """
    # Illegal plies must rank strictly below every legal one; scoring them 0
    # tied them with legal non-winning moves, so they could be ranked first.
    if not is_valid_play(nim = nim, ply = ply):
        return -1

    # Evaluate on a copy so the real board is not modified.
    potential_nim = deepcopy(nim)
    potential_nim.nimming(ply)
    return nim_addition_zero(potential_nim)

### sort_population()

In [None]:
def sort_population(
    nim: Nim,
    population
):
    """Return a new list of the plies ordered from highest to lowest fitness.

    Fitness is ``evaluate_individual`` on ``nim``; plies with equal fitness
    keep their relative order.
    """
    def fitness(individual):
        return evaluate_individual(nim = nim, ply = individual)

    ranked = list(population)
    ranked.sort(key = fitness, reverse = True)
    return ranked

### get_elites()

In [None]:
def get_elites(
    current_population,
    elites_ratio = 0.5
):
    """Return the leading ``elites_ratio`` share of ``current_population``.

    The population is expected to be ranked best-first already; the number of
    elites is ``int(len(current_population) * elites_ratio)``.
    """
    elite_count = int(len(current_population) * elites_ratio)
    return [current_population[position] for position in range(elite_count)]

### is_valid_play()

In [None]:
def is_valid_play(
    nim: Nim,
    ply
):
    """Return True when ``ply`` is a legal move on ``nim``.

    A ply is legal when its row index addresses an existing row, it removes
    at least one object and no more than the row holds, and it does not
    exceed the bound k (when k is set).
    """
    # Row index must be in range; negative indices are rejected.
    if not (0 <= ply.row <= len(nim.rows) - 1):
        return False

    # At least one object, at most what the row currently holds.
    if not (1 <= ply.num_objects <= nim._rows[ply.row]):
        return False

    # Per-move bound, only when the game defines one.
    if nim._k is not None and ply.num_objects > nim._k:
        return False

    return True

### cross()

In [None]:
def cross(
    nim: Nim,
    current_population,
    elite_population,
    cross_ratio = 0.35,
    elite_crossing_probability = 0.8
):
    """Create ``int(len(current_population) * cross_ratio)`` children by crossover.

    Each child combines the row of one parent with the amount of the other.
    Both parents come from ``elite_population`` with probability
    ``elite_crossing_probability`` and from ``current_population`` otherwise.
    If neither combination is a legal move, the child is replaced by a random
    legal ply.
    """
    num_children = int(len(current_population) * cross_ratio)

    k = nim._k
    rows = nim.rows
    playable_rows = [row for row, count in enumerate(rows) if count > 0]

    crossed_population = []

    for _ in range(num_children):
        # An empty elite pool used to crash randint(0, -1); use the whole
        # population in that case.
        if elite_population and random.random() <= elite_crossing_probability:
            pool = elite_population
        else:
            pool = current_population
        parent_1 = random.choice(pool)
        parent_2 = random.choice(pool)

        child = Nimply(parent_1.row, parent_2.num_objects)
        if not is_valid_play(nim, child):
            child = Nimply(parent_2.row, parent_1.num_objects)
            if not is_valid_play(nim, child):
                if playable_rows:
                    # Draw from non-empty rows only so the fallback is legal.
                    # A non-positive k is treated as "no bound", as before.
                    row = random.choice(playable_rows)
                    maximum = rows[row] if k is None or k <= 0 else min(rows[row], k)
                else:
                    # Empty board: no legal ply exists, return a placeholder.
                    row = random.randint(0, len(rows) - 1)
                    maximum = 1
                child = Nimply(row, random.randint(1, maximum))

        crossed_population.append(child)

    return crossed_population


### mutate()

In [None]:
def mutate(
    nim: Nim,
    current_population,
    mutation_ratio = 0.15
):
    """Create ``int(len(current_population) * mutation_ratio)`` fresh random plies.

    Despite the name, no existing individual is altered: ``current_population``
    is only used for its size. Plies are drawn from non-empty rows and respect
    the row size and the bound k, so they are legal moves. A non-positive k is
    treated as "no bound", as the original sampling did.
    """
    num_mutated = int(len(current_population) * mutation_ratio)

    k = nim._k
    rows = nim.rows
    playable_rows = [row for row, count in enumerate(rows) if count > 0]

    mutated_population = []

    for _ in range(num_mutated):
        if playable_rows:
            row = random.choice(playable_rows)
            maximum = rows[row] if k is None or k <= 0 else min(rows[row], k)
        else:
            # Empty board: no legal ply exists, return a placeholder.
            row = random.randint(0, len(rows) - 1)
            maximum = 1
        num_objects = random.randint(1, maximum)
        mutated_population.append(Nimply(row, num_objects))

    return mutated_population

### evolutionary_agent()

In [None]:
def evolutionary_agent(
    nim: Nim,
    num_generations = 10,
    num_individuals = 50,
    elites_ratio = 0.5,
    cross_ratio = 0.35,
    mutation_ratio = 0.15,
    elite_crossing_probability = 0.8
):
    """Choose a ply for ``nim`` by evolving a population of candidate plies.

    Each generation keeps the best-ranked individuals (elites) and adds
    children produced by ``cross`` and new random plies produced by
    ``mutate``. The best legal individual of the final population is returned.

    Args:
        nim: current game state (not modified).
        num_generations: number of evolution rounds.
        num_individuals: size of the initial population.
        elites_ratio: share of the ranked population kept as elites.
        cross_ratio: share of the population size generated by crossover.
        mutation_ratio: share of the population size generated by mutation.
        elite_crossing_probability: probability that crossover parents are
            taken from the elites.
    """
    # Create first random generation
    current_population = create_random_population(nim = nim, num_individuals = num_individuals)

    for _ in range(num_generations):
        # Rank all individuals of the generation from best to worst
        ranked_population = sort_population(
            nim = nim,
            population = current_population)
        # Keep the elite individuals of the generation
        elite_population = get_elites(
            ranked_population,
            elites_ratio = elites_ratio)
        # Cross individuals to generate new ones
        crossed_population = cross(
            nim = nim,
            current_population = current_population,
            elite_population = elite_population,
            cross_ratio = cross_ratio,
            elite_crossing_probability = elite_crossing_probability)
        # Add fresh random individuals
        mutated_population = mutate(
            nim = nim,
            current_population = current_population,
            mutation_ratio = mutation_ratio)
        # Assemble the new generation
        current_population = elite_population + crossed_population + mutated_population

    # Rank all individuals of the last generation from best to worst
    population = sort_population(
        nim = nim,
        population = current_population)

    # The top-ranked individual may be an illegal ply (it can tie with legal
    # ones on fitness), so return the best individual that is actually legal.
    for ply in population:
        if is_valid_play(nim = nim, ply = ply):
            return ply

    # No legal individual survived: fall back to a random legal move.
    k = nim._k
    legal_moves = [
        Nimply(row, amount)
        for row, count in enumerate(nim.rows)
        for amount in range(1, (count if k is None else min(count, k)) + 1)
    ]
    if legal_moves:
        return random.choice(legal_moves)
    return population[0]

## Simplified match

In [None]:
# Show INFO-level messages so the play-by-play below is visible.
logging.getLogger().setLevel(logging.INFO)

# strategy[0] moves first; the two strategies then alternate turns.
strategy = (optimal, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
# Nim.__bool__ turns False once every row is empty, which ends the game.
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
# `player` was flipped after the final move, so it names the player who did
# NOT take the last object.
logging.info(f"status: Player {player} won!")

INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=6)
INFO:root:status: <1 3 5 7 3>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 7 3>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 3 5 6 3>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=3)
INFO:root:status: <0 3 5 6 0>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <0 1 5 6 0>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=4)
INFO:root:status: <0 1 5 2 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 1 4 2 0>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 0 4 2 0>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 4 1 0>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 4 0 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=2)
INFO:root:status: <0 0

## Match

In [None]:
def match_agents(num_rows = 9, k = None):
    """Play one game between the two agents and print every move.

    Player 0 is the fixed-rules agent (``nim_sum_agent``) and moves first;
    player 1 is the evolved-rules agent (``evolutionary_agent``).

    Args:
        num_rows: number of rows of the starting board.
        k: optional upper bound on the objects removed per move.
    """
    # Game initialization
    nim = Nim(
        num_rows = num_rows,
        k = k
    )

    print("===============[ Fixed rules  agent VS Evolved rules  agent ]===============")
    print()
    print(f"Initial state: {nim}")

    player = 0
    while nim:
        print("----------")
        if player == 0:
            print("Player 0:")
            ply = nim_sum_agent(nim)
            print(f"Fixed rules agent played {ply}")
        else:
            print("Player 1:")
            ply = evolutionary_agent(nim)
            print(f"Evolved rules agent played {ply}")
        nim.nimming(ply)
        print(f"New state: {nim}")
        player = 1 - player

    # The goal is to AVOID taking the last object. `player` was flipped after
    # the final move, so it already names the player who did not take it.
    winner = player
    print(f"Player {winner} has won !")

In [None]:
# Play one full game between the two agents and print the transcript.
match_agents()


Initial state: <1 3 5 7 9 11 13 15 17>
----------
Player 0:
Fixed rules agent played Nimply(row=0, num_objects=1)
New state: <0 3 5 7 9 11 13 15 17>
----------
Player 1:
Evolved rules agent played Nimply(row=8, num_objects=16)
New state: <0 3 5 7 9 11 13 15 1>
----------
Player 0:
Fixed rules agent played Nimply(row=1, num_objects=3)
New state: <0 0 5 7 9 11 13 15 1>
----------
Player 1:
Evolved rules agent played Nimply(row=3, num_objects=3)
New state: <0 0 5 4 9 11 13 15 1>
----------
Player 0:
Fixed rules agent played Nimply(row=2, num_objects=5)
New state: <0 0 0 4 9 11 13 15 1>
----------
Player 1:
Evolved rules agent played Nimply(row=3, num_objects=3)
New state: <0 0 0 1 9 11 13 15 1>
----------
Player 0:
Fixed rules agent played Nimply(row=3, num_objects=1)
New state: <0 0 0 0 9 11 13 15 1>
----------
Player 1:
Evolved rules agent played Nimply(row=8, num_objects=1)
New state: <0 0 0 0 9 11 13 15 0>
----------
Player 0:
Fixed rules agent played Nimply(row=4, num_objects=9)
New

## Functions tests

### create_random_population()

In [None]:
# Smoke check: ten random plies for the five-row board with k = 8.
nim = Nim(num_rows = 5, k = 8)
pop = create_random_population(nim = nim, num_individuals = 10)

print(len(pop))
for individual in pop[:10]:
    print(individual)

10
Nimply(row=1, num_objects=3)
Nimply(row=3, num_objects=7)
Nimply(row=3, num_objects=3)
Nimply(row=2, num_objects=4)
Nimply(row=3, num_objects=1)
Nimply(row=4, num_objects=2)
Nimply(row=2, num_objects=3)
Nimply(row=1, num_objects=1)
Nimply(row=4, num_objects=1)
Nimply(row=1, num_objects=3)


### nim_addition_zero()

In [None]:
# Smoke check: score of the untouched five-row board.
nim = Nim(num_rows = 5, k = 3)
result = nim_addition_zero(nim = nim)

print(result)

0


### evaluate_individual()

In [None]:
# Smoke check: fitness of taking two objects from row 2.
nim = Nim(num_rows = 5, k = 3)
ply = Nimply(row = 2, num_objects = 2)
print(nim, ply, sep = "\n")
result = evaluate_individual(nim = nim, ply = ply)
print(result)

<1 3 5 7 9>
Nimply(row=2, num_objects=2)
0


### sort_population()

In [None]:
# Smoke check: rank three hand-written plies on the five-row board.
nim = Nim(num_rows = 5, k = 9)
ply1, ply2, ply3 = Nimply(2, 2), Nimply(4, 2), Nimply(4, 9)
plys = [ply1, ply2, ply3]
result = sort_population(nim = nim, population = plys)
print(result)

[Nimply(row=4, num_objects=9), Nimply(row=2, num_objects=2), Nimply(row=4, num_objects=2)]


### get_elites()

In [None]:
# Smoke check: the default ratio keeps the first half of the population.
nim = Nim(num_rows = 5, k = 8)
p1, p2, p3, p4 = Nimply(0, 1), Nimply(2, 3), Nimply(4, 5), Nimply(1, 2)
current_pop = [p1, p2, p3, p4]
elites = get_elites(current_pop)

print(len(elites))
for elite in elites[:2]:
    print(elite)

2
Nimply(row=0, num_objects=1)
Nimply(row=2, num_objects=3)


### is_valid_play()

In [None]:
# is_valid_play tests: each case runs against a fresh 3-row board with k = 4.
cases = [
    Nimply(1, 2),   # OK: legal move
    Nimply(-1, 2),  # ply.row < 0
    Nimply(3, 2),   # ply.row > last row index
    Nimply(1, 0),   # ply.num_objects < 1
    Nimply(0, 2),   # ply.num_objects > nim._rows[ply.row]
    Nimply(1, 5),   # ply.num_objects > nim._k
]

for ply in cases:
    nim = Nim(num_rows = 3, k = 4)
    print(is_valid_play(nim, ply))

True
False
False
False
False
False


### cross()

In [None]:
# Smoke check: five individuals with the default cross_ratio yield one child.
nim = Nim(num_rows = 5, k = 8)
p1, p2, p3 = Nimply(0, 1), Nimply(2, 3), Nimply(4, 5)
p4, p5 = Nimply(1, 2), Nimply(3, 4)
current_pop = [p1, p2, p3, p4, p5]
elite_pop = current_pop[:3]
crossed_pop = cross(
    nim = nim,
    current_population = current_pop,
    elite_population = elite_pop,
)

print(len(crossed_pop))
print(crossed_pop[0])

1
Nimply(row=2, num_objects=4)


### mutate()

In [None]:
# Smoke check: a 0.6 ratio on five individuals yields three new plies.
nim = Nim(num_rows = 5, k = 8)
p1, p2 = Nimply(0, 1), Nimply(0, 3)
p3, p4, p5 = Nimply(1, 5), Nimply(1, 2), Nimply(1, 4)
current_pop = [p1, p2, p3, p4, p5]
mutated_pop = mutate(
    nim = nim,
    current_population = current_pop,
    mutation_ratio = 0.6,
)

print(len(mutated_pop))
for mutant in mutated_pop[:3]:
    print(mutant)

3
Nimply(row=3, num_objects=6)
Nimply(row=0, num_objects=1)
Nimply(row=4, num_objects=3)
