# Lab 3 - Nim

In [1]:
import logging
import random
from collections import namedtuple
from copy import deepcopy

In [2]:
logging.basicConfig(format="%(message)s", level=logging.INFO)

In [3]:
Nimply = namedtuple("Nimply", "row, num_objects")

In [4]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [5]:
def nim_sum(elem: list):
  x = 0
  for e in elem:
    x = e ^ x
  return x

## Implementation

### Random action

In [6]:
def random_action(nim: Nim):
  """
    The agent perform a random action
  """
  row = random.choice([r for r, n in enumerate(nim._rows) if n > 0])
  if nim._k:
    num_obj = random.randint(1, min(nim._k, nim._rows[row]))
  else:
    num_obj = random.randint(1, nim._rows[row])
  
  return Nimply(row, num_obj)

### Task 3.1 - An agent using fixed rules based on nim-sum
Based on the explanation available here: https://en.wikipedia.org/wiki/Nim  

It wants to finish every move with a nim-sum of 0, called 'secure position' (then it will win if it does not make mistakes). 

In [7]:
def expert_action(nim: Nim):
  """
    The agent uses fixed rules based on nim-sum (expert-system)

    Returns the index of the pile and the number of pieces removed as a Nimply namedtuple
  """
  board = nim._rows
  k = nim._k

  # Winning move if there is only one row left
  tmp = [(i, r) for i, r in enumerate(board) if r > 0]
  if len(tmp) == 1:
    row, num_obj = tmp[0]
    if not k or num_obj <= k:
      return Nimply(row, num_obj) # Take the entire row


  # Compute the nim-sum of all the heap sizes
  x = nim_sum(board)

  if x > 0:
    # Current player on a insucure position -> is winning
    # --> Has to generate a secure position (bad for the other player)
    # --> Find a heap where the nim-sum of X and the heap-size is less than the heap-size.
    # --> Then play on that heap, reducing the heap to the nim-sum of its original size with X
    
    good_rows = [] # A list is needed because of k
    for row, row_size in enumerate(board):
      if row_size == 0:
        continue
      ns = row_size ^ x # nim sum
      if ns < row_size:
        good_rows.append((row, row_size)) # This row will have nim sum = 0
        
    for row, row_size in good_rows:
      board_tmp = deepcopy(board)
      for i in range(row_size):
       board_tmp[row] -= 1 
       if nim_sum(board_tmp) == 0:  # winning move
        num_obj = abs(board[row] - board_tmp[row])
        if not k or num_obj <= k:
          return Nimply(row, num_obj)
  
  # x == 0 or k force a bad move to the player
  # Current player on a secure position or on a bad position bc of k -> is losing
  # --> Can only generate an insicure position (good for the other player)
  # --> Perform a random action bc it doesn't matter
  return random_action(nim)
  

### Task 3.2 - An agent using evolved rules

In [None]:
pass

### Task 3.3 - An agent using minmax

In [None]:
pass

### Task 3.4 - An agent using reinforcement learning

In [None]:
pass

## Play a game

In [10]:
def evaluate(nim: Nim, n_matches=20, *, my_action=expert_action, opponent_action=expert_action, debug=False):
  """
    You are player 0
  """
  if debug:
    logging.getLogger().setLevel(logging.DEBUG)

  player_action = {
    0: my_action, # our champion
    1: opponent_action # our opponent
    }

  player = 1
  won = 0

  for m in range(n_matches):
    if m/n_matches > 0.5:
      player = 0  # You start
    else:
      player = 1  # Opponent start

    logging.debug(f'Board -> {nim}\tk = {nim._k}')
    logging.debug(f'Player {1-player} starts\n')
    while not sum(nim._rows) == 0:
      ply = player_action[player](nim)
      nim.nimming(ply)
      player = 1 - player
      logging.debug(f'player {player} -> {nim}\tnim_sum = {nim_sum(nim._rows)}')
    
    logging.debug(f'\n### Player {player} won ###\n')
    if player == 0:
      won += 1
      
  return won/n_matches

### Choose the game parameters and play

In [18]:
nim = Nim(7, 3)
evaluate(nim, 100, my_action=expert_action, opponent_action=random_action)

0.49