# Lab 3 - Nim  
## Task3.3: An agent using minmax   

In [1]:
import logging
import random
from copy import deepcopy

from nim import Nimply, Nim

In [2]:
logging.basicConfig(format="%(message)s", level=logging.INFO)

## Implementaton

In [3]:
def hash_id(state: list, player: int):
  """
    Computes the hash of the tuple tuple(state) + (player, ), where:
    - state is the list of rows, i.e. the board
    - player is either 0 or 1
  """
  assert player == 1 or player == 0
  return hash(tuple(sorted(state)) + (player, ))


Node class

In [4]:
class Node():
  """
    State of the grapth that contains:
    - id: hash of tuple(state)+(player,)
    - state: copy of the state (nim._rows)
    - player: either 0 or 1
    - utility: value initialized to 0, becomes either -inf or +inf
    - children: list of nodes
    - parents: list of nodes 
    - actions: list of possible actions as Nimply objects
  """

  def __init__(self, state: list, player: int):
    assert player == 1 or player == 0
    
    self.id = hash_id(state, player)
    self.state = deepcopy(state)
    self.player = player # Me (0) -> max ; Opponent (1) -> min
    
    self.utility = 0  # -inf if I lose, +inf if I win
    self.children = []
    self.parents = []
    self.possible_acitions() # creates self.actions


  def __eq__(self, other):
    return isinstance(other, Node) and self.state == other.state and self.player == other.player


  def link_parent(self, parent):
    """
      Links the actual node with the parent node
    """
    assert isinstance(parent, Node)
    assert self.player != parent.player

    if parent not in self.parents:
      self.parents.append(parent)


  def link_child(self, child):
    """
      Links the child node to the actual node
    """
    assert isinstance(child, Node)
    assert self.player != child.player

    if child not in self.children:
      self.children.append(child)


  def is_leaf(self):
    return sum(self.state) == 0

  
  def leaf_utility(self):
    """
      Returns the utility of a leaf:
      - player 0 on leaf --> I lost, then utility = -inf
      - player 1 on leaf --> I won, then utility = +inf 
    """
    if self.is_leaf():
      if self.player == 0: 
        return float('-inf')     # I lost (the opponent took the last piece) 
      else: return float('+inf') # I won


  def possible_acitions(self, k=None):
    """
      Computes all the possible action reachable from the actual node
      and saves them inside self.actions 
    """
    self.actions = []
    
    if self.is_leaf():
      return

    not_zero_rows = [(r, n) for r, n in enumerate(self.state) if n > 0]
    for row, num_obj in not_zero_rows:  
      while num_obj > 0:
        if k and num_obj > k:
          num_obj = k
          continue
        self.actions.append(Nimply(row, num_obj))
        num_obj -= 1


Game Tree class

In [5]:
class GameTree():
  """
    Game Tree comosed of nodes that could have multiple parents and multiple children.  
    
    The roots is one:
    - Starting state + starting player = 0 
    The leafs are two:
    - State of all zeros + finish player = 0  (I lose)
    - State of all zeros + finish player = 1  (I win)
    
    The class contains the following attributs:
    - k: nim._k
    - start_player: either 0 or 1
    - dict_id_node: dictionary that maps the node id to the actual node
    - dict_id_utility_action: dictionary that maps the node id to a tuple (utility, action), where:
      - utility: utility of the node
      - action: better action to take (Nimply object)
    - root: root node (Node object)    
  """

  def __init__(self, nim: Nim, start_player=0):
    self.k = nim._k
    self.start_player = start_player
    self.dict_id_node = {}    
    self.dict_id_utility_action = {} 
    
    self.root = Node(nim._rows, start_player)
    self.dict_id_node[self.root.id] = self.root


  def min_max(self):
    """
      MinMax using a recursive function that expands a node by trying every possible action of that node.  

      The recursive function returns the utility of the children and the parent will select  
       the best utility according to who is playing at that layer:
      - if player 1 is playing, than minimize the reward (look for utility = -inf)
      - if player 0 is playing, than maximize the reward (look for utility = +inf)

      The alpha-beta pruning is implemented:  
       if the player finds a child with the desired utility, it stops looking
      becouse he will win choosing that action to go to that state.
    """

    def recursive_min_max(node: Node):  
      # Stop condition
      if node.id in self.dict_id_utility_action:
        logging.debug(f'State {node.state} ({node.player}) already computed: {self.dict_id_utility_action[node.id][0]}')
        return self.dict_id_utility_action[node.id][0] # just the utility value
      
      if node.is_leaf():
        node.utility = node.leaf_utility()
        logging.debug(f'Leaf player {node.player}')
        return node.utility


      # Recursive part
      for ply in node.actions:
        row, num_obj = ply
        
        # Check rules
        assert node.state[row] >= num_obj
        assert self.k is None or num_obj <= self.k

        # Create the child
        child_state = deepcopy(node.state)
        child_state[row] -= num_obj # nimming
        child_id = hash_id(child_state, 1 - node.player)
        if child_id in self.dict_id_node: # node already exists
          child = self.dict_id_node[child_id]
        else: # create the new node
          child = Node(child_state, 1 - node.player)
        
        # Link parent and child
        node.link_child(child)
        child.link_parent(node)

        # Recursion
        best_utility = recursive_min_max(child)
        
        # Update the values
        opp_wins = node.player == 1 and best_utility == float('-inf')  # opponent will win
        i_win = node.player == 0 and best_utility == float('+inf')  # I will win
        if i_win or opp_wins:
          node.utility = best_utility
          self.dict_id_utility_action[node.id] = (node.utility, ply)
          return node.utility
          
      # This player will surelly lose otherwise he would have returned before
      node.utility = best_utility
      ply = random.choice(node.actions) # it doesn't matter the ply, he will lose
      self.dict_id_utility_action[node.id] = (node.utility, ply)
    
      return node.utility
    
    
    utility = recursive_min_max(self.root)
    if self.start_player == 0 and utility == float('+inf'):
      logging.info('The starting player will WIN')
      logging.info(f'--> move {self.dict_id_utility_action[self.root.id][1]}')
      return self.dict_id_utility_action[self.root.id]
    else:
      logging.info('The starting player will LOSE')
      return self.dict_id_utility_action[self.root.id] 
    

  def best_action(self, node: Node):
    """
      Returns the best aciton at that state
    """
    assert self.root.id in self.dict_id_utility_action
    assert node.id in self.dict_id_utility_action

    return self.dict_id_utility_action[node.id]



## Play

In [7]:
nim = Nim(5)
game_tree0 = GameTree(nim, start_player=0) # I start
game_tree1 = GameTree(nim, start_player=1) # Opponent starts

game_tree0.min_max()
game_tree1.min_max()


The starting player will WIN
--> move Nimply(row=4, num_objects=9)
The starting player will LOSE


(-inf, Nimply(row=4, num_objects=9))