# Lab 3: Policy Search

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The player **taking the last object wins**.

* Task3.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task3.2: An agent using evolved rules
* Task3.3: An agent using minmax
* Task3.4: An agent using reinforcement learning


In [2036]:
import logging
from itertools import permutations
from collections import namedtuple
import random
# from typing import Callable
from copy import deepcopy,copy
from functools import reduce
# from operator import xor

In [2037]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])

In [2038]:
class Nim:
    """Board state of a Nim game with an optional per-move removal cap.

    Row ``i`` starts with ``2 * i + 1`` objects (1, 3, 5, 7, ...). An instance
    is truthy while at least one object is left on the board.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Build the starting position.

        Args:
            num_rows: Number of rows on the board.
            k: Maximum number of objects that may be removed in one move, or
                ``None`` when there is no upper bound.
        """
        self.num_rows = num_rows
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while the game is still running (objects remain)."""
        return sum(self._rows) > 0

    def __str__(self):
        """Render the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    def play(self, ply: Nimply) -> None:
        """Apply ``ply`` to the board in place.

        Args:
            ply: ``(row, num_objects)`` pair describing the move.

        Raises:
            AssertionError: if the move removes fewer than one object, more
                objects than the row holds, or more than the cap ``k``.
            IndexError: if ``row`` is not a valid row index.
        """
        row, num_objects = ply
        # A move must take at least one object; without this check a zero or
        # negative amount would silently pass (or even add objects).
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

    def possible_plays(self) -> list:
        """Return every legal move from the current position.

        Moves are ordered by row index, then by increasing number of objects.
        The list is empty when no row has objects left (including a board
        that has no rows at all).
        """
        plays = []
        # Walk the actual rows rather than range(num_rows) so the result stays
        # correct even if `_rows` was replaced by a list of another length.
        for row, count in enumerate(self._rows):
            limit = count if self._k is None else min(count, self._k)
            plays.extend(Nimply(row, taken) for taken in range(1, limit + 1))
        return plays
        

## Task 3.1 Expert System

In [2039]:
def pure_random(state: Nim) -> Nimply:
    """Return a random legal move for ``state``.

    A non-empty row is chosen uniformly at random, then a random number of
    objects between 1 and the largest amount allowed from that row.

    Args:
        state: The game to move in; its ``_rows`` and ``_k`` are read.

    Returns:
        The chosen move as a ``Nimply``.

    Raises:
        IndexError: if every row is empty, since there is nothing to choose.
    """
    non_empty_rows = [r for r, c in enumerate(state._rows) if c > 0]
    row = random.choice(non_empty_rows)
    upper = state._rows[row]
    # Respect the per-move cap; otherwise the move could be rejected by
    # Nim.play whenever the chosen row holds more than k objects.
    if state._k is not None:
        upper = min(upper, state._k)
    num_objects = random.randint(1, upper)
    return Nimply(row, num_objects)

In [2040]:
# Fresh four-row board for the match played further down in this cell.
Game = Nim(num_rows=4)
def calculate_nim_sum(rows):
    """Return the nim-sum (bitwise XOR) of all row sizes in ``rows``.

    Args:
        rows: Iterable of integer row sizes.

    Returns:
        The XOR of every element, or 0 when ``rows`` is empty (0 is the
        identity for XOR, so an empty board has nim-sum 0).
    """
    nim_sum = 0
    for size in rows:
        nim_sum ^= size
    return nim_sum


def expertSystem(Game: Nim) -> Nimply:
    """Choose a move with the nim-sum rule (last object taken wins).

    Each row is reduced to its Grundy value: the row size itself when there is
    no cap, or ``size % (k + 1)`` when at most ``k`` objects may be removed
    per move. A move is winning when the XOR of all Grundy values is 0 after
    playing it. One winning move is picked at random; if none exists, a random
    legal move is returned instead.

    Args:
        Game: The game to move in; it is not modified.

    Returns:
        The chosen move as returned by ``Game.possible_plays()``.

    Raises:
        IndexError: if there is no legal move.
    """
    rows = Game._rows
    cap = Game._k

    def grundy(size):
        return size if cap is None else size % (cap + 1)

    total = 0
    for size in rows:
        total ^= grundy(size)

    moves = Game.possible_plays()
    winning = []
    for move in moves:
        row, taken = move
        # XOR-ing out the old row value and XOR-ing in the new one gives the
        # nim-sum after the move without copying the whole game.
        after = total ^ grundy(rows[row]) ^ grundy(rows[row] - taken)
        if after == 0:
            winning.append(move)

    return random.choice(winning) if winning else random.choice(moves)


# Player 0 follows the nim-sum expert, player 1 moves at random.
strategy = [expertSystem, pure_random]

player = 0
while Game:
    ply = strategy[player](Game)
    Game.play(ply)
    print(f"status: After player {player} -> {Game}")
    # Hand the turn to the other player (0 <-> 1).
    player = (player + 1) % 2
# The loop flips `player` once more after the last move, so flip it back.
winner = (player + 1) % 2
print(f"status: Player {winner} won!")

status: After player 0 -> <1 3 1 7>
status: After player 1 -> <1 3 0 7>
status: After player 0 -> <1 3 0 2>
status: After player 1 -> <1 1 0 2>
status: After player 0 -> <1 1 0 0>
status: After player 1 -> <1 0 0 0>
status: After player 0 -> <0 0 0 0>
status: Player 0 won!


## Task 3.2 : Evolved Rules 
### Base-Nim Strategy 

In [2041]:
def decimal_to_base3(decimal_number):
    """Return the base-3 representation of ``decimal_number`` as a string.

    Zero maps to ``'0'``. Digits are produced from least to most significant
    and prepended, so the result reads most-significant digit first.
    """
    if decimal_number == 0:
        return '0'

    digits = ''
    remaining = decimal_number
    while remaining > 0:
        digits = str(remaining % 3) + digits
        remaining = remaining // 3
    return digits

def convert_to_base_nim(rows):
    """Return the XOR of the row sizes after a base-3 re-encoding.

    Each size is written in base 3 and that digit string is then read back as
    an ordinary decimal integer (e.g. 10 -> '101' -> 101); the resulting
    integers are XOR-ed together, starting from 0.
    """
    return reduce(
        lambda acc, size: acc ^ int(decimal_to_base3(size)),
        rows,
        0,
    )


In [2042]:
# Quick check of the converter: 10 in base 3 is printed as "101".
decimal_number = 10
base3_number = decimal_to_base3(decimal_number)
print(base3_number)


101


In [2043]:
# List the legal moves of a hand-built three-row position.
g = Nim(3)
g._rows = [1, 3, 2]
print(list(g.possible_plays()))



[Nimply(row=0, num_objects=1), Nimply(row=1, num_objects=1), Nimply(row=1, num_objects=2), Nimply(row=1, num_objects=3), Nimply(row=2, num_objects=1), Nimply(row=2, num_objects=2)]


In [2044]:

def Base_Nim(Game: Nim) -> Nimply:
    """Choose a move using the base-3 score of each resulting position.

    Every legal move is tried on a copy of the game and scored with
    ``convert_to_base_nim``. If the lowest score is non-zero, a random move
    with a non-zero score is returned; otherwise a random move is returned.

    Raises:
        IndexError: if there is no legal move.
    """
    scored = []
    for candidate in Game.possible_plays():
        after = deepcopy(Game)
        after.play(candidate)
        scored.append((candidate, convert_to_base_nim(after._rows)))
    scored.sort(key=lambda pair: pair[1])

    # NOTE(review): if scores are never negative, a non-zero minimum means all
    # scores are non-zero, so both branches appear to pick uniformly among all
    # legal moves -- confirm this is the intended rule.
    if scored[0][1] != 0:
        nonzero_moves = [move for move, score in scored if score != 0]
        return random.choice(nonzero_moves)
    return random.choice(scored)[0]

In [2045]:
# New four-row game: player 0 moves at random, player 1 uses Base_Nim.
Game = Nim(num_rows=4)

strategy = [pure_random, Base_Nim]

player = 0
while Game:
    ply = strategy[player](Game)
    Game.play(ply)
    print(f"status: After player {player} -> {Game}")
    # Hand the turn to the other player (0 <-> 1).
    player = (player + 1) % 2
# The loop flips `player` once more after the last move, so flip it back.
winner = (player + 1) % 2
print(f"status: Player {winner} won!")

status: After player 0 -> <1 3 3 7>
status: After player 1 -> <0 3 3 7>
status: After player 0 -> <0 1 3 7>
status: After player 1 -> <0 1 2 7>
status: After player 0 -> <0 1 2 6>
status: After player 1 -> <0 1 2 1>
status: After player 0 -> <0 1 0 1>
status: After player 1 -> <0 1 0 0>
status: After player 0 -> <0 0 0 0>
status: Player 0 won!


## Task 3.3: minmax

In [2046]:
def eval_terminal(Game):
    """Return the total number of objects left in ``Game``'s rows.

    A result of 0 means every row is empty.
    """
    remaining = 0
    for row_size in Game._rows:
        remaining += row_size
    return remaining
    

In [2047]:
def _negamax_value(rows, k, cache):
    """Return +1 if the player to move wins from ``rows``, otherwise -1.

    ``rows`` is any iterable of row sizes, ``k`` the per-move cap (or None)
    and ``cache`` a dict memoising results per canonical position.
    """
    # Empty rows and row order do not affect the outcome, so use the sorted
    # non-empty sizes as the memoisation key.
    state = tuple(sorted(size for size in rows if size > 0))
    if not state:
        # Nothing left: the opponent took the last object and has won.
        return -1
    if state in cache:
        return cache[state]

    value = -1
    previous = None
    for index, size in enumerate(state):
        if size == previous:
            # Equal-sized rows lead to the same set of child positions.
            continue
        previous = size
        limit = size if k is None else min(size, k)
        for taken in range(1, limit + 1):
            child = state[:index] + (size - taken,) + state[index + 1:]
            if _negamax_value(child, k, cache) == -1:
                value = 1  # a move that leaves the opponent losing
                break
        if value == 1:
            break

    cache[state] = value
    return value


def minmax(Game: Nim) -> Nimply:
    """Choose a move by exhaustive game-tree search (last object taken wins).

    Every legal move is evaluated with a memoised negamax search. A random
    move that leaves the opponent in a losing position is returned; if no such
    move exists, a random legal move is returned.

    Args:
        Game: The game to move in; it is not modified.

    Returns:
        The chosen move. When the game is already over (or has no legal
        move), the legacy tuple ``(None, total_objects_left)`` is returned
        instead, unchanged from the previous implementation.
    """
    remaining = sum(Game._rows)
    possible = Game.possible_plays()
    if remaining == 0 or not possible:
        return None, remaining

    cache = {}
    winning = []
    for ply in possible:
        row, taken = ply
        child = list(Game._rows)
        child[row] -= taken
        if _negamax_value(child, Game._k, cache) == -1:
            winning.append(ply)

    return random.choice(winning) if winning else random.choice(possible)


In [2048]:
# New three-row game: player 0 uses the nim-sum expert, player 1 uses minmax.
Game = Nim(num_rows=3)
strategy = [expertSystem, minmax]

player = 0
while Game:
    ply = strategy[player](Game)
    Game.play(ply)
    print(f"status: After player {player} -> {Game}")
    # Hand the turn to the other player (0 <-> 1).
    player = (player + 1) % 2
# The loop flips `player` once more after the last move, so flip it back.
winner = (player + 1) % 2
print(f"status: Player {winner} won!")


status: After player 0 -> <1 3 2>
status: After player 1 -> <0 3 2>
status: After player 0 -> <0 2 2>
status: After player 1 -> <0 1 2>
status: After player 0 -> <0 1 1>
status: After player 1 -> <0 0 1>
status: After player 0 -> <0 0 0>
status: Player 0 won!


## Task 3.4 : Reinforcement Learning