# Lab 3: Policy Search

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The player **taking the last object wins**.

* Task3.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task3.2: An agent using evolved rules
* Task3.3: An agent using minmax
* Task3.4: An agent using reinforcement learning

## Instructions

* Create the directory `lab3` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

In [20]:
%load_ext autoreload
%autoreload 2

from task1_lib import gabriele, pure_random, fixed_rules_strategy
from task2_lib import run_GA, strategy_0, strategy_1
from nim_utils import evaluate, evaluate_GA, play_match

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Task 1

In [21]:
NUM_MATCHES = 100
NIM_SIZE = 10
K_SIZE = None

print(f"Win-rate against {gabriele.__name__}: {evaluate(fixed_rules_strategy, gabriele, NUM_MATCHES, NIM_SIZE, k_size=K_SIZE)}")
print(f"Win-rate against {pure_random.__name__}: {evaluate(fixed_rules_strategy, pure_random, NUM_MATCHES, NIM_SIZE, k_size=K_SIZE)}")


Win-rate against gabriele: 1.0
Win-rate against pure_random: 0.92


# Task 2

In [22]:
# Strategy 0
print(f"[info] - Strategy 0 results...")
genome_parameters = ["alpha", "beta", "gamma"]
num_generations = 150
best_genome = run_GA(genome_parameters, strategy_0, num_generations)

print(f"Win-rate against gabriele: {evaluate_GA(best_genome, strategy_0, gabriele, NUM_MATCHES, NIM_SIZE, k_size=K_SIZE)}")
print(f"Win-rate against pure_random: {evaluate_GA(best_genome, strategy_0, pure_random, NUM_MATCHES, NIM_SIZE, k_size=K_SIZE)}")

# Strategy 1
print(f"[info] - Strategy 1 results...")
genome_parameters = ["alpha", "beta"]
num_generations = 30
best_genome = run_GA(genome_parameters, strategy_1, num_generations)

print(f"Win-rate against gabriele: {evaluate_GA(best_genome, strategy_1, gabriele, NUM_MATCHES, NIM_SIZE, k_size=K_SIZE)}")
print(f"Win-rate against pure_random: {evaluate_GA(best_genome, strategy_1, pure_random, NUM_MATCHES, NIM_SIZE, k_size=K_SIZE)}")

[info] - Strategy 0 results...
[info] - Start generating the population


100%|██████████| 50/50 [00:03<00:00, 16.20it/s]


[info] - Evolving...


100%|██████████| 150/150 [03:32<00:00,  1.42s/it]


[info] - Best genome found is {'alpha': 0.33806644070961167, 'beta': 0.8983983212868427, 'gamma': 0.9974669535320325} with fitness: (0.92, 0.43)
Win-rate against gabriele: 0.81
Win-rate against pure_random: 0.43
[info] - Strategy 1 results...
[info] - Start generating the population


100%|██████████| 50/50 [00:02<00:00, 23.23it/s]


[info] - Evolving...


100%|██████████| 30/30 [00:25<00:00,  1.20it/s]

[info] - Best genome found is {'alpha': 0.9558146846435235, 'beta': 0.297372949592574} with fitness: (1.0, 0.99)
Win-rate against gabriele: 1.0
Win-rate against pure_random: 0.87





# Challenge (random tuning (0) vs rules-guided tuning (1))


In [32]:
import random
from nimply import Nim


def challenge(genome0, strategy_0, genome1, strategy_1, num_matches=100, nim_size=10, k_size=None):
    won = 0

    for m in range(num_matches):
        nim = Nim(nim_size, k=k_size)
        player = random.randint(0, 1)
        while nim:
            if player == 0:
                ply = strategy_0(nim, genome0)
            else:
                ply = strategy_1(nim, genome1)
            nim.nimming(ply)
            player = 1 - player
        if player == 1:
            won += 1
    return won / num_matches

genome0 = {'alpha': 0.33806644070961167, 'beta': 0.8983983212868427, 'gamma': 0.9974669535320325}
genome1 = {'alpha': 0.8191618758747532, 'beta': 0.10484407005864857}

challenge(genome0, strategy_0, genome1, strategy_1)

0.22

# Task 3

An agent using minmax

In [86]:
from nimply import Nimply
from nimply import Nim
from copy import deepcopy

"""
Generic implementation

def minimax(currentpos,depth,maximizingplayer):
    if depth==0:
        return currentpos
    if maximizingplayer: #we want to get the max
        maxEval=-infinity
        for each child of position
            eval=minimax(child,depth-1,false)
            maxEval=max(maxEval,eval)
        return maxEval

    else
        minEval=+infinity
        for each child of position
            eval=minimax(minEval,eval)
            minEvail=min(minEval,eval)
        return minEval
"""

"""
With possible_new_states(), 
you calculate the possible next states while making sure that a player can’t take more counters than those available on the board.
"""
def possible_moves(state: Nim):
    # retrieve the possible moves
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    # possible moves if k was implemented
    if state.k != None:
        possible_moves = [p for p in possible_moves if p[1] <= state.k]

    return possible_moves

def possible_new_states(state: Nim):
    # returns a list of outcome of all possible moves
    new_states_lists = []
    for p in possible_moves(state):
        temp_state = deepcopy(state)
        temp_state.nimming(Nimply(p[0], p[1]))
        new_states_lists.append(temp_state)
    return new_states_lists
"""
You evaluate a game position with evaluate(). 
If there are no counters left, then the function returns 1 if the maximizing player won the game and -1 if the other—minimizing—player won. 
If the game isn’t over, execution will continue to the end of the function and implicitly return None.
"""
def evaluate(state, is_maximizing):
    if state == 0:
        return 1 if is_maximizing else -1
        
#understand the best possible move to do (highest in score)
#def best_move(state):
#    for take in (1, 2, 3):
#        new_state = state - take
#        score = minimax(new_state, max_turn=False)
#        if score > 0:
#            break
#    return score, new_state

#minmax strategy
def minimax(state: Nim, is_maximizing):
    #ending position
    if (score := evaluate(state, is_maximizing)) is not None:
        return -1 if is_maximizing else 1


    if is_maximizing:
        scores = [
            minimax(new_state, is_maximizing=False)
            for new_state in possible_new_states(state)
        ]
        return max(scores)
    else:
        scores = [
            minimax(new_state, is_maximizing=True)
            for new_state in possible_new_states(state)
        ]
        return min(scores)
#max_turn=is_maximizing

def minmax_strategy(state:Nim):
    for move in possible_moves(state):
        #from lecture code
        tmp = deepcopy(state)
        tmp.nimming(Nimply(move[0], move[1]))
        #calculate the score
        score = minimax(tmp, is_maximizing=False)
        if score > 0:
            break
    return Nimply(move[0], move[1])

Oversimplified match 

In [87]:
import logging
import random
from task1_lib import gabriele, pure_random, fixed_rules_strategy

logging.getLogger().setLevel(logging.DEBUG)

# strategy = (pure_random, minmax_best_move)
# strategy = (minmax_best_move, pure_random)
strategy = (pure_random, minmax_strategy)
# strategy = (minmax_best_move, optimal_strategy)

nim = Nim(3)

logging.debug(f"status: Initial board  -> {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    player = 1 - player
winner = 1 - player
logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5>
DEBUG:root:status: After player 0 -> <1 0 5>


ValueError: max() arg is an empty sequence

# Task 4

## Oversimplified match

In [77]:
play_match(fixed_rules_strategy, pure_random, 10, k_size=None)

NameError: name 'play_match' is not defined