Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: Policy Search

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The player **taking the last object wins**.

* Task3.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task3.2: An agent using evolved rules
* Task3.3: An agent using minmax
* Task3.4: An agent using reinforcement learning

## Instructions

* Create the directory `lab3` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) inside that directory

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.

## Deadlines ([AoE](https://en.wikipedia.org/wiki/Anywhere_on_Earth))

* Sunday, December 4th for Task3.1 and Task3.2
* Sunday, December 11th for Task3.3 and Task3.4
* Sunday, December 18th for all reviews

In [647]:
import logging
from collections import namedtuple
import random
from typing import Callable

## The *Nim* and *Nimply* classes

In [648]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])

In [649]:
class Nim:
    """Mutable Nim board with an optional cap on the objects taken per move.

    Row ``i`` starts with ``2 * i + 1`` objects, so ``Nim(3)`` is ``<1 3 5>``.
    The board is truthy while at least one object is left on it.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Build ``num_rows`` rows; ``k`` caps a single move (``None`` = no cap)."""
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while objects remain, i.e. the game is not over."""
        return sum(self._rows) > 0

    def __str__(self):
        """Render the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Maximum number of objects removable in one move, or ``None``."""
        return self._k

    def nimming(self, ply: Nimply) -> None:
        """Apply a move, removing ``ply.num_objects`` objects from ``ply.row``.

        Raises:
            AssertionError: if the move takes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A move must take at least one object: zero would be a pass, and a
        # negative amount would silently put objects back on the row.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

## Task 3.1 - Fixed rules


In [650]:
def pure_random(state: Nim) -> Nimply:
    """Return a uniformly random legal move on a random non-empty row.

    The number of objects is drawn between 1 and the row size, further
    limited by ``state.k`` when the board has a per-move cap, so the move is
    always accepted by ``Nim.nimming``.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    limit = state.rows[row]
    if state.k is not None:
        # Without this cap the move could exceed k and trip nimming's assertion.
        limit = min(limit, state.k)
    num_objects = random.randint(1, limit)
    return Nimply(row, num_objects)

## My strategies

In [651]:
def active_rows_number(state: Nim) -> int:
    """Count the rows that still hold at least one object."""
    non_empty = [objects for objects in state.rows if objects > 0]
    return len(non_empty)

def pick_max_from_highest(state: Nim) -> Nimply:
    """Take as many objects as allowed from the highest-indexed non-empty row.

    "As many as allowed" is the whole row, limited by ``state.k`` when the
    board has a per-move cap.

    Raises:
        ValueError: if every row is empty (there is no move to pick).
    """
    rows = state.rows
    # Highest index among the rows that still contain objects.
    row = max(r for r, c in enumerate(rows) if c > 0)
    num_objects = rows[row]
    if state.k is not None:
        num_objects = min(num_objects, state.k)
    return Nimply(row, num_objects)

def pick_min_from_highest(state: Nim) -> Nimply:
    """Take a single object from the highest-indexed non-empty row.

    Raises:
        ValueError: if every row is empty (there is no move to pick).
    """
    # The smallest legal move on any non-empty row is always one object, so
    # only the row index has to be searched for.
    row = max(r for r, c in enumerate(state.rows) if c > 0)
    return Nimply(row, 1)

def count_rows_and_choose(state: Nim) -> Nimply:
    """Pick a move based on the parity of the number of non-empty rows.

    With an even count, take the minimum from the highest row; with an odd
    count, take the maximum from the highest row.
    """
    odd_row_count = active_rows_number(state) % 2 != 0
    chooser = pick_max_from_highest if odd_row_count else pick_min_from_highest
    return chooser(state)

def pick_odd_number_of_elements(state: Nim) -> Nimply:
    """Remove a random odd number of objects from a random non-empty row.

    The amount never exceeds the row size nor ``state.k`` (when the board has
    a per-move cap); 1 is always a valid candidate.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    limit = state.rows[row]
    if state.k is not None:
        # Keep the move legal on capped boards.
        limit = min(limit, state.k)
    odd_amounts = list(range(1, limit + 1, 2))
    return Nimply(row, random.choice(odd_amounts))

def pick_even_number_of_elements(state: Nim) -> Nimply:
    """Remove a random even number of objects from a random non-empty row.

    The amount never exceeds the row size nor ``state.k`` (when the board has
    a per-move cap). When no even amount is available (the row holds a single
    object, or ``k`` is 1) a single object is taken instead.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    limit = state.rows[row]
    if state.k is not None:
        # Keep the move legal on capped boards.
        limit = min(limit, state.k)
    even_amounts = list(range(2, limit + 1, 2))
    if not even_amounts:
        return Nimply(row, 1)
    return Nimply(row, random.choice(even_amounts))

In [652]:
# Number of games played by each call to evaluate().
NUM_MATCHES = 100
# Number of rows of every Nim board created by evaluate().
NIM_SIZE = 11

def evaluate(strategy: Callable) -> float:
    """Return the fraction of games `strategy` wins against `pure_random`.

    NUM_MATCHES games are played on fresh boards of NIM_SIZE rows; `strategy`
    always moves first and the player removing the last object wins.
    """
    players = (strategy, pure_random)
    victories = 0

    for _ in range(NUM_MATCHES):
        board = Nim(NIM_SIZE)
        turn = 0
        while board:
            board.nimming(players[turn](board))
            turn = 1 - turn
        # `turn` was flipped after the final move, so the last mover is the other one.
        last_mover = 1 - turn
        if last_mover == 0:
            victories += 1
    return victories / NUM_MATCHES

In [653]:
evaluate(pick_max_from_highest)

0.6

In [654]:
evaluate(pick_min_from_highest)

0.41

In [655]:
evaluate(pick_odd_number_of_elements)

0.41

In [656]:
evaluate(pick_even_number_of_elements)

0.65

In [657]:
evaluate(count_rows_and_choose)

0.88

## Task 3.2 - Evolving agents

In [658]:
def make_my_first_strategy(genome: dict) -> Callable:
    """Build a strategy mixing `count_rows_and_choose` and `pure_random`.

    On every move the returned callable uses `count_rows_and_choose` with
    probability ``genome["p"]`` and `pure_random` otherwise.
    """

    def evolvable(state: Nim) -> Nimply:
        use_rule = random.random() < genome["p"]
        chosen = count_rows_and_choose if use_rule else pure_random
        return chosen(state)

    return evolvable

In [659]:
def make_my_second_strategy(genome: dict) -> Callable:
    """Build a strategy mixing the odd-amount and even-amount pickers.

    On every move the returned callable uses `pick_odd_number_of_elements`
    with probability ``genome["p"]`` and `pick_even_number_of_elements`
    otherwise.
    """

    def evolvable(state: Nim) -> Nimply:
        go_odd = random.random() < genome["p"]
        chosen = pick_odd_number_of_elements if go_odd else pick_even_number_of_elements
        return chosen(state)

    return evolvable

In [660]:
def make_my_third_strategy(genome: dict) -> Callable:
    """Build a strategy mixing the min and max pickers on the highest row.

    On every move the returned callable uses `pick_min_from_highest` with
    probability ``genome["p"]`` and `pick_max_from_highest` otherwise.
    """

    def evolvable(state: Nim) -> Nimply:
        go_min = random.random() < genome["p"]
        chosen = pick_min_from_highest if go_min else pick_max_from_highest
        return chosen(state)

    return evolvable

# Try to evolve
## Setting parameters

In [661]:
# A candidate solution: its genome (a dict holding the probability "p") and its measured fitness.
Individual = namedtuple("Individual",["genome", "fitness"])
POPULATION_SIZE = 100  # individuals created initially and kept after each generation
NUM_GENERATIONS = 101  # iterations of the evolutionary loop
OFFSPRING_SIZE = 30  # new individuals generated per generation
MUT_RATE = 0.5  # probability that an offspring comes from mutation rather than crossover

def compute_fitness(genome, strategy):
    """Return the win rate of the player that `strategy` builds from `genome`."""
    player = strategy(genome)
    return evaluate(player)

def tournament(population, tournament_size=2):
    """Return the fittest of `tournament_size` individuals drawn with replacement."""

    def fitness_of(individual):
        return individual.fitness

    contenders = random.choices(population, k=tournament_size)
    return max(contenders, key=fitness_of)

def mutation(g):
    """Return a mutated copy of genome `g`; `g` itself is left untouched.

    With probability 0.5 the new "p" is a fresh random value, otherwise it is
    the old "p" shifted by 0.1 and wrapped back into [0, 1).
    """
    resample = random.random() < 0.5
    new_p = random.random() if resample else (g["p"] + 0.1) % 1
    return {"p": new_p}

def crossover(g1, g2):
    """Return a new genome whose "p" is the average of the parents' "p" values."""
    return {"p": (g1["p"] + g2["p"]) / 2}

## Evolution

In [662]:
def my_genetic_algorithm(population, strategy):
    """Evolve `population` for NUM_GENERATIONS generations.

    Each generation creates OFFSPRING_SIZE new individuals, each by mutation
    (with probability MUT_RATE) or by crossover of two tournament winners,
    then keeps the POPULATION_SIZE fittest of parents plus offspring. The
    best individual is printed every 10 generations.

    Args:
        population: initial list of `Individual`s; it is not modified.
        strategy: factory turning a genome into a playable strategy, used to
            measure the fitness of every new genome.

    Returns:
        The final population, sorted by decreasing fitness.
    """
    # Copy so the caller's list is never extended in place.
    population = list(population)

    for generation in range(NUM_GENERATIONS):
        offspring = []
        for _ in range(OFFSPRING_SIZE):
            if random.random() < MUT_RATE:
                parent = tournament(population)
                genome = mutation(parent.genome)
            else:
                parent_1 = tournament(population)  # promising genome 1
                parent_2 = tournament(population)  # promising genome 2
                genome = crossover(parent_1.genome, parent_2.genome)
            fitness = compute_fitness(genome, strategy)
            offspring.append(Individual(genome, fitness))

        # Survivor selection: the fittest of parents and offspring together.
        population = sorted(
            population + offspring, key=lambda i: i.fitness, reverse=True
        )[:POPULATION_SIZE]

        if generation % 10 == 0:
            best_so_far = population[0]
            print(f"GEN #{generation}\t\tGENOME: {best_so_far.genome}\tFITNESS: {best_so_far.fitness}")

    return population

In [663]:
def evolution(strategy):
    """Create a random initial population and evolve it for `strategy`.

    Every initial genome gets a uniformly random "p" and is evaluated right
    away; the population is then handed to `my_genetic_algorithm`.
    """

    def spawn():
        # Draw the genome and evaluate it immediately, one individual at a time.
        genome = {"p": random.random()}
        return Individual(genome, compute_fitness(genome, strategy))

    population = [spawn() for _ in range(POPULATION_SIZE)]
    my_genetic_algorithm(population, strategy)

In [664]:
evolution(make_my_first_strategy)

GEN #0		GENOME: {'p': 0.9333501176045169}	FITNESS: 0.89
GEN #10		GENOME: {'p': 0.9909345745159256}	FITNESS: 0.93
GEN #20		GENOME: {'p': 0.9909345745159256}	FITNESS: 0.93
GEN #30		GENOME: {'p': 0.9909345745159256}	FITNESS: 0.93
GEN #40		GENOME: {'p': 0.9909345745159256}	FITNESS: 0.93
GEN #50		GENOME: {'p': 0.9909345745159256}	FITNESS: 0.93
GEN #60		GENOME: {'p': 0.9988912130583779}	FITNESS: 0.95
GEN #70		GENOME: {'p': 0.9988912130583779}	FITNESS: 0.95
GEN #80		GENOME: {'p': 0.9988912130583779}	FITNESS: 0.95
GEN #90		GENOME: {'p': 0.9988912130583779}	FITNESS: 0.95
GEN #100		GENOME: {'p': 0.9988912130583779}	FITNESS: 0.95


In [667]:
evolution(make_my_second_strategy)

GEN #0		GENOME: {'p': 0.15498759917761262}	FITNESS: 0.63
GEN #10		GENOME: {'p': 0.26342618705033827}	FITNESS: 0.66
GEN #20		GENOME: {'p': 0.26342618705033827}	FITNESS: 0.66
GEN #30		GENOME: {'p': 0.3994988135854459}	FITNESS: 0.68
GEN #40		GENOME: {'p': 0.3994988135854459}	FITNESS: 0.68
GEN #50		GENOME: {'p': 0.3994988135854459}	FITNESS: 0.68
GEN #60		GENOME: {'p': 0.3994988135854459}	FITNESS: 0.68
GEN #70		GENOME: {'p': 0.3994988135854459}	FITNESS: 0.68
GEN #80		GENOME: {'p': 0.3994988135854459}	FITNESS: 0.68
GEN #90		GENOME: {'p': 0.3994988135854459}	FITNESS: 0.68
GEN #100		GENOME: {'p': 0.3994988135854459}	FITNESS: 0.68


In [666]:
evolution(make_my_third_strategy)

GEN #0		GENOME: {'p': 0.25456029655297585}	FITNESS: 0.7
GEN #10		GENOME: {'p': 0.1242210242074703}	FITNESS: 0.73
GEN #20		GENOME: {'p': 0.24297626293277547}	FITNESS: 0.74
GEN #30		GENOME: {'p': 0.24297626293277547}	FITNESS: 0.74
GEN #40		GENOME: {'p': 0.24297626293277547}	FITNESS: 0.74
GEN #50		GENOME: {'p': 0.10956652901021742}	FITNESS: 0.76
GEN #60		GENOME: {'p': 0.10956652901021742}	FITNESS: 0.76
GEN #70		GENOME: {'p': 0.10956652901021742}	FITNESS: 0.76
GEN #80		GENOME: {'p': 0.10956652901021742}	FITNESS: 0.76
GEN #90		GENOME: {'p': 0.10956652901021742}	FITNESS: 0.76
GEN #100		GENOME: {'p': 0.10956652901021742}	FITNESS: 0.76
