(c) Laura Amoroso s313813

# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [1]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
from numpy.random import normal
import numpy as np
from math import ceil
import random
from copy import deepcopy

## The *Nim* and *Nimply* classes

In [2]:
# A move: remove `num_objects` objects from the row with index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])
# Standard deviation of the Gaussian noise used to mutate the weights in the
# evolution-strategy cells below.
sigma = 0.3


class Nim:
    """State of a Nim game: a list of rows plus an optional per-move cap.

    Row ``i`` starts with ``2 * i + 1`` objects (1, 3, 5, ...). ``k`` is the
    maximum number of objects that may be removed in one move; ``None``
    means there is no cap.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        # Consecutive odd numbers, one per row.
        self._rows = list(range(1, 2 * num_rows, 2))
        self._k = k

    def __bool__(self):
        """Return True while the total number of objects left is positive."""
        remaining = sum(self._rows)
        return remaining > 0

    def __str__(self):
        """Return the row sizes as ``<a b c ...>``."""
        return f"<{' '.join(map(str, self._rows))}>"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Per-move cap given at construction (``None`` when absent)."""
        return self._k

    def nimming(self, ply: Nimply) -> None:
        """Apply the move ``ply`` in place.

        The move is checked with ``assert``: it must not take more objects
        than the row holds, nor more than ``k`` when a cap is set.
        """
        row, taken = ply
        available = self._rows[row]
        assert taken <= available
        assert self._k is None or taken <= self._k
        self._rows[row] = available - taken

In [3]:

def pure_random(state: Nim,weights=None) -> Nimply:
    """Return a uniformly random legal move.

    A non-empty row is chosen at random, then a random number of objects
    between 1 and the largest amount allowed is taken from it.

    Args:
        state: current game; only ``state.rows`` and ``state.k`` are read.
        weights: unused, accepted so that every strategy can be called the
            same way by ``match``.

    Returns:
        The chosen move as a ``Nimply``.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    limit = state.rows[row]
    if state.k:
        # Never take more than the per-move cap, otherwise Nim.nimming
        # rejects the move.
        limit = min(limit, state.k)
    num_objects = random.randint(1, limit)
    return Nimply(row, num_objects)
    
def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of ``state``: the bitwise XOR of all row sizes.

    The XOR is folded directly over the integers, so it is not limited to
    32-bit row sizes and returns 0 for a board with no rows.

    Args:
        state: current game; only ``state.rows`` is read.

    Returns:
        The XOR of every value in ``state.rows``.
    """
    result = 0
    for count in state.rows:
        result ^= count
    return result


def gabriele(state: Nim,weights=None) -> Nimply:
    """Take as many objects as allowed from the first non-empty row.

    Args:
        state: current game; only ``state.rows`` and ``state.k`` are read.
        weights: unused, accepted so that every strategy can be called the
            same way by ``match``.

    Returns:
        The move with the lowest row index and, for that row, the largest
        legal number of objects.
    """
    # A row holding c objects allows taking 1..min(c, k) objects (1..c when
    # no cap is set); the "+ 1" makes the upper bound inclusive.
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, (min(c, state.k) if state.k else c) + 1)
    ]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))

def player1(state:Nim,weights=None)->Nimply:
    """Take the largest number of objects that any single move allows.

    Args:
        state: current game; only ``state.rows`` and ``state.k`` are read.
        weights: unused, accepted so that every strategy can be called the
            same way by ``match``.

    Returns:
        The legal move removing the most objects; on ties the row with the
        lowest index wins.
    """
    # A row holding c objects allows taking 1..min(c, k) objects (1..c when
    # no cap is set); the "+ 1" makes the upper bound inclusive.
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, (min(c, state.k) if state.k else c) + 1)
    ]
    return Nimply(*max(possible_moves, key=lambda m: m[1]))

def player2(state:Nim,weights=None)->Nimply:
    """Take a single object from the row that allows the largest removal.

    Args:
        state: current game; only ``state.rows`` and ``state.k`` are read.
        weights: unused, accepted so that every strategy can be called the
            same way by ``match``.

    Returns:
        A move taking exactly one object from the selected row; on ties the
        row with the lowest index wins.
    """
    # A row holding c objects allows taking 1..min(c, k) objects (1..c when
    # no cap is set); the "+ 1" makes the upper bound inclusive.
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, (min(c, state.k) if state.k else c) + 1)
    ]
    row = max(possible_moves, key=lambda m: m[1])
    return Nimply(row[0], 1)


def player3(state:Nim,weights=None)->Nimply:
    """Take a single object from the first non-empty row.

    Args:
        state: current game; only ``state.rows`` and ``state.k`` are read.
        weights: unused, accepted so that every strategy can be called the
            same way by ``match``.

    Returns:
        A move taking exactly one object from the selected row.
    """
    # A row holding c objects allows taking 1..min(c, k) objects (1..c when
    # no cap is set); the "+ 1" makes the upper bound inclusive.
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, (min(c, state.k) if state.k else c) + 1)
    ]
    # The smallest removal is always 1, and min() returns the first such
    # move, i.e. the one on the first row that still has objects.
    row = min(possible_moves, key=lambda m: m[1])
    return Nimply(row[0], 1)


def analize(raw: Nim) -> dict:
    """Evaluate every legal move of ``raw`` by the nim-sum it leaves behind.

    Args:
        raw: current game; it is not modified (each move is tried on a deep
            copy).

    Returns:
        A dict with the single key ``"possible_moves"``, mapping each legal
        ``Nimply`` to the nim-sum of the position reached by playing it.
    """
    cooked = {"possible_moves": {}}
    for r, c in enumerate(raw.rows):
        # A row holding c objects allows taking 1..min(c, k) objects (1..c
        # when no cap is set); the "+ 1" below makes the bound inclusive.
        cap = min(c, raw.k) if raw.k else c
        for o in range(1, cap + 1):
            ply = Nimply(r, o)
            tmp = deepcopy(raw)
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked

def optimal(state: Nim, weights=None) -> Nimply:
    """Pick a random move that leaves a non-zero nim-sum.

    If no such move exists, any legal move is picked at random.

    Args:
        state: current game.
        weights: unused, accepted so that this strategy can be called the
            same way as all the others (e.g. by ``match``).

    Returns:
        The chosen move as a ``Nimply``.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    return random.choice(spicy_moves)


## TASK2.1

We noticed that, when playing with the `optimal` strategy (which picks moves leaving nim-sum != 0), we lose against a player who follows the nim-sum = 0 rule, and sometimes also against `gabriele`. We therefore enhanced it by aiming for nim-sum = 0 on every move except the last ones, where we choose a move with nim-sum != 0.

example: 

INFO:root:status: <0 2 1 2>

INFO:root:ply: player 0 plays Nimply(row=3, num_objects=2)

INFO:root:status: <0 2 1 0>

INFO:root:ply: player 1 plays Nimply(row=1, num_objects=2)

INFO:root:status: <0 0 1 0> 

INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)

INFO:root:status: <0 0 0 0>

INFO:root:status: Player 1 won!

Here `optimal` (player 0) loses against `pure_random` because it should have followed the nim-sum = 0 rule.


In [4]:
def expert_agent(state: Nim,weights=None) -> Nimply:
    """Play towards nim-sum == 0, except for the endgame.

    As soon as exactly one row holds more than one object, that row is
    reduced so that an odd number of single-object rows is left to the
    opponent. In every other position a random move leaving nim-sum == 0 is
    played, or any random legal move when none exists.

    Args:
        state: current game.
        weights: unused, accepted so that every strategy can be called the
            same way by ``match``.

    Returns:
        The chosen move as a ``Nimply``.
    """
    one_stick_row = state.rows.count(1)
    big_rows = [(r, c) for r, c in enumerate(state.rows) if c > 1]

    # Endgame: only one row with more than one stick is left,
    # e.g. <0 2 1 1 1>.
    if len(big_rows) == 1:
        row_index, element = big_rows[0]
        # With an even number of single-stick rows leave 1 stick in the big
        # row, otherwise empty it: either way an odd number of single-stick
        # rows remains.
        take = element - 1 if one_stick_row % 2 == 0 else element
        # Only play the endgame move when the per-move cap allows it;
        # otherwise fall back to the nim-sum rule below.
        if not state.k or take <= state.k:
            return Nimply(row_index, take)

    analysis = analize(state)
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns == 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())

    return random.choice(spicy_moves)


In [5]:
def match(strategy0, strategy1, weights=None, num_rows=4, k=None) -> int:
    """Play one game between two strategies and return the winner's index.

    Args:
        strategy0: strategy moving first, called as ``strategy(nim, weights)``.
        strategy1: strategy moving second, called the same way.
        weights: value forwarded to both strategies on every move.
        num_rows: number of rows of the board (default 4).
        k: optional cap on the objects removable in one move (default: none).

    Returns:
        0 if ``strategy0`` wins, 1 if ``strategy1`` wins. The player who
        takes the last object loses.
    """
    strategy = (strategy0, strategy1)
    nim = Nim(num_rows, k)
    player = 0
    while nim:
        ply = strategy[player](nim, weights)
        nim.nimming(ply)
        # Hand the turn over. When the loop ends, `player` is therefore the
        # one who did NOT take the last object, i.e. the winner.
        player = 1 - player
    return player

## TASK2.2

To implement the evolving strategy we created an ES algorithm that optimizes a set of weights indicating the relative importance of the different strategies. By including `expert_agent` we expect our agent to converge to that strategy.

Our strategy chooses the move by majority voting among the moves suggested by each of the predefined strategies, weighted by the current weights.

In [6]:
# Number of rounds played by fitness(); three games are played in each round.
num_eras = 50
# Base strategies consulted by evolving_strategy(). The order matters:
# the i-th weight is applied to the move suggested by strategies[i].
strategies = [
    pure_random,
    gabriele,
    optimal,
    player1,
    player2,
    player3,
    expert_agent,
]

def evolving_strategy(state:Nim,w)->Nimply:
    """Choose a move by weighted voting among the base ``strategies``.

    Every strategy proposes one move. The score of a move is the sum of the
    weights of the strategies proposing it plus the number of those
    strategies. The move with the highest score is returned; on ties the
    move proposed first wins.

    Args:
        state: current game.
        w: sequence of weights, ``w[i]`` belonging to ``strategies[i]``.

    Returns:
        The winning move as a ``Nimply``.
    """
    proposals = [strategy(state) for strategy in strategies]

    # Group the weights by proposed move, keeping first-proposal order.
    backers = {}
    for position, move in enumerate(proposals):
        backers.setdefault(move, []).append(w[position])

    scores = {move: sum(ws) + len(ws) for move, ws in backers.items()}
    return max(scores, key=scores.get)


def fitness(w):
    """Count the games won by ``evolving_strategy`` using the weights ``w``.

    For each of ``num_eras`` rounds one game is played against each of
    ``expert_agent``, ``pure_random`` and ``gabriele``. In the first half of
    the rounds the evolving strategy moves first, in the second half it
    moves second.

    Returns:
        The number of games won, between 0 and ``3 * num_eras``.
    """
    opponents = (expert_agent, pure_random, gabriele)
    half = num_eras / 2
    wins = 0
    for era in range(num_eras):
        moves_first = era < half
        for opponent in opponents:
            # match() returns the index of the winning strategy.
            if moves_first:
                won = match(evolving_strategy, opponent, w) == 0
            else:
                won = match(opponent, evolving_strategy, w) == 1
            if won:
                wins += 1
    return wins

    

## (1+λ) Strategy

This strategy creates λ new individuals at each generation starting from 1 parent, and then keeps only the best individual, with respect to the fitness, among the parent and the offspring

In [16]:
num_iterations = 100
l = 5  # lambda: number of offspring generated from the parent per generation

# Initialize the weights, one per entry of `strategies`.
weights = [random.random() for _ in range(len(strategies))]

print("weights", weights)

index = weights.index(max(weights))

print("max index", index)

prev_won = 0
new_won = 0
improvements = 0
# Number of generations between two adaptations of sigma.
check_it = max(1, num_iterations // 10)
for it in range(num_iterations):
    print("iteration n: ", it)
    # The games involve random play, so the parent is re-evaluated at every
    # generation instead of reusing an old score.
    parent_fitness = fitness(weights)
    best_weights = weights
    counter = parent_fitness

    for _ in range(l):
        # Every offspring is a Gaussian perturbation of the SAME parent:
        # `weights` is only replaced once all the offspring have been tried.
        new_weights = [w + normal(0.0, sigma) for w in weights]
        new_fitness = fitness(new_weights)
        if new_fitness > parent_fitness:
            # Successful mutation: the offspring beats its parent.
            improvements += 1
        if new_fitness > counter:
            best_weights = new_weights
            counter = new_fitness

    # Keep the best individual among the parent and its offspring.
    weights = best_weights

    if (it + 1) % check_it == 0:
        # 1/5 success rule: the success rate is measured over all the
        # check_it * l mutations tried since the last adaptation.
        if improvements / (check_it * l) > 1 / 5:
            sigma *= 1.1
        else:
            sigma /= 1.1
        improvements = 0

    print("weights", weights)

index = weights.index(max(weights))

print("max index", index)
print("weights", weights)
print("max fitness", fitness(weights))


weights [0.8271966058312479, 0.11464475231861959, 0.7525265377060157, 0.602530329612847, 0.41638039929308235, 0.29467170061045145, 0.4529200347740955]
max index 0
iteration n:  0
weights [0.7290930998314834, -0.23813704086807322, 0.1420930965886612, 0.4668923014320535, -0.09558685964625094, 1.0961199027210644, 1.2272911164545373]
iteration n:  1
weights [0.9705434398770325, -0.35864615184389315, 1.0257034023569067, 0.7575414709143173, 0.2121102108283493, 0.5899163113200415, 1.4725912940513208]
iteration n:  2
weights [0.9705434398770325, -0.35864615184389315, 1.0257034023569067, 0.7575414709143173, 0.2121102108283493, 0.5899163113200415, 1.4725912940513208]
iteration n:  3
weights [0.9705434398770325, -0.35864615184389315, 1.0257034023569067, 0.7575414709143173, 0.2121102108283493, 0.5899163113200415, 1.4725912940513208]
iteration n:  4
weights [0.929194772106, -0.5616687972280173, 0.6697667387399613, 0.940334930499662, 0.16247391506953657, 0.6024011498888162, 1.0500883098447507]
itera

As we expected, all the weights are negative or close to 0 except for the last one, which is related to the expert agent.

## (μ+λ) Strategy

This strategy creates λ new individuals at each generation starting from μ parents, and then keeps only the best μ individuals, with respect to the fitness, among the parents and the offspring. We obtained the same convergence as before

In [14]:
num_iterations = 50
mi = 5   # mu: number of parents kept at every generation
l = 10   # lambda: number of offspring generated at every generation
parents = []
population = []
weights = []

fitnesses = []
prev_won = 0
new_won = 0
improvements = 0

# Initial population: mu random weight vectors, one weight per strategy.
for _ in range(mi):
    weights = [random.random() for _ in strategies]
    parents.append(weights)
    population.append(weights)

print("parents", parents)

for it in range(num_iterations):
    print("iteration n: ", it)

    # Every parent produces the same number of offspring by Gaussian mutation.
    offspring_per_parent = round(l / mi)
    for p in parents:
        for _ in range(offspring_per_parent):
            new_weights = [gene + normal(0.0, sigma) for gene in p]
            population.append(new_weights)

    # Evaluate parents and offspring together.
    fitnesses = [fitness(candidate) for candidate in population]

    # Indexes of the population ordered by ascending fitness (stable sort);
    # the last `mi` of them are the survivors.
    ranking = sorted(range(len(fitnesses)), key=fitnesses.__getitem__)
    best_N_indexes = ranking[-mi:]
    parents = [population[index] for index in best_N_indexes]
    print(parents)
    population = list(parents)


# Re-evaluate the final parents and report the best one.
current_max = []
max_fitness = 0
for w in parents:
    current_fitness = fitness(w)
    if current_fitness > max_fitness:
        current_max, max_fitness = w, current_fitness
print(current_max)
print(max_fitness)


parents [[0.6353925388851651, 0.4144835310163959, 0.7134919014338645, 0.671353963588742, 0.2808807240969864, 0.8000652183822166, 0.8960834504254802], [0.8677706785144822, 0.017987516412267923, 0.13916465741655948, 0.5266687631808883, 0.3814469356398801, 0.49442357758949396, 0.9710470611261773], [0.6416964055718516, 0.08068023478311537, 0.4554262740525641, 0.9374839400862424, 0.052680090753117303, 0.7191132702531914, 0.22779291814028668], [0.16988449517020687, 0.4376733405084927, 0.3516916709042751, 0.9942419769779102, 0.7323803796801697, 0.1740458470442855, 0.30422340016077387], [0.6016617839757864, 0.8122383284023322, 0.010867452471477357, 0.4553740849753266, 0.6106702393412428, 0.5477232478045836, 0.8567709659511811]]
iteration n:  0
[[0.6418114698856335, 0.46420751169297253, 1.1182541534961588, 0.7333539574857595, 0.07880849919347435, 0.4710777288247864, 0.7432301965276457], [1.0430850934084168, 0.3278191746523293, 0.2584902871229628, 0.7083974260005477, 0.1603224480416525, 0.445904