Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [335]:
import logging
from pprint import pprint, pformat
from typing import Callable
from collections import namedtuple
import random
from copy import deepcopy
from random import gauss
import matplotlib.pyplot as plt
import random

## The *Nim* and *Nimply* classes

In [336]:
NUM_ROWS = 5
K = None
NUM_MATCHES = 300
λ = 20
σ = 0.1
GENERATION_SIZE = 200 // λ
random.seed(42)


In [337]:
Nimply = namedtuple("Nimply", "row, num_objects")


# Definisci la classe namedtuple con i nomi degli elementi
Solution = namedtuple('Solution', ['w1', 'w2', 'w3'])




In [338]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        # Initialize the Nim object with given number of rows and an optional maximum object limit
        self._rows = [i * 2 + 1 for i in range(num_rows)]   # Create a list of odd numbers as row sizes
        self._k = k   # Store the maximum object limit

    def __bool__(self):
        # Return True if there are objects remaining in the game, False otherwise
        return sum(self._rows) > 0

    def __str__(self):
        # Return a string representation of the object
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        # Return the rows as a tuple
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        # Perform a nimming move by removing objects from a specified row
        row, num_objects = ply   # Unpack the tuple
        assert self._rows[row] >= num_objects   # Check if the specified row has enough objects
        assert self._k is None or num_objects <= self._k   # Check if the number of objects is within the maximum limit
        self._rows[row] -= num_objects   # Subtract the number of objects from the specified row



## Sample (and silly) startegies 

In [339]:

def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    # Select a row that has at least one object remaining
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # Randomly choose a number of objects to remove from the selected row
    num_objects = random.randint(1, state.rows[row])
    # Create and return a Nimply object representing the chosen move
    return Nimply(row, num_objects)



In [340]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    # Generate a list of possible moves
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    # Select the move with the maximum number of objects from the lowest row
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))



In [341]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


## Oversimplified match

In [342]:
logging.getLogger().setLevel(logging.INFO)

strategy = (optimal, pure_random)

nim = Nim(5) # define the game with 5 rows
logging.info(f"init : {nim}")
player = 0
while nim: # while the game is not over --> sum of all rows is not zero
    ply = strategy[player](nim) # get the move from the player 0
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply) # perform the move
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=5)
INFO:root:status: <1 3 5 7 4>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 7 4>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=2)
INFO:root:status: <0 3 5 5 4>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=2)
INFO:root:status: <0 3 3 5 4>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=2)
INFO:root:status: <0 3 1 5 4>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=3)
INFO:root:status: <0 0 1 5 4>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=3)
INFO:root:status: <0 0 1 5 1>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 0 5 1>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 4 1>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 3 1>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0

## (1+λ)-ES

In [343]:
def state_info(state: Nim) -> dict:
    info = dict()
    info["possible_moves"] = [
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1) 
    ]
    # info["active_rows_number"] = sum(o > 0 for o in state.rows)
    info["shortest_row"] = min((x for x in enumerate(state.rows) if x[1] > 0), key=lambda y: y[1])[0]
    info["longest_row"] = max((x for x in enumerate(state.rows)), key=lambda y: y[1])[0]
    info["random_row"] = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # info["nim_sum"] = nim_sum(state)

    

    return info

In [344]:
def evolved_strategy(genome) -> Callable:
    def adaptive(state: Nim) -> Nimply:
        data = state_info(state)

        selected_strategy = random.choices(range(len(genome)), weights=genome)[0]

        if selected_strategy == 0:
            ply = Nimply(
                data["shortest_row"],
                random.randint(1, state.rows[data["shortest_row"]]),
            )
        elif selected_strategy == 1:
            ply = Nimply(
                data["longest_row"], random.randint(1, state.rows[data["longest_row"]])
            )
        elif selected_strategy == 2:
            ply = Nimply(
                data["random_row"], random.randint(1, state.rows[data["random_row"]])
            )
        else:
            ply = optimal(state)
        return ply

    return adaptive

In [345]:
# import random

# def select_random_number(numbers):
#     # Assicura che la somma dei numeri sia 1
#     total_sum = sum(numbers)
#     if abs(total_sum - 1) > 1e-10:
#         raise ValueError("La somma dei numeri deve essere 1.")

#     # Seleziona un numero in modo casuale ponderando la probabilità
#     selected_number = random.choices(range(len(numbers)), weights=numbers)[0]
    
#     return selected_number

# # Esempio di utilizzo
# numbers = [0.9, 0.05, 0.05]
# selected_index = select_random_number(numbers)

# # Stampa il risultato
# print(f"Selected number: {selected_index + 1}")


In [346]:
def fitness(strategy: Callable) -> int:
    won = 0
    opponent = (strategy, optimal)

    for _ in range(NUM_MATCHES):
        nim = Nim(NUM_ROWS)
        player = 0

        while nim:
            ply = opponent[player](nim)
            nim.nimming(ply)  # perform the move
            player ^= 1

        if player == 0:
            won += 1

    return won  # return the number of matches won

In [347]:
import numpy as np


def generate_offsprings(offspring) -> list:
    output = []

    for _ in range(λ):
        new_offspring = [
            np.clip(val + np.random.normal(0, σ), 0, 1) for val in offspring
        ]

        current_sum = sum(new_offspring)

        if current_sum != 0:
            scale_factor = 1 / current_sum
            # Apply scale factor to each value
            values = [val * scale_factor for val in new_offspring]
        else:
            values = new_offspring

        output.append(values)

    return output

## Adaptive (1,λ)-ES

In [348]:

# choosen_probability = list()
# solutions_list = list()
# stats = [0, 0]
# counter = 0
# for n in range(GENERATION_SIZE):
#     print("Sigma for generation", n + 1, "is:", σ)
#     offsprings = generate_offsprings(current_solution)
#     offsprings.append(current_solution)

#     evals = [
#         (offspring, fitness(evolved_strategy(offspring))) for offspring in offsprings
#     ]
#     previous_solution = evals[λ]
#     for i in range(λ):
#         if evals[i][1] > previous_solution[1]:
#             counter += 1

#     stats[1] += counter
#     stats[0] += λ

#     if (n + 1) % 1 == 0:
#         if stats[0] / stats[1] < 1 / 5:
#             σ /= 1.2
#         elif stats[0] / stats[1] > 1 / 5:
#             σ *= 1.2
#         steps = [0, 0]

#     evals.sort(key=lambda x: x[1], reverse=True)

#     # pprint(evals)

#     current_solution = evals[0][0]
#     choosen_probability.append(current_solution)
#     solutions_list.append(evals[0][1])

#     print(f"Best result for generation {n+1} is:", evals[0])


# plt.plot(range(len(choosen_probability)), choosen_probability)
# plt.xlabel("Generation")
# plt.ylabel("Probability")
# plt.show()

# plt.plot(range(GENERATION_SIZE), solutions_list)
# plt.xlabel("Generation")
# plt.ylabel("Number of wins")
# plt.show()



## (1,λ)-ES

In [349]:

# current_solution = Solution(w1=0.33, w2=0.33, w3=0.34)

current_solution = (0.25,0.25,0.25,0.25)


choosen_probability = list()
solutions_list = list()
# stats = [0, 0]
for n in range(GENERATION_SIZE):
    # offspring <- select λ random points mutating the current solution
    # print("Starting probability for generation", n+1, "is:", current_solution)
    offsprings = generate_offsprings(current_solution)
    offsprings.append(current_solution)
    # evaluate and select best
    
    evals = [
        (offspring, fitness(evolved_strategy(offspring))) for offspring in offsprings
    ]


    evals.sort(key=lambda x: x[1], reverse=True)
    #pprint(evals)

    current_solution = evals[0][0]
    choosen_probability.append(current_solution)
    solutions_list.append(evals[0][1])

    print(f"Best result for generation {n+1} is:", evals[0])




# curve_names = ['Strategy 1', 'Strategy 2', 'Strategy 3']

# for i in range(len(choosen_probability)):
#     plt.plot(range(len(choosen_probability[i])), choosen_probability[i], label=curve_names[i])

# plt.xlabel("Generation")
# plt.ylabel("Probability")
# plt.legend()
# plt.show()


plt.plot(range(GENERATION_SIZE), solutions_list)
plt.xlabel("Generation")
plt.ylabel("Number of wins")
plt.show()



Best result for generation 1 is: ([0.19897777068568007, 0.2678764306320365, 0.27844608191135656, 0.2546997167709269], 127)
Best result for generation 2 is: ([0.1381573652914832, 0.37268944381807706, 0.1922400879714012, 0.2969131029190387], 123)
Best result for generation 3 is: ([0.2316536052855178, 0.34874768821892377, 0.1049928765407147, 0.3146058299548437], 138)


## (1,λ)-ES

In [None]:
current_solution = 0.5
choosen_probability = list()
solutions_list = list()

for n in range(GENERATION_SIZE):
    # offspring <- select λ random points mutating the current solution
    # print("Starting probability for generation", n+1, "is:", current_solution)
    offsprings = generate_offsprings(current_solution)

    # evaluate and select best
    evals = [
        (offspring, fitness(evolved_strategy(offspring))) for offspring in offsprings
    ]


    evals.sort(key=lambda x: x[1], reverse=True)
    pprint(evals)

    current_solution = evals[0][0]
    choosen_probability.append(current_solution)
    solutions_list.append(evals[0][1])

    print(f"Best result for generation {n+1} is:", evals[0])


plt.plot(range(len(choosen_probability)), choosen_probability)
plt.xlabel("Generation")
plt.ylabel("Probability")
plt.show()

plt.plot(range(GENERATION_SIZE), solutions_list)
plt.xlabel("Generation")
plt.ylabel("Number of wins")
plt.show()




TypeError: 'float' object is not subscriptable