# NimAgent
This class represents our agent: it chooses its moves using rule weights that are tuned through an evolution strategy (ES).

In [None]:
class NimAgent:
    """Rule-based Nim player that tries its rules in order of decreasing weight.

    Each rule is a callable that receives the game state and returns a move,
    or ``None`` when the rule is not applicable.
    """

    # Class-level fallbacks; every instance replaces both in __init__.
    rules = list()
    weights = list()

    def __init__(self, weights, rules):
        """Store the rules and their weights; ``weights[i]`` ranks ``rules[i]``."""
        self.weights = weights
        self.rules = rules

    def choose_move(self, state:Nim) -> Nimply:
        """Return the move of the highest-weighted rule that applies to ``state``.

        Rules are tried from the largest weight to the smallest and the first
        result that is not ``None`` is returned. If no rule produces a move,
        ``None`` is returned.
        """
        # argsort is ascending, so reversing it yields decreasing weights.
        order = np.argsort(np.array(self.weights))
        priority_queue = np.array(self.rules)[order][::-1]

        for rule in priority_queue:
            ply = rule(state)
            # Identity check: `!= None` would be routed through the move
            # type's own comparison operators.
            if ply is not None:
                return ply

        return None

# Set of rules
All the rules used by our NimAgent 

In [None]:
# Percentage threshold used by rule_take_all_if_below: the rule fires on a row
# whose remaining objects are <= B_PERC percent of (row*2 + 1)
B_PERC = 30
# Percentage threshold used by rule_take_half_if_above: the rule fires on a row
# whose remaining objects are >= A_PERC percent of (row*2 + 1)
A_PERC = 80


# A set of rules that I can use for my agent
# Every rule returns a specific move if applicable, otherwise it returns None

def rule_take_all_if_below(state: Nim):
    """Empty the first row whose fill percentage is at most B_PERC.

    The fill percentage of row ``i`` is its remaining objects divided by
    ``i*2 + 1``, times 100. Rows with no objects left are ignored.
    Returns ``None`` when no row qualifies.
    """
    moves = (
        Nimply(idx, remaining)
        for idx, remaining in enumerate(state.rows)
        if remaining != 0 and remaining / (idx * 2 + 1) * 100 <= B_PERC
    )
    # The generator is lazy, so only the first qualifying row builds a move.
    return next(moves, None)

def rule_take_half_if_above(state: Nim):
    """Take half of the first row whose fill percentage is at least A_PERC.

    The fill percentage of row ``i`` is its remaining objects divided by
    ``i*2 + 1``, times 100. The half is rounded down (5 -> 2, not 3), but at
    least one object is always taken. Returns ``None`` when no row qualifies.
    """
    for idx, remaining in enumerate(state.rows):
        fill = remaining / (idx * 2 + 1) * 100
        if fill >= A_PERC:
            # `or 1` bumps a zero half (a single remaining object) up to one.
            return Nimply(idx, remaining // 2 or 1)
    return None

def rule_take_if_even(state: Nim):
    """Take one object from the first non-empty row holding an even count.

    Returns ``None`` when no such row exists.
    """
    even_rows = (
        idx
        for idx, remaining in enumerate(state.rows)
        if remaining != 0 and remaining % 2 == 0
    )
    target = next(even_rows, None)
    return None if target is None else Nimply(target, 1)

def rule_take_if_odd(state: Nim):
    """Take one object from the first row holding an odd count.

    Returns ``None`` when no such row exists.
    """
    for idx, remaining in enumerate(state.rows):
        # An empty row gives 0 % 2 == 0, so it is skipped by the same test.
        if remaining % 2:
            return Nimply(idx, 1)
    return None

def rule_take_2(state: Nim):
    """Take two objects from a randomly chosen row that holds at least two.

    Returns ``None`` when no row has two or more objects left.
    """
    eligible = [idx for idx, remaining in enumerate(state.rows) if remaining >= 2]
    if not eligible:
        return None
    return Nimply(choice(eligible), 2)

def rule_eliminate_row(state: Nim):
    """Take every remaining object from a randomly chosen non-empty row.

    The rule applies only while at least two rows still hold objects. It
    returns ``None`` when a single non-empty row is left, and also when the
    board is already empty (previously that case made ``choice`` fail on an
    empty list, unlike every other rule).

    NOTE(review): an earlier comment described this rule as picking the row
    with the most sticks; the implementation has always picked a random
    non-empty row, which is what is kept here.
    """
    non_empty = [row for row, n_objects in enumerate(state.rows) if n_objects > 0]
    if len(non_empty) < 2:
        return None
    row = choice(non_empty)
    return Nimply(row, state.rows[row])

# Adaptive (μ+λ)-ES

In [None]:
# Opponent strategies the agent plays against inside fitness()
AGENTS = [gabriele, pure_random, optimal]
# Number of rows passed to play_game() during fitness evaluation
NUM_ROWS = 5

# The fitness works as follows:
# the agent plays N games against players that use different strategies;
# the fitness counts the wins over those N games and returns the win percentage
def fitness(my_agent: NimAgent, n_games):
    """Return the percentage of ``n_games`` games won by ``my_agent``.

    The games are split into consecutive slices, one per entry of AGENTS, so
    each opponent is faced for roughly the same number of games. For every
    game the order of the two players in the tuple handed to ``play_game`` is
    drawn at random; a game counts as a win when ``play_game`` returns the
    agent's position in that tuple.
    """
    games_per_opponent = n_games / len(AGENTS)
    victories = 0
    for game_no in range(n_games):
        opponent = AGENTS[int(game_no // games_per_opponent)]
        players = choice([(my_agent.choose_move, opponent), (opponent, my_agent.choose_move)])
        if play_game(players, NUM_ROWS) == players.index(my_agent.choose_move):
            victories += 1
    return victories / n_games * 100


# λ is the number of offspring generated each generation
# μ is the number of parents selected each generation
# σ is the step size (standard deviation) of the gaussian used to mutate the weights
# + strategy: the parents are DETERMINISTICALLY selected from the (multi-)set of
# BOTH the parents and offspring
def es_mu_plus_lambda(λ, μ, rules, generations, n_games):
    """Tune the rule weights of a NimAgent with a self-adaptive (μ+λ)-ES.

    Every individual is a row of ``len(rules) + 1`` floats: one weight per
    rule followed by the individual's own mutation step size σ.

    Args:
        λ: number of offspring generated per generation.
        μ: number of parents kept per generation.
        rules: rule callables handed to every NimAgent.
        generations: number of generations to run.
        n_games: games played per fitness evaluation.

    Returns:
        The weights (σ excluded) of the best-scoring individual found, or
        ``None`` when ``generations`` is 0. The progress of the best fitness
        is also printed and plotted.
    """
    n_rules = len(rules)

    # 1) Initialize the parent population with μ random individuals.
    #    It has to hold μ rows (not λ): the offspring step below samples
    #    parent indexes from [0, μ).
    parents = np.random.random((μ, n_rules + 1))

    best_fitness = None
    best_weights = None
    history = list()
    for gen in range(generations):
        # 2) Generate λ offspring that will form the offspring population
        # - Draw λ parent indexes uniformly at random in [0, μ)
        indexes = np.random.randint(0, μ, size=(λ,))
        # Fancy indexing already returns a fresh copy of the selected rows.
        offspring = parents[indexes]

        # - Mutate every σ and clamp it to a small positive value
        offspring[:, -1] = np.random.normal(loc=offspring[:, -1], scale=0.2)
        offspring[offspring[:, -1] < 1e-5, -1] = 1e-5
        # - Mutate the weights of each offspring with its own mutated σ
        offspring[:, 0:-1] = np.random.normal(
            loc=offspring[:, 0:-1], scale=offspring[:, -1].reshape(-1, 1)
        )

        # plus-strategy: the new parents are selected from parents + offspring
        population = np.vstack([parents, offspring])

        # 3) Select the new parent population
        # - evaluate the fitness of the whole population: one agent per set of
        #   weights, playing n_games games against the opponents
        scores = [
            fitness(NimAgent(individual[0:-1], rules), n_games)
            for individual in population
        ]

        # - rank by fitness (ascending) and keep the top μ individuals
        ranking_ind = np.argsort(scores)
        population = population[ranking_ind]
        parents = np.copy(population[-μ:])

        # Track improvements only; they are what gets plotted.
        best_of_gen = scores[ranking_ind[-1]]
        if best_fitness is None or best_fitness < best_of_gen:
            best_fitness = best_of_gen
            best_weights = parents[-1][0:-1].copy()
            history.append((gen, best_fitness))
            print(f'GENERATION {gen+1}: best_fitness ---> {best_fitness}')

    print(f'The best set of weights for my agent is the following one: {best_weights}')

    # Plot to check that there are actual improvements.
    # reshape keeps the array 2-D even when no generation was run.
    history = np.array(history, dtype=float).reshape(-1, 2)
    plt.plot(history[:, 0], history[:, 1], marker=".")
    plt.show()

    return best_weights

def test(weights, oppo_strategy, ngames, rules):
    """Print the percentage of ``ngames`` games a weighted agent wins.

    A NimAgent built from ``weights`` and ``rules`` plays ``ngames`` games
    against ``oppo_strategy``. For every game the order of the two players in
    the tuple handed to ``play_game`` is drawn at random; a game counts as a
    win when ``play_game`` returns the agent's position in that tuple.
    """
    wins = 0
    my_agent = NimAgent(weights, rules)
    for _ in range(ngames):
        strategy = choice([(my_agent.choose_move, oppo_strategy), (oppo_strategy, my_agent.choose_move)])
        # Same board size as the one used by fitness() during training.
        winner = play_game(strategy, NUM_ROWS)
        if winner == strategy.index(my_agent.choose_move):
            wins += 1
    perc = wins/ngames * 100
    print(perc)