Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB9

Write a local-search algorithm (e.g., an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned  on Monday, December 4
* You need to commit in order to be selected as a reviewer (ie. better to commit an empty work than not to commit)

In [912]:
from random import choices, uniform
import random
from dataclasses import dataclass, field
from copy import deepcopy
from typing import Callable, List

import numpy as np

import lab9.lab9_lib as lab9_lib

In [913]:
# Smoke test of the fitness function: score ten random 50-bit genomes.
fitness = lab9_lib.make_problem(10)
for n in range(10):
    ind = choices([0, 1], k=50)
    # Render the genome as a compact bit string before scoring it.
    bit_string = "".join(map(str, ind))
    score = fitness(ind)
    print(f"{bit_string}: {score:.2%}")

# Report the `calls` counter exposed by the fitness object.
print(fitness.calls)

11101111111101011111000001100100110111110010100011: 9.11%
11001000111011010101101000111111011110001001011011: 9.13%
01100001100011111001000000101101100011011110000011: 15.33%
01111011100010000011010001111100110001101110010101: 23.34%
00010101101001100001010000100110010011110000101011: 7.33%
00010011100010010101100010100111011110011101111001: 15.33%
10010011010001100010001110011100010111001011011100: 9.14%
01101011011011000101011010001100000000101110110110: 7.33%
10001010111111110010010111100100101001111110010011: 23.33%
01011011011011110001011100011000101111001011111111: 31.33%
10


# Individual
It defines the individual of the ES strategy. It is composed of a genome, which is a binary representation of length `n_loci`, and of a fitness score, which is computed using the callable fitness function passed as a parameter.
# Operator Agent
It defines an agent that applies the variation operators and has a global view of the statistics of the operators applied. Initially, the purpose was to dynamically adapt the parameters that regulate the use of the operators, based on the success statistics of the operators (a child with a fitness score higher than its parents).

In [914]:
@dataclass
class Individual:
    """Candidate solution: a bit-string genome together with its fitness.

    The fitness is evaluated exactly once, at construction time, by calling
    ``fitness_func`` on the genome.

    Attributes:
        genome: The genome, stored as a list owned by this instance. A list
            is required because the variation operators flip genes and
            assign slices in place on copies of it.
        n_loci: Number of loci; used as the length of a randomly generated
            genome.
        fitness: Value returned by ``fitness_func(genome)``.
        fitness_func: Callable used to score the genome.
    """

    genome: list[int]
    n_loci: int
    fitness: float
    fitness_func: Callable[[list[int]], float]

    def __init__(self, FITNESS_FUNC, n_loci, genome=None):
        """Build an individual and evaluate its fitness.

        Args:
            FITNESS_FUNC: Callable that receives the genome and returns its score.
            n_loci: Genome length used when ``genome`` is not supplied.
            genome: Optional sequence of genes. When omitted, ``n_loci`` genes
                are drawn uniformly from ``{0, 1}``.
        """
        self.n_loci = n_loci
        if genome is None:
            self.genome = random.choices([0, 1], k=n_loci)
        else:
            # Copy into a fresh list: this accepts tuples and other sequences
            # and guarantees that later changes to the caller's object cannot
            # silently invalidate the cached fitness.
            self.genome = list(genome)
        self.fitness_func = FITNESS_FUNC
        self.fitness = self.fitness_func(self.genome)

    def __repr__(self):
        """Return the genome rendered as a list of one-character strings."""
        return f"{[str(val) for val in self.genome]}"
    
@dataclass
class Operators_agent:
    """Variation operators (mutation and three crossovers) plus their settings.

    Attributes:
        n_points_number: Number of cut points handed to the crossover functions
            (only ``crossover_n_point`` reads it).
        mutation_points: Always initialised to 1; no operator in this class
            reads it.
        points_to_mutate: Number of loci flipped by ``mutate``.
        mutation_rate: Stored for callers; this class never reads it.
        crossover_functions: Dispatch table indexed by the ``cross_operation``
            code: 0 = one-point, 1 = n-point, 2 = uniform.
    """

    n_points_number: int
    mutation_points: int
    points_to_mutate: int
    mutation_rate: float
    crossover_functions: List[Callable]

    def __init__(self, n_points_number, points_to_mutate, mutation_rate):
        self.n_points_number = n_points_number
        self.mutation_points = 1
        self.points_to_mutate = points_to_mutate
        self.mutation_rate = mutation_rate
        self.crossover_functions = [
            Operators_agent.crossover_one_point,
            Operators_agent.crossover_n_point,
            Operators_agent.crossover_uniform,
        ]

    def mutate(self, individual: "Individual"):
        """Return a new individual with ``points_to_mutate`` distinct loci flipped.

        Loci are sampled without replacement: drawing the same locus twice
        would flip it back and could yield an unchanged clone that still
        costs a fitness evaluation. The number of flips is clamped to the
        range ``[0, n_loci]``. The parent is left untouched.
        """
        flips = max(0, min(self.points_to_mutate, individual.n_loci))
        index_to_mutate = random.sample(range(individual.n_loci), flips)
        # A shallow copy is enough for a flat list of ints, and it also works
        # when the parent genome is an immutable sequence.
        new_genome = list(individual.genome)
        for i in index_to_mutate:
            new_genome[i] = 1 - new_genome[i]
        return Individual(individual.fitness_func, individual.n_loci, new_genome)

    def crossover(self, ind1: "Individual", ind2: "Individual", cross_operation: int):
        """Recombine two parents with the operator at index ``cross_operation``.

        Raises:
            IndexError: If ``cross_operation`` is not a valid index into
                ``crossover_functions``.
        """
        crossover_function = self.crossover_functions[cross_operation]
        return crossover_function(ind1, ind2, self.n_points_number)

    # The helpers below take no ``self``; declaring them static keeps them
    # callable both through the class and through an instance.

    @staticmethod
    def crossover_one_point(ind1: "Individual", ind2: "Individual", n):
        """Return a child made of ``ind1``'s head and ``ind2``'s tail.

        The cut is drawn with ``random.randrange(ind1.n_loci)``, so a cut at 0
        yields a copy of ``ind2``'s genome. ``n`` is accepted only to share a
        signature with the other crossovers and is ignored.
        """
        index_to_cross = random.randrange(ind1.n_loci)
        child_genome = ind1.genome[:index_to_cross] + ind2.genome[index_to_cross:]
        return Individual(ind1.fitness_func, ind1.n_loci, child_genome)

    @staticmethod
    def crossover_n_point(ind1: "Individual", ind2: "Individual", n):
        """Return a child of ``ind1`` with alternate segments taken from ``ind2``.

        ``n`` distinct cut points are sampled and sorted; the segments between
        the 1st and 2nd cut, the 3rd and 4th cut, and so on are copied from
        ``ind2``. When ``n`` is odd, the last segment runs to the end of the
        genome.

        Raises:
            ValueError: If ``n`` is negative or larger than ``ind1.n_loci``
                (raised by ``random.sample``).
        """
        index_list = sorted(random.sample(range(ind1.n_loci), n))
        child_genome = list(ind1.genome)
        for i in range(0, n, 2):
            start = index_list[i]
            end = index_list[i + 1] if i + 1 < n else len(ind1.genome)
            child_genome[start:end] = ind2.genome[start:end]
        return Individual(ind1.fitness_func, ind1.n_loci, child_genome)

    @staticmethod
    def crossover_uniform(ind1: "Individual", ind2: "Individual", n):
        """Return a child whose genes each come from either parent with probability 0.5.

        ``n`` is accepted only to share a signature with the other crossovers
        and is ignored.
        """
        child_genome = [
            gene1 if random.random() < .5 else gene2
            for gene1, gene2 in zip(ind1.genome, ind2.genome)
        ]
        return Individual(ind1.fitness_func, ind1.n_loci, child_genome)

# Parent selection function
This function performs stochastic universal sampling for parent selection; the parameter `parent_selection_rate` indicates the size to which the initial population is reduced.
The resulting population is the one in which the tournament takes place.

In [931]:
def stochastic_universal_sampling(population: List["Individual"], parent_selection_rate: int) -> List["Individual"]:
    """Pick individuals by stochastic universal sampling (SUS).

    The individuals are laid out on a wheel in the order given, each owning a
    slice proportional to its ``fitness``. ``int(parent_selection_rate)``
    equally spaced pointers, sharing a single random offset, are placed on the
    wheel and every pointer selects the individual whose slice it falls in.

    Args:
        population: Candidates to sample from; each must expose ``fitness``.
            Fitness values are assumed to be non-negative.
        parent_selection_rate: Number of individuals to select (truncated to
            an integer).

    Returns:
        The selected individuals, in wheel order; the same individual may
        appear several times. The list is empty when the population is empty
        or the requested count is not positive.
    """
    num_selected_parents = int(parent_selection_rate)
    if num_selected_parents <= 0 or not population:
        return []

    weights = [ind.fitness for ind in population]
    total_fitness = sum(weights)
    if total_fitness <= 0:
        # Degenerate wheel (e.g. every fitness is zero): give every
        # individual an equal slice instead of always picking the first one.
        weights = [1.0] * len(population)
        total_fitness = float(len(population))

    # The pointers must span the WHOLE wheel, so their spacing depends on how
    # many individuals are selected, not on the population size.
    pointer_distance = total_fitness / num_selected_parents
    start = uniform(0, pointer_distance)

    new_population = []
    current_index = 0
    cumulative = weights[0]  # upper edge of the current individual's slice
    last_index = len(population) - 1
    for i in range(num_selected_parents):
        pointer = start + i * pointer_distance
        # Pointers are increasing, so the walk resumes where the previous one
        # stopped. The index bound guards against floating-point overshoot.
        while cumulative <= pointer and current_index < last_index:
            current_index += 1
            cumulative += weights[current_index]
        new_population.append(population[current_index])

    return new_population

# Tournament function
It implements tournament selection to choose a parent to use for recombination or reproduction.

In [916]:
def select_parent(parents: list[Individual], size: int):
    """Run one tournament and return its winner.

    ``size`` contestants are drawn from ``parents`` uniformly at random, with
    replacement, and the contestant with the highest ``fitness`` is returned.
    On ties the first contestant drawn wins, because ``max`` keeps the first
    maximum it meets.
    """
    def by_fitness(contestant):
        return contestant.fitness

    return max(choices(parents, k=size), key=by_fitness)

# Offspring generation function
It generates the offspring: at each iteration it uses the tournament function to select the parent(s) and applies either a mutation or a recombination, depending on the mutation rate.

In [917]:
def offspring_generation(parents: List[Individual], offspring_size: int, operator_agent: "Operators_agent", tournament_size: int, cross_operation) -> List[Individual]:
    """Create ``int(offspring_size)`` children from ``parents``.

    Every child starts with one tournament-selected parent. A uniform random
    draw below ``operator_agent.mutation_rate`` mutates that parent; otherwise
    a second parent is selected by tournament and the two are recombined with
    the crossover identified by ``cross_operation``.
    """
    def breed_one():
        first_parent = select_parent(parents, tournament_size)
        if random.random() < operator_agent.mutation_rate:
            return operator_agent.mutate(first_parent)
        # The second tournament runs only on the crossover path.
        second_parent = select_parent(parents, tournament_size)
        return operator_agent.crossover(first_parent, second_parent, cross_operation)

    return [breed_one() for _ in range(int(offspring_size))]

# Evolutionary algorithm
It implements all the evolutionary cycle
The principal steps are:
* Initialization
    * Initializes the fitness_function with the specified instance
    * Generates the initial population randomly
* For each generation:
    * It first applies elitism to select the best parent
    * It generates the offspring population, with the size specified by the offspring size parameter
    * It selects the best individual considering the union of both selected parents and offspring or just the offspring, depending on the selection type
    * It checks if the best individual has reached the fitness score goal
* to compute the variation operators it uses an operator agent
    
* In the end it returns both the best individual and the number of fitness calls 

In [918]:
import math

def launch_es_cycle(problem_instance: int, populations_number: int, n_loci: int, generations: int, offspring_size: int, tournament_size: int, selection_type: int, operator_agent: "Operators_agent", cross_operation: int):
    """Run the evolutionary loop on one problem instance.

    A random population of ``populations_number`` individuals is evolved for at
    most ``generations`` generations. After each batch of offspring, the
    survivors are the ``populations_number`` fittest of parents plus offspring
    when ``selection_type == 1``, or of the offspring alone otherwise. The loop
    stops early once the best fitness is close to 1.

    Returns:
        A ``(best_individual, fitness_calls)`` pair, where ``fitness_calls`` is
        the ``calls`` counter of the fitness function created for this run.
    """
    fitness_func = lab9_lib.make_problem(problem_instance)
    population = [Individual(fitness_func, n_loci) for _ in range(populations_number)]

    def fitness_of(candidate):
        return candidate.fitness

    for _ in range(generations):
        offspring = offspring_generation(population, offspring_size, operator_agent, tournament_size, cross_operation)

        if selection_type == 1:
            candidates = population + offspring
        else:
            candidates = offspring
        # Fittest first, then truncate to the population size.
        candidates.sort(key=fitness_of, reverse=True)
        population = candidates[:populations_number]

        if math.isclose(1, population[0].fitness):
            break

    return population[0], fitness_func.calls

# Parameters tuning
The purpose of this code is to tune some parameters, using a small population size and a small number of generations, in order to find the best combination, which will then be used to test the algorithm.
The parameters are:
* OFFSPRING_SIZE: It indicates the size of the generated offspring
* CROSS_OPERATIONS: It contains an array of 3 integers, each one corresponding to a different crossover function
* NUMBER_CROSSOVER_POINTS: It is an integer that indicates the number of points at which the parents will be split in the crossover_n_point function
* TOURNAMENT_SIZE: It indicates the size of the pool of individuals selected to compete in the tournament selection
* MUTATION_POINTS: It indicates how many points the mutation changes

In [919]:
import itertools

# Problem setup.
LOCI = 1000
PROBLEM_INSTANCES = [1, 2, 5, 10]

# Reduced budget used only while tuning.
GENERATIONS_TEST = 1000
POPULATIONS_NUMBER_TEST = 50

# Settings held fixed during the grid search.
OFFSPRING_SIZE = 20
NUMBER_CROSSOVER_POINTS = 2
MUTATION_RATES = {1: 0.8, 2: 0.2, 5: 0.2, 10: 0.2}

# Grid axes. SELECTION_TYPE 1 keeps parents and offspring together, any other
# value keeps the offspring only; CROSS_OPERATIONS indexes the agent's
# crossover table (0 = one-point, 1 = n-point, 2 = uniform).
TOURNAMENT_SIZE = [2, 4, 8]
SELECTION_TYPE = [0, 1]
CROSS_OPERATIONS = [0, 1, 2]
MUTATION_POINTS = [1, 4]


parameter_combinations = list(
    itertools.product(PROBLEM_INSTANCES, TOURNAMENT_SIZE, SELECTION_TYPE, MUTATION_POINTS, CROSS_OPERATIONS)
)

# Per instance: the best [individual, calls] seen so far and the settings that produced it.
best_parameters = {}
best_individuals = {}

for instance, t_size, s_type, m_points, cr_op in parameter_combinations:
    operator_agent = Operators_agent(NUMBER_CROSSOVER_POINTS, m_points, MUTATION_RATES[instance])
    individual, calls = launch_es_cycle(instance, POPULATIONS_NUMBER_TEST, LOCI, GENERATIONS_TEST, OFFSPRING_SIZE, t_size, s_type, operator_agent, cr_op)

    if instance in best_parameters:
        champion, champion_calls = best_individuals[instance]
        # Higher fitness wins; on (near-)equal fitness, fewer fitness calls wins.
        is_better = individual.fitness > champion.fitness or (
            math.isclose(individual.fitness, champion.fitness) and calls < champion_calls
        )
    else:
        # First result recorded for this instance.
        is_better = True

    if is_better:
        best_individuals[instance] = [individual, calls]
        best_parameters[instance] = [t_size, s_type, m_points, cr_op]

for instance in PROBLEM_INSTANCES:
    top_individual, top_calls = best_individuals[instance]
    top_tournament, top_selection, top_points, top_cross = best_parameters[instance]
    summary = (
        f"Instance {instance}\n "
        f"Best Individual: Score: {top_individual.fitness}\n "
        f"Calls: {top_calls}"
    )
    settings = (
        f"Parameters:\n "
        f"Tournament size:{top_tournament}\n"
        f"Selection type:{top_selection}\n"
        f"Points to mutate:{top_points}\n"
        f"Cross operation:{top_cross}\n"
    )
    print(summary)
    print(settings)


Instance 1
 Best Individual: Score: 1.0
 Calls: 17390
Parameters:
 Tournament size:8
Selection type:1
Points to mutate:1
Cross operation:2

Instance 2
 Best Individual: Score: 0.812
 Calls: 20050
Parameters:
 Tournament size:8
Selection type:1
Points to mutate:4
Cross operation:2

Instance 5
 Best Individual: Score: 0.4656
 Calls: 20050
Parameters:
 Tournament size:4
Selection type:1
Points to mutate:4
Cross operation:1

Instance 10
 Best Individual: Score: 0.40372
 Calls: 20050
Parameters:
 Tournament size:4
Selection type:1
Points to mutate:4
Cross operation:0


# Instance 1 
Parameter tuning for offspring and population size


In [921]:
# Operator settings used for instance 1; only the population and offspring
# sizes are searched in this cell.
TOURNAMENT_SIZE = 8
SELECTION_TYPE = 1
POINTS_TO_MUTATE = 1
CROSS_OPERATION = 2
INSTANCE = 1
POPULATION_SIZE = [20, 40, 60, 80]
OFFSPRING_SIZE = [20, 30, 40, 50]
MUTATION_RATE = 0.8
GENERATIONS = 5000

parameter_combinations = list(itertools.product(POPULATION_SIZE, OFFSPRING_SIZE))

best_parameters = {}
best_individuals = {}

for p_size, of_size in parameter_combinations:
    # The agent's first argument is the number of crossover cut points, not
    # the problem instance. Uniform crossover (CROSS_OPERATION = 2) ignores it.
    operator_agent = Operators_agent(NUMBER_CROSSOVER_POINTS, POINTS_TO_MUTATE, MUTATION_RATE)
    individual, calls = launch_es_cycle(INSTANCE, p_size, LOCI, GENERATIONS, of_size, TOURNAMENT_SIZE, SELECTION_TYPE, operator_agent, CROSS_OPERATION)
    # Results are keyed on INSTANCE, the constant defined in this cell, rather
    # than on a loop variable left over from an earlier cell.
    # Higher fitness wins; on (near-)equal fitness, fewer fitness calls wins.
    if INSTANCE not in best_parameters or (individual.fitness > best_individuals[INSTANCE][0].fitness)\
            or (math.isclose(individual.fitness, best_individuals[INSTANCE][0].fitness) and calls < best_individuals[INSTANCE][1]):
        best_individuals[INSTANCE] = [individual, calls]
        best_parameters[INSTANCE] = [p_size, of_size]

print(f"Instance {INSTANCE}\n "
          f"Best Individual: Score: {best_individuals[INSTANCE][0].fitness}\n "
          f"Calls: {best_individuals[INSTANCE][1]}")
print(f"Parameters:\n "
          f"Population size:{best_parameters[INSTANCE][0]}\n "
          f"Offspring size:{best_parameters[INSTANCE][1]}\n")

Instance 1
 Best Individual: Score: 1.0
 Calls: 11660
Parameters:
 Population size:20
Offspring size:20


This function enhances the previous one by adding a check on the standard deviation of the population fitness: when the value is too low, it reduces the population to a smaller size with stochastic universal sampling and then adds random individuals to restore the original population size.

In [934]:
import numpy as np

def launch_es_cycle_with_survival_selection(problem_instance: int, populations_number: int, n_loci: int, generations: int, offspring_size: int, tournament_size: int, selection_type: int, operator_agent: "Operators_agent", cross_operation: int, parent_selection_size: int):
    """Run the evolutionary loop with a diversity-triggered partial restart.

    Each generation produces offspring and keeps the ``populations_number``
    fittest of parents plus offspring when ``selection_type == 1``, or of the
    offspring alone otherwise. The loop stops early once the best fitness is
    close to 1. Whenever the standard deviation of the population's fitness
    falls below 0.005, the population is rebuilt from:

    * the current best individual, which is always kept;
    * up to ``parent_selection_size`` individuals drawn from the rest with
      ``stochastic_universal_sampling``;
    * fresh random individuals that fill the remaining slots.

    Returns:
        A ``(best_individual, fitness_calls)`` pair, where ``fitness_calls`` is
        the ``calls`` counter of the fitness function created for this run.
    """
    fitness_func = lab9_lib.make_problem(problem_instance)
    population = [Individual(fitness_func, n_loci) for _ in range(populations_number)]

    for _ in range(generations):

        offspring = offspring_generation(population, offspring_size, operator_agent, tournament_size, cross_operation)
        population = sorted(population+offspring if selection_type == 1 else offspring, key=lambda p: p.fitness, reverse=True)[:populations_number]
        if math.isclose(1, population[0].fitness):
            break
        if np.std([p.fitness for p in population]) < 0.005:
            best_individual = population[0]
            others = population[1:]
            # Nothing to sample from when the elite is the whole population.
            survivors = stochastic_universal_sampling(others, parent_selection_size) if others else []
            newcomers_needed = max(0, populations_number - len(survivors) - 1)
            newcomers = [Individual(fitness_func, n_loci) for _ in range(newcomers_needed)]
            population = [best_individual] + survivors + newcomers
            # Restore the "fittest first" order so that population[0] is the
            # best individual even if this was the last generation. The sort
            # is stable, so the elite stays ahead of equally fit individuals.
            population.sort(key=lambda p: p.fitness, reverse=True)

    return population[0], fitness_func.calls

In [943]:
# Problem setup: instances evolved with the diversity-restart variant.
LOCI = 1000
PROBLEM_INSTANCES = [2, 5, 10]

# Per-instance operator settings (dictionaries keyed by problem instance).
TOURNAMENT_SIZE = {2: 8, 5: 4, 10: 4}
SELECTION_TYPE = 1
POINTS_TO_MUTATE = 4
CROSS_OPERATION = {2: 2, 5: 1, 10: 0}
NUMBER_CROSSOVER_POINTS = 2
# Axes searched in this cell: PARENT_SELECTION_SIZE and MUTATION_RATE.
PARENT_SELECTION_SIZE = [35, 65]
POPULATION_SIZE = 100
OFFSPRING_SIZE = 50
MUTATION_RATE = [0.2, 0.6]
GENERATIONS = 40000

parameter_combinations = list(
    itertools.product(PROBLEM_INSTANCES, PARENT_SELECTION_SIZE, MUTATION_RATE)
)

# Per instance: the best [individual, calls] seen so far and the settings that produced it.
best_parameters = {}
best_individuals = {}

for instance, p_s_s, m_rate in parameter_combinations:
    operator_agent = Operators_agent(NUMBER_CROSSOVER_POINTS, POINTS_TO_MUTATE, m_rate)
    individual, calls = launch_es_cycle_with_survival_selection(instance, POPULATION_SIZE, LOCI, GENERATIONS, OFFSPRING_SIZE, TOURNAMENT_SIZE[instance], SELECTION_TYPE, operator_agent, CROSS_OPERATION[instance], p_s_s)

    if instance in best_parameters:
        champion, champion_calls = best_individuals[instance]
        # Higher fitness wins; on (near-)equal fitness, fewer fitness calls wins.
        is_better = individual.fitness > champion.fitness or (
            math.isclose(individual.fitness, champion.fitness) and calls < champion_calls
        )
    else:
        # First result recorded for this instance.
        is_better = True

    if is_better:
        best_individuals[instance] = [individual, calls]
        best_parameters[instance] = [p_s_s, m_rate]

for instance in PROBLEM_INSTANCES:
    top_individual, top_calls = best_individuals[instance]
    top_selection_size, top_mutation_rate = best_parameters[instance]
    summary = (
        f"Instance {instance}\n "
        f"Best Individual: Score: {top_individual.fitness}\n "
        f"Calls: {top_calls}"
    )
    settings = (
        f"Parameters:\n "
        f"Parent Selection Rate:{top_selection_size}\n"
        f"Mutation Rate:{top_mutation_rate}\n"
    )
    print(summary)
    print(settings)

Instance 2
 Best Individual: Score: 0.998
 Calls: 2776388
Parameters:
 Parent Selection Rate:65
Mutation Rate:0.6

Instance 5
 Best Individual: Score: 0.72
 Calls: 2650248
Parameters:
 Parent Selection Rate:65
Mutation Rate:0.6

Instance 10
 Best Individual: Score: 0.4
 Calls: 2634532
Parameters:
 Parent Selection Rate:35
Mutation Rate:0.6
