Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB3

Write a local-search algorithm (e.g., an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit at all)

In [383]:
from copy import copy
from dataclasses import dataclass, field
from random import choice, choices, gauss, randint, random
from typing import Optional

import lab3_lib

In [384]:
# GENERAL PARAMETERS
# Genome length: number of loci (bits) in every individual's genotype.
LOCI = 1000
# Number of individuals sampled for each tournament in tournament_selection.
TOURNAMENT_SIZE = 5

In [385]:
@dataclass
class Individual:
    """A candidate solution: a bit-string genome plus its cached fitness."""

    # One allele per locus; the population generator fills it with 0/1 values.
    genotype: list[int]
    # Cached fitness; None means "not evaluated yet".
    # Declared as a real dataclass field (an un-annotated `fitness = None` is
    # only a class attribute, invisible to __init__ and __repr__).
    # compare=False keeps equality based on the genotype alone.
    # NOTE(review): the value is whatever the fitness callable returns,
    # presumably a float -- confirm against lab3_lib.
    fitness: Optional[float] = field(default=None, compare=False)

def generate_pop(pop_size):
    """Create and evaluate `pop_size` random individuals.

    Every genome holds LOCI alleles drawn from {0, 1}. Each individual is
    scored with the module-level `fitness` callable, and the module-level
    `counter` is incremented once per evaluation.

    Returns the list of evaluated individuals.
    """
    global counter
    # First pass: build all the random genomes.
    population = []
    for _ in range(pop_size):
        genome = choices([0, 1], k=LOCI)
        population.append(Individual(genotype=genome))
    # Second pass: score them, counting every fitness call.
    for member in population:
        member.fitness = fitness(member.genotype)
        counter += 1
    return population

# --------------------------------------------------------MUTATION------------------------------------------------------------- 

# Single-bit mutation
def mutate_single_bit(ind: "Individual"):
    """Return a copy of `ind` with exactly one randomly chosen locus flipped.

    The input individual is left untouched. The returned individual has its
    fitness reset to None so that it gets re-evaluated.
    """
    mutated = copy(ind)
    # copy() is shallow: without a fresh list the flip below would also
    # change the genotype of `ind`.
    mutated.genotype = list(ind.genotype)
    index = randint(0, len(mutated.genotype) - 1)
    # int(...) keeps the genome made of ints instead of storing a bool.
    mutated.genotype[index] = int(not mutated.genotype[index])
    mutated.fitness = None
    return mutated

# n-bit mutation (ga_algorithm_adaptive draws n from a Gaussian centered in 0 whose sigma adapts)
def mutate_n_bit(ind: "Individual", n):
    """Return a copy of `ind` after `n` random single-locus flips.

    Loci are drawn independently, so the same locus may be flipped more than
    once; with n == 0 the genome is copied unchanged. The input individual is
    left untouched and the returned individual has its fitness reset to None.
    """
    mutated = copy(ind)
    # copy() is shallow: without a fresh list the flips below would also
    # change the genotype of `ind`.
    mutated.genotype = list(ind.genotype)
    for _ in range(n):
        index = randint(0, len(mutated.genotype) - 1)
        # int(...) keeps the genome made of ints instead of storing a bool.
        mutated.genotype[index] = int(not mutated.genotype[index])
    mutated.fitness = None
    return mutated

# --------------------------------------------------------XOVER------------------------------------------------------------- 

# One-cut crossover
def one_cut_xover(ind1: Individual, ind2: Individual):
    """Build a child from the head of `ind1` and the tail of `ind2`.

    The cut point is drawn uniformly in [0, LOCI - 1]; loci before it come
    from `ind1`, the remaining ones from `ind2`. The child is returned
    unevaluated (fitness None).
    """
    split = randint(0, LOCI - 1)
    head = ind1.genotype[:split]
    tail = ind2.genotype[split:]
    child = Individual(genotype=head + tail)
    assert len(child.genotype) == LOCI
    child.fitness = None
    return child

# Uniform crossover
def uniform_xover(ind1: Individual, ind2: Individual):
    """Build a child by picking, locus by locus, the allele of a random parent.

    For each of the LOCI positions one of the two parental alleles is chosen
    with `choice`. The child is returned unevaluated (fitness None).
    """
    genes1, genes2 = ind1.genotype, ind2.genotype
    child_genes = [choice([genes1[pos], genes2[pos]]) for pos in range(LOCI)]
    child = Individual(genotype=child_genes)
    child.fitness = None
    return child

# ------------------------------------------------PARENT SELECTION------------------------------------------------------------- 
# This is the function to select the best parent in my population

# Tournament selection
def tournament_selection(population):
    """Return the fittest of TOURNAMENT_SIZE randomly drawn individuals.

    Contenders are drawn one at a time with `choice`, so the same individual
    may enter the tournament more than once.
    """
    def by_fitness(candidate):
        return candidate.fitness

    contenders = []
    for _ in range(TOURNAMENT_SIZE):
        contenders.append(choice(population))
    return max(contenders, key=by_fitness)

In [386]:
def ga_algorithm(pop_size, off_size, n_generations, mut_prob, p_selection, xover_strategies, mutate):
    """Run a plain genetic algorithm and print the best fitness found.

    Parameters:
        pop_size: number of individuals in the population.
        off_size: number of offspring generated per generation.
        n_generations: number of generations to run.
        mut_prob: probability that a freshly recombined child is mutated.
        p_selection: callable picking one parent from the population.
        xover_strategies: sequence of crossover callables; one is drawn at
            random for every child.
        mutate: callable returning a mutated version of an individual.

    When off_size < pop_size parents and offspring compete and the best
    pop_size survive; otherwise the offspring replace the population.

    The module-level `counter` tracks fitness calls: it is zeroed at the
    start, printed together with the best fitness, and zeroed again at the
    end. Nothing is returned.
    """
    global counter
    counter = 0  # restart the fitness-call tally for this run

    def by_fitness(individual):
        return individual.fitness

    population = generate_pop(pop_size)
    steady_state = off_size < pop_size
    for generation in range(n_generations):
        # Variation: recombine two selected parents, then maybe mutate.
        offspring = []
        for _ in range(off_size):
            parent_a = p_selection(population)
            parent_b = p_selection(population)
            recombine = choice(xover_strategies)
            child = recombine(parent_a, parent_b)
            if random() < mut_prob:
                child = mutate(child)
            offspring.append(child)

        # Evaluation: only individuals without a cached fitness cost a call.
        for child in offspring:
            if child.fitness is None:
                child.fitness = fitness(child.genotype)
                counter += 1

        # Survivor selection, best individual first.
        survivors = population + offspring if steady_state else offspring
        survivors.sort(key=by_fitness, reverse=True)
        population = survivors[:pop_size] if steady_state else survivors
    print(f'Best individual has the following fitness: {population[0].fitness: .2%}\nFitness calls: {counter}\n')
    counter = 0

#---------------------------------------------------------------------------------------------------------------------------

def ga_algorithm_elitism(pop_size, off_size, n_generations, mut_prob, p_selection, xover_strategies, mutate):
    """Run a genetic algorithm with elitism and print the best fitness found.

    Every generation the best 10% of the current population form the elite.
    The top half of the elite is copied unchanged into the next generation;
    each remaining offspring is the crossover of a parent selected (through
    `p_selection`) from the non-elite individuals with a random member of the
    bottom half of the elite, optionally followed by mutation.

    Parameters:
        pop_size: number of individuals in the initial population.
        off_size: size of every following generation.
        n_generations: number of generations to run.
        mut_prob: probability that a freshly recombined child is mutated.
        p_selection: callable picking one parent from a list of individuals.
        xover_strategies: sequence of crossover callables; one is drawn at
            random for every child.
        mutate: callable returning a mutated version of an individual.

    The module-level `counter` tracks fitness calls: it is zeroed at the
    start, printed together with the best fitness, and zeroed again at the
    end. Nothing is returned.
    """
    global counter
    counter = 0  # restart the fitness-call tally for this run

    def by_fitness(individual):
        return individual.fitness

    population = generate_pop(pop_size)
    for generation in range(n_generations):
        # Rank the population and split off the best 10%. The size is taken
        # from the actual population, which holds off_size individuals after
        # the first generation.
        population.sort(key=by_fitness, reverse=True)
        elite_num = round(0.1 * len(population))
        elite = population[:elite_num]
        # From here on `population` is only the non-elite part.
        population = population[elite_num:]

        # The top half of the elite goes straight into the next generation...
        survivors_num = round(len(elite) / 2)
        offspring = list(elite[:survivors_num])
        # ...while the bottom half is used as second parent for the others.
        breeders = elite[survivors_num:]

        for _ in range(off_size - len(offspring)):
            p1 = p_selection(population)
            p2 = choice(breeders)
            xover = choice(xover_strategies)
            o = xover(p1, p2)
            if random() < mut_prob:
                o = mutate(o)
            offspring.append(o)

        # Carried-over elites keep their cached fitness, so only new
        # individuals cost a fitness call.
        for o in offspring:
            if o.fitness is None:
                o.fitness = fitness(o.genotype)
                counter += 1
        population = offspring

    # The last generation is not sorted: position 0 only holds the best of
    # the previous one, so the best individual is looked up explicitly.
    best = max(population, key=by_fitness)
    print(f'Best individual has the following fitness: {best.fitness: .2%}\nFitness calls: {counter}\n')
    counter = 0

#---------------------------------------------------------------------------------------------------------------------------

def ga_algorithm_adaptive(pop_size, off_size, n_generations, sigma, p_selection, xover_strategies, mutate):
    """Run a genetic algorithm with self-adapting mutation strength.

    Every child is mutated with `mutate(child, n)`, where n is the rounded
    absolute value of a Gaussian sample with mean 0 and standard deviation
    `sigma` (n can be 0, i.e. no mutation). At the end of each era (one fifth
    of the run, at least one generation) the current average fitness is
    compared with the mean of the averages recorded during the era: sigma
    grows by 0.5 when the current average is below that mean, otherwise it
    shrinks by 0.5 without ever dropping under 0.5.

    Parameters:
        pop_size: number of individuals in the population.
        off_size: number of offspring generated per generation.
        n_generations: number of generations to run.
        sigma: initial standard deviation used to draw the number of flips.
        p_selection: callable picking one parent from the population.
        xover_strategies: sequence of crossover callables; one is drawn at
            random for every child.
        mutate: callable taking (individual, n) and returning the mutant.

    When off_size < pop_size parents and offspring compete and the best
    pop_size survive; otherwise the offspring replace the population.

    The module-level `counter` tracks fitness calls: it is zeroed at the
    start, printed together with the best fitness, and zeroed again at the
    end. Nothing is returned.
    """
    global counter
    counter = 0  # restart the fitness-call tally for this run

    sigma_step = 0.5
    sigma_floor = 0.5

    def by_fitness(individual):
        return individual.fitness

    population = generate_pop(pop_size)
    previous_avg_fit = []
    # Integer era length: with a float (0.2 * n_generations) the modulo test
    # below never hits 0 unless n_generations is a multiple of 5.
    era = max(1, n_generations // 5)
    for generation in range(n_generations):
        offspring = []
        for _ in range(off_size):
            p1 = p_selection(population)
            p2 = p_selection(population)
            xover = choice(xover_strategies)
            o = xover(p1, p2)
            n = round(abs(gauss(0, sigma)))
            o = mutate(o, n)
            offspring.append(o)

        # Only individuals without a cached fitness cost a call.
        for o in offspring:
            if o.fitness is None:
                o.fitness = fitness(o.genotype)
                counter += 1

        if off_size < pop_size:
            # Parents and offspring compete; keep the best pop_size.
            population.extend(offspring)
            population.sort(key=by_fitness, reverse=True)
            population = population[:pop_size]
        else:
            # The offspring replace the population entirely.
            population = offspring
            population.sort(key=by_fitness, reverse=True)

        # Average over the real population size (it is off_size, not
        # pop_size, when the offspring replace the population).
        avg_fit = sum(i.fitness for i in population) / len(population)
        previous_avg_fit.append(avg_fit)
        if (generation + 1) % era == 0:
            discriminant = sum(previous_avg_fit) / len(previous_avg_fit)
            if avg_fit < discriminant:
                # Low-quality average: mutate more in order to explore more.
                sigma += sigma_step
            elif sigma - sigma_step >= sigma_floor:
                # Threshold comparison instead of float equality, so sigma
                # can never slip to zero or below.
                sigma -= sigma_step
            # Reset the history for the next era.
            previous_avg_fit = []

    best = max(population, key=by_fitness)
    print(f'Best individual has the following fitness: {best.fitness: .2%}\nFitness calls: {counter}\n')
    counter = 0


In [387]:
# -------------------------------------------FIRST IMPLEMENTATION---------------------------------------------------------------
# STRATEGY ADOPTED:
# - Parent selection: tournament selection
# - Crossover: drawn at random from xover_sets, which here holds only uniform_xover
# - Mutation: single-bit flip, applied with probability MUTATION_PROBABILITY
# - Generational approach

PROBLEMS = (("Problem 1:", 1), ("Problem 2:", 2), ("Problem 5:", 5), ("Problem 10:", 10))

p_selection = tournament_selection
xover_sets = [uniform_xover]
mutate = mutate_single_bit
counter = 0

MUTATION_PROBABILITY = .15
NUM_GENERATIONS = 100
# POPULATION_SIZE == OFFSPRING_SIZE ----> generational approach
POPULATION_SIZE = 150
OFFSPRING_SIZE = 150

for label, instance in PROBLEMS:
    print(label)
    fitness = lab3_lib.make_problem(instance)
    ga_algorithm(POPULATION_SIZE, OFFSPRING_SIZE, NUM_GENERATIONS, MUTATION_PROBABILITY,
                 p_selection, xover_sets, mutate)

# Small variant: steady state (fewer offspring than individuals in the population)
POPULATION_SIZE = 200
OFFSPRING_SIZE = 150
print("\nSTEADY STATE APPROACH")
for label, instance in PROBLEMS:
    print(label)
    fitness = lab3_lib.make_problem(instance)
    ga_algorithm(POPULATION_SIZE, OFFSPRING_SIZE, NUM_GENERATIONS, MUTATION_PROBABILITY,
                 p_selection, xover_sets, mutate)

# I ran this cell multiple times: for problem instance 1 the steady-state approach
# brings no improvement over the generational one, for the same number of fitness calls.

Problem 1:
Best individual has the following fitness:  97.60%
Fitness calls: 15150

Problem 2:
Best individual has the following fitness:  48.57%
Fitness calls: 15150

Problem 5:
Best individual has the following fitness:  19.41%
Fitness calls: 15150

Problem 10:
Best individual has the following fitness:  9.66%
Fitness calls: 15150


STEADY STATE APPROACH
Problem 1:
Best individual has the following fitness:  97.20%
Fitness calls: 15200

Problem 2:
Best individual has the following fitness:  64.40%
Fitness calls: 15200

Problem 5:
Best individual has the following fitness:  43.96%
Fitness calls: 15200

Problem 10:
Best individual has the following fitness:  27.17%
Fitness calls: 15200



In [388]:
# -------------------------------------------SECOND IMPLEMENTATION---------------------------------------------------------------
# STRATEGY ADOPTED:
# - Elitism (see ga_algorithm_elitism)
# - Parent selection: tournament selection
# - Crossover: drawn at random from xover_sets, which here holds only uniform_xover
# - Mutation: single-bit flip, applied with probability MUTATION_PROBABILITY
# - Generational approach
PROBLEMS = (("Problem 1:", 1), ("Problem 2:", 2), ("Problem 5:", 5), ("Problem 10:", 10))

p_selection = tournament_selection
xover_sets = [uniform_xover]
mutate = mutate_single_bit
MUTATION_PROBABILITY = .15
NUM_GENERATIONS = 100
POPULATION_SIZE = 160
OFFSPRING_SIZE = 160

for label, instance in PROBLEMS:
    print(label)
    fitness = lab3_lib.make_problem(instance)
    ga_algorithm_elitism(POPULATION_SIZE, OFFSPRING_SIZE, NUM_GENERATIONS, MUTATION_PROBABILITY,
                         p_selection, xover_sets, mutate)

# I ran this cell multiple times: for problem instance 1 the fitness reached
# gets worse, for the same number of fitness calls.

Problem 1:
Best individual has the following fitness:  96.50%
Fitness calls: 15360

Problem 2:
Best individual has the following fitness:  56.80%
Fitness calls: 15360

Problem 5:
Best individual has the following fitness:  49.29%
Fitness calls: 15360

Problem 10:
Best individual has the following fitness:  27.89%
Fitness calls: 15360



In [389]:
# -------------------------------------------THIRD IMPLEMENTATION---------------------------------------------------------------
# STRATEGY ADOPTED:
# - Parent selection: tournament selection
# - Crossover: drawn at random from xover_sets, which here holds only uniform_xover
# - Mutation: n-bit mutation, where n is taken from a Gaussian distribution centered in 0
#   whose sigma adapts over time by looking at the average fitness over an entire era
#   (era = 20% of n_generations)
#   There is no mutation probability here: n is drawn from a Gaussian, so it can
#   happen that the value is 0 (i.e. no mutation)
# - Generational approach

PROBLEMS = (("Problem 1:", 1), ("Problem 2:", 2), ("Problem 5:", 5), ("Problem 10:", 10))

p_selection = tournament_selection
xover_sets = [uniform_xover]
mutate = mutate_n_bit
NUM_GENERATIONS = 100
POPULATION_SIZE = 150
OFFSPRING_SIZE = 150
sigma = 1

for label, instance in PROBLEMS:
    print(label)
    fitness = lab3_lib.make_problem(instance)
    ga_algorithm_adaptive(POPULATION_SIZE, OFFSPRING_SIZE, NUM_GENERATIONS, sigma, p_selection, xover_sets, mutate)

Problem 1:
Best individual has the following fitness:  99.20%
Fitness calls: 15150

Problem 2:
Best individual has the following fitness:  48.90%
Fitness calls: 15150

Problem 5:
Best individual has the following fitness:  19.40%
Fitness calls: 15150

Problem 10:
Best individual has the following fitness:  9.78%
Fitness calls: 15150



In [None]:
# I WOULD LIKE TO IMPLEMENT THESE APPROACHES INSIDE AN ISLAND MODEL AND SEE WHAT IT BRINGS ME
# AFTER THE FIRST COMMIT I WOULD LIKE TO IMPLEMENT THIS KIND OF MODEL