## LAB9
Write a local-search algorithm (e.g., an EA) able to solve the Problem instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:
 - Submission: Sunday, December 3 (CET)
 - Reviews: Sunday, December 10 (CET)

Notes:
 - Reviews will be assigned on Monday, December 4
 - You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit at all)

In [21]:
from random import choice, choices, random
from copy import deepcopy

import lab9_lib

In [22]:
# Run configuration shared by the cells below.
# Upper bound on the number of generations evolved per problem instance;
# the stagnation budget in ea_mutation_memory is derived from it as well.
GENERATIONS = 10000
# Number of loci (bits) in every genome.
INDIVIDUAL_SIZE = 1000
# Total population size handed to ea_mutation_memory (survivors + offspring).
POPULATION_SIZE = 100
# Fitness threshold from which non-improving generations start counting
# towards early termination.
ACCEPTABLE_FITNESS = 0.9

In [23]:
def create_population(number_of_indivinuals, individual_size):
    """Build the initial random population.

    Every individual is a two-element list ``[genome, mutation_history]``:
    the genome is a list of ``individual_size`` random bits (0 or 1) and the
    mutation history starts out as a fresh empty list.

    Returns a list with ``number_of_indivinuals`` such individuals.
    """
    return [
        [choices([0, 1], k=individual_size), []]
        for _ in range(number_of_indivinuals)
    ]


# Score the new individuals, merge them with the already-scored ones and return
# the best `num_survivors` entries, each as [genome, mutation_history, fitness].
def fitness_check(population_with_fitness : list, new_population, fitness_function, num_survivors):
    """Evaluate ``new_population`` and select the fittest survivors.

    Args:
        population_with_fitness: already-scored entries of the form
            ``[genome, mutation_history, fitness]``. They are NOT re-evaluated
            and the list itself is left untouched.
        new_population: unscored individuals ``[genome, mutation_history]``.
        fitness_function: callable applied once to the genome of every new
            individual.
        num_survivors: maximum number of entries to return.

    Returns:
        A new list holding the ``num_survivors`` best entries sorted by
        decreasing fitness. The sort is stable, so on ties previously scored
        entries come before new ones.
    """
    # Work on a copy so the caller's list is not silently grown and reordered.
    scored = list(population_with_fitness)

    # individual consists of [genome, mutation_history]
    scored.extend(
        [individual[0], individual[1], fitness_function(individual[0])]
        for individual in new_population
    )

    scored.sort(key=lambda entry: entry[2], reverse=True)
    return scored[:num_survivors]


# creates a number of new individuals through crossover and generates random mutations
def repopulate(population, new_generation_size, best_fitness, *, mutation_size=100, mutation_rate=0.5):
    """Create ``new_generation_size`` offspring via crossover plus mutation.

    Args:
        population: parent entries whose first two items are
            ``[genome, mutation_history, ...]``; all genomes are expected to
            have the length of the first one.
        new_generation_size: number of offspring to create.
        best_fitness: currently unused; kept so existing callers keep working.
        mutation_size: number of gene indices drawn (with replacement) as
            mutation candidates for an offspring that mutates.
        mutation_rate: probability that a given offspring is mutated at all.

    Returns:
        A list of ``[genome, mutation_history]`` offspring, where the history
        lists the gene indices flipped in this call (each index at most once).
    """
    crossover_parents = 2
    genome_length = len(population[0][0])
    crossover_parent_contribution = genome_length // crossover_parents

    offsprings = []
    for _ in range(new_generation_size):
        parents = choices(population, k=crossover_parents)
        offspring = []
        mutation_history = []
        for i, parent in enumerate(parents):
            start = i * crossover_parent_contribution
            # The last parent also supplies the remainder, so the child keeps
            # the full genome length when it is not evenly divisible.
            if i == crossover_parents - 1:
                end = genome_length
            else:
                end = start + crossover_parent_contribution
            offspring += parent[0][start:end]
            # Inherit only the part of the parent's history that lies in the
            # segment this parent contributed.
            parent_history = set(parent[1])
            mutation_history.extend(g for g in range(start, end) if g in parent_history)
        offsprings.append([offspring, mutation_history])

    # generate mutations in the newly created offsprings
    for offspring in offsprings:
        new_mutation_history = []
        if random() < mutation_rate:
            inherited_history = set(offspring[1])
            mutation_pool = choices(range(len(offspring[0])), k=mutation_size)
            # dict.fromkeys removes duplicate draws while keeping draw order:
            # a gene drawn twice must not be flipped back and logged twice.
            for g in dict.fromkeys(mutation_pool):
                # genes mutated in the parents are skipped this generation
                if g not in inherited_history:
                    offspring[0][g] = 1 - offspring[0][g]
                    new_mutation_history.append(g)
        # the offspring only remembers the mutations applied in this call
        offspring[1] = new_mutation_history
    return offsprings
        

In [24]:
def ea_mutation_memory(fitness, problem_size : int, pop_size : int, ind_size : int):
    """Run the evolutionary algorithm with per-individual mutation memory.

    Half of ``pop_size`` individuals survive each generation; the other half is
    re-created through crossover and mutation of the survivors.

    Args:
        fitness: callable scoring a genome (higher is better).
        problem_size: problem instance identifier, used only in progress output.
        pop_size: total population size (survivors + offspring).
        ind_size: number of loci per genome.

    Returns:
        ``(individual, best_fitness)`` where ``individual`` is the best
        ``[genome, mutation_history]`` found and ``best_fitness`` its score.
    """
    num_survivors = int(pop_size / 2)
    population = create_population(num_survivors, ind_size)
    fitted_population = fitness_check([], population, fitness, num_survivors)
    best_fitness = fitted_population[0][2]
    # Integer budgets: a float counter may never hit the stop condition when
    # GENERATIONS is not evenly divisible.
    stagnation_counter = max(1, GENERATIONS // 20)

    for gen in range(GENERATIONS):

        # create new generation
        offsprings = repopulate(fitted_population, pop_size-num_survivors, best_fitness)

        # score the offspring, sort everything by fitness and keep only the
        # best `num_survivors` (half of the population)
        fitted_population = fitness_check(fitted_population, offsprings, fitness, num_survivors)
        new_best_fitness = fitted_population[0][2]

        print(f"Problem size {problem_size}, Generation {gen+1} : {new_best_fitness:.2%}")

        # check if new best fitness
        if best_fitness < new_best_fitness:
            best_fitness = new_best_fitness
            stagnation_counter = max(1, GENERATIONS // 100)
        elif best_fitness >= ACCEPTABLE_FITNESS:
            # above the acceptable threshold: count non-improving generations
            stagnation_counter -= 1
            if stagnation_counter <= 0:
                break

        # if we reached 100% fitness, end
        if best_fitness >= 1:
            break

    # fitted_population is sorted by decreasing fitness, so its head is the
    # best individual found (not the first member of the initial population).
    return fitted_population[0][:2], best_fitness

In [25]:
# Run the EA once per problem instance and collect one
# [problem_size, best fitness, fitness calls] row per instance in `results`.
results = []
for problem_size in [1, 2, 5, 10]:
    # a fresh fitness function per instance; its `calls` attribute is read
    # after the run (presumably the number of evaluations - see lab9_lib)
    fitness_function = lab9_lib.make_problem(problem_size)
    individual, individual_fitness = ea_mutation_memory(fitness_function, problem_size, POPULATION_SIZE, INDIVIDUAL_SIZE)
    print(f'Final result : {individual_fitness:.2%}')
    # print(f"{''.join(str(i) for i in individual)} : {fitness_function(individual):.2%}")
    calls = fitness_function.calls
    print(f'fitness calls: {calls}')
    results.append([problem_size, individual_fitness, calls])

Problem size 1, Generation 1 : 53.40%
Problem size 1, Generation 2 : 53.40%
Problem size 1, Generation 3 : 53.60%
Problem size 1, Generation 4 : 54.30%
Problem size 1, Generation 5 : 54.30%
Problem size 1, Generation 6 : 56.20%
Problem size 1, Generation 7 : 56.20%
Problem size 1, Generation 8 : 56.80%
Problem size 1, Generation 9 : 56.80%
Problem size 1, Generation 10 : 57.20%
Problem size 1, Generation 11 : 57.20%
Problem size 1, Generation 12 : 57.80%
Problem size 1, Generation 13 : 57.80%
Problem size 1, Generation 14 : 57.80%
Problem size 1, Generation 15 : 57.90%
Problem size 1, Generation 16 : 59.00%
Problem size 1, Generation 17 : 59.00%
Problem size 1, Generation 18 : 59.70%
Problem size 1, Generation 19 : 59.70%
Problem size 1, Generation 20 : 59.70%
Problem size 1, Generation 21 : 60.10%
Problem size 1, Generation 22 : 60.10%
Problem size 1, Generation 23 : 60.30%
Problem size 1, Generation 24 : 60.40%
Problem size 1, Generation 25 : 60.50%
Problem size 1, Generation 26 : 60

In [26]:
# Print a summary of every run; each row of `results` is
# [problem_size, best fitness, fitness calls].
for result in results:
    print(f'Problen size: {result[0]}')
    print(f'Best fitness: {result[1]:.2%}')
    print(f'Fitness calls: {result[2]}')
    print()

Problen size: 1
Best fitness: 70.00%
Fitness calls: 500050

Problen size: 2
Best fitness: 66.40%
Fitness calls: 500050

Problen size: 5
Best fitness: 46.77%
Fitness calls: 500050

Problen size: 10
Best fitness: 35.86%
Fitness calls: 500050

