# LAB9

Write a local-search algorithm (e.g., an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit at all)

In [1]:
from __future__ import annotations

from random import choices, randint, random, shuffle
from dataclasses import dataclass
from typing import Callable
from tqdm import trange
import numpy as np
import math


import lab9_lib
from lab9_lib import AbstractProblem

In [2]:
# Default problem instance; every experiment cell further down reassigns it.
PROBLEM_SIZE = 10
# Genome length used by create_population when building random individuals.
LOCI = 1000

In [3]:
@dataclass(frozen=True, init=False)
class Individual:
    """Immutable candidate solution: a genotype together with its cached fitness.

    The fitness is computed once, inside ``__init__``, so every construction
    costs exactly one call to the fitness function.
    """

    _n_loci: int
    _genotype: tuple[int, ...]
    _fitness: float
    # Called with the genotype tuple (not with the Individual itself).
    _fitness_function: Callable[[tuple[int, ...]], float]

    def __init__(
        self,
        fitness_function: "AbstractProblem",
        n_loci: int,
        genotype: tuple[int, ...] | None = None,
    ):
        """Build an individual and evaluate its fitness.

        Args:
            fitness_function: callable applied to the genotype to obtain the fitness.
            n_loci: expected number of genes.
            genotype: gene sequence; when omitted, ``n_loci`` random 0/1 genes are drawn.

        Raises:
            ValueError: if the genotype length differs from ``n_loci``.
        """
        if genotype is None:
            genotype = choices([0, 1], k=n_loci)
        # Always store a tuple so a frozen Individual cannot be altered through
        # a list that the caller still holds.
        genotype = tuple(genotype)
        # Explicit raise instead of ``assert``: assertions vanish under ``python -O``.
        if len(genotype) != n_loci:
            raise ValueError('n_loci field and genotype length do not match')

        # The dataclass is frozen, so fields have to be set through object.__setattr__.
        object.__setattr__(self, "_n_loci", n_loci)
        object.__setattr__(self, "_genotype", genotype)
        object.__setattr__(self, "_fitness_function", fitness_function)
        object.__setattr__(self, "_fitness", fitness_function(genotype))

    @property
    def n_loci(self):
        """Number of genes in the genotype."""
        return self._n_loci

    @property
    def genotype(self):
        """The gene tuple."""
        return self._genotype

    @property
    def fitness(self):
        """Fitness value computed at construction time."""
        return self._fitness

    @property
    def fitness_function(self):
        """The callable that was used to evaluate this individual."""
        return self._fitness_function

    def __str__(self):
        return f'Individual(Zeros={self.genotype.count(0)}, Ones={self.genotype.count(1)})'

    def __repr__(self) -> str:
        # Compact form: the genes concatenated into a single string.
        return ''.join(map(str, self.genotype))

In [4]:
def mutate(ind: Individual) -> Individual:
    """Return a new individual equal to *ind* except for one randomly chosen flipped gene.

    Constructing the result evaluates the fitness function once.
    """
    position = randint(0, ind.n_loci - 1)
    genes = list(ind.genotype)
    genes[position] = 1 - genes[position]
    return Individual(ind.fitness_function, ind.n_loci, tuple(genes))


def mutate_all(ind: Individual, *, prob: float) -> Individual:
    """Return a new individual in which every gene of *ind* is flipped independently.

    Args:
        ind: the individual to copy and perturb.
        prob: per-gene flip probability (one random draw per gene, in order).
    """
    mutated = []
    for gene in ind.genotype:
        if random() < prob:
            gene = 1 - gene
        mutated.append(gene)
    return Individual(ind.fitness_function, ind.n_loci, tuple(mutated))


def uniform_crossover(ind1: Individual, ind2: Individual) -> Individual:
    """Build a child whose genes are taken, position by position, from either parent.

    Each position comes from *ind1* when its random draw is below 0.5 and from
    *ind2* otherwise. The fitness function and genome length come from *ind1*.
    """
    n_loci = ind1.n_loci
    # All draws are made up front, one per locus.
    coin_flips = [random() for _ in range(n_loci)]
    child_genes = []
    for from_first, from_second, flip in zip(ind1.genotype, ind2.genotype, coin_flips):
        child_genes.append(from_first if flip < 0.5 else from_second)
    assert len(child_genes) == n_loci
    return Individual(ind1.fitness_function, n_loci, tuple(child_genes))


def one_cut_crossover(ind1: Individual, ind2: Individual) -> Individual:
    """Build a child from the head of *ind1* and the tail of *ind2*.

    The cut index is drawn uniformly from ``0 .. n_loci - 1``; a cut at 0
    therefore yields a gene-for-gene copy of *ind2*. The fitness function and
    genome length come from *ind1*.
    """
    n_loci = ind1.n_loci
    cut = randint(0, n_loci - 1)
    head = tuple(ind1.genotype[:cut])
    tail = tuple(ind2.genotype[cut:])
    offspring = Individual(ind1.fitness_function, n_loci, head + tail)
    assert len(offspring.genotype) == n_loci
    return offspring


def select_parent(population: list[Individual], tournament_size: int) -> Individual:
    """Tournament selection: draw contestants with replacement, return the fittest.

    Args:
        population: individuals to draw from.
        tournament_size: number of contestants drawn.
    """

    def by_fitness(candidate):
        return candidate.fitness

    return max(choices(population, k=tournament_size), key=by_fitness)


def create_population(
    fitness_function: "AbstractProblem", population_size: int, n_loci: int | None = None
) -> list[Individual]:
    """Create ``population_size`` random individuals.

    Args:
        fitness_function: callable used to evaluate every new individual
            (one evaluation per individual created).
        population_size: number of individuals to create.
        n_loci: genome length; defaults to the module-level ``LOCI``.

    Returns:
        A list of freshly generated individuals.
    """
    if n_loci is None:
        # Resolved at call time so that a later reassignment of LOCI is honoured.
        n_loci = LOCI
    return [Individual(fitness_function, n_loci) for _ in range(population_size)]


def update_population(
    population: list[Individual], offspring: list[Individual], population_size: int
) -> list[Individual]:
    """Merge parents and offspring and keep the ``population_size`` fittest.

    Neither input list is modified; a new list is returned.

    Args:
        population: current individuals.
        offspring: newly generated individuals.
        population_size: maximum number of survivors.

    Returns:
        The survivors, sorted by fitness in descending order (best first).
    """
    # Build a fresh list instead of extending/sorting the caller's list in place.
    # The sort is stable, so on ties parents stay ahead of offspring.
    ranked = sorted([*population, *offspring], key=lambda ind: ind.fitness, reverse=True)
    return ranked[:population_size]


def find_best_individual(population: list[Individual]) -> Individual:
    """Return the individual with the highest fitness (the first one on ties)."""

    def fitness_of(candidate):
        return candidate.fitness

    return max(population, key=fitness_of)


def choose_migrants(population: list[Individual], n: int, random: bool) -> tuple[list[Individual], list[Individual]]:
    """Split a population into ``n`` migrants and the individuals that stay.

    The input list is left untouched; both returned lists are new.

    Args:
        population: individuals of one island.
        n: number of migrants to pick.
        random: when true, migrants are picked at random; otherwise the ``n``
            fittest individuals are picked. (This parameter shadows the
            ``random()`` function imported at file level inside this body.)

    Returns:
        ``(migrants, remaining)``.
    """
    # Work on a copy so the caller's list is neither shuffled nor re-sorted.
    pool = list(population)
    if random:
        shuffle(pool)
    else:
        pool.sort(key=lambda ind: ind.fitness, reverse=True)
    return pool[:n], pool[n:]

## Problem Instance: 1

In [5]:
# Settings for the problem-instance-1 run.
PROBLEM_SIZE = 1  # instance number handed to lab9_lib.make_problem
LOCI = 1000  # genome length
POPULATION_SIZE = 30  # individuals kept after each generation
OFFSPRING_SIZE = 20  # children generated per generation
TOURNAMENT_SIZE = 2  # contestants drawn for each parent selection
MUTATION_PROBABILITY = 0.80  # chance a child comes from mutation alone; otherwise one-cut crossover
GENERATIONS = 5000  # upper bound on generations; the run stops early once fitness reaches 1

# A fresh problem object per experiment; the report cell after the run reads its `calls` attribute.
FITNESS_FUNCTION = lab9_lib.make_problem(PROBLEM_SIZE)

In [6]:
# Problem 1: (mu + lambda)-style loop in which every child is produced either by a
# single-gene mutation or by one-cut crossover, never both.
population = create_population(FITNESS_FUNCTION, POPULATION_SIZE)

pbar = trange(GENERATIONS)
for generation in pbar:
    offspring = []
    for _ in range(OFFSPRING_SIZE):
        if random() < MUTATION_PROBABILITY:
            # Mutation branch: one tournament winner, one flipped gene.
            parent = select_parent(population, TOURNAMENT_SIZE)
            child = mutate(parent)
        else:
            # Crossover branch: two tournament winners, no mutation afterwards.
            parent1 = select_parent(population, TOURNAMENT_SIZE)
            parent2 = select_parent(population, TOURNAMENT_SIZE)
            child = one_cut_crossover(parent1, parent2)
        offspring.append(child)

    # update_population returns the survivors sorted best-first, so index 0 is the current best.
    population = update_population(population, offspring, POPULATION_SIZE)
    pbar.set_description(f'Best Fitness {population[0].fitness:.2%}')

    # Stop as soon as the best fitness is (numerically) 1.
    if math.isclose(1, population[0].fitness):
        break

print(f"Best individual: {population[0]}, fitness: {population[0].fitness:.2%}")

Best Fitness 100.00%:  26%|██▌       | 1312/5000 [00:02<00:06, 597.28it/s]

Best individual: Individual(Zeros=0, Ones=1000), fitness: 100.00%





In [7]:
# Report the `calls` attribute of the problem object
# (presumably incremented by lab9_lib on every fitness evaluation — confirm).
print(f'Number of fitness calls: {FITNESS_FUNCTION.calls:,}')

Number of fitness calls: 26,290


## Problem Instance: 2

In [33]:
# Settings for the problem-instance-2 run.
PROBLEM_SIZE = 2  # instance number handed to lab9_lib.make_problem
LOCI = 1000  # genome length
POPULATION_SIZE = 50  # individuals kept after each generation
OFFSPRING_SIZE = 25  # children generated per regular generation
TOURNAMENT_SIZE = 2  # contestants drawn for each parent selection
CROSSOVER_MUTATION = 0.35  # chance a child is built by uniform crossover followed by mutation; otherwise mutation only
GENERATIONS = 20000  # upper bound on generations; the run stops early once fitness reaches 1
SURVIVORS_PERCENTAGE = 0.15  # share of the population size kept when the population is restarted
STD = 0.0005  # fitness standard deviation below which a restart is triggered

# A fresh problem object per experiment; the report cell after the run reads its `calls` attribute.
FITNESS_FUNCTION = lab9_lib.make_problem(PROBLEM_SIZE)

In [34]:
# Problem 2: EA with a diversity-triggered restart.
population = create_population(FITNESS_FUNCTION, POPULATION_SIZE)
# The best individual is tracked separately because a restart samples survivors at
# random and may therefore drop the current best from the population.
best_individual = find_best_individual(population)

pbar = trange(GENERATIONS)
for generation in pbar:
    offspring = []

    if np.std([ind.fitness for ind in population]) < STD:
        # Restart: fitness values have collapsed. Keep a small random sample of the
        # population (drawn with replacement) and refill with brand-new random individuals.
        population = choices(population, k=math.ceil(POPULATION_SIZE * SURVIVORS_PERCENTAGE))
        offspring.extend(create_population(FITNESS_FUNCTION, math.floor(POPULATION_SIZE * (1 - SURVIVORS_PERCENTAGE))))
    else:
        for _ in range(OFFSPRING_SIZE):
            if random() < CROSSOVER_MUTATION:
                # Uniform crossover, then a single-gene mutation of the result
                # (two individuals are constructed, hence two fitness evaluations).
                parent1 = select_parent(population, TOURNAMENT_SIZE)
                parent2 = select_parent(population, TOURNAMENT_SIZE)
                child = mutate(uniform_crossover(parent1, parent2))
            else:
                parent = select_parent(population, TOURNAMENT_SIZE)
                child = mutate(parent)
            offspring.append(child)

    # update_population returns the survivors sorted best-first, so index 0 is the current best.
    population = update_population(population, offspring, POPULATION_SIZE)
    pbar.set_description(f'Best Fitness {population[0].fitness:.2%}')

    if population[0].fitness > best_individual.fitness:
        best_individual = population[0]

    # Stop as soon as the best fitness is (numerically) 1.
    if math.isclose(1, population[0].fitness):
        break

print(f"Best individual: {best_individual}, fitness: {best_individual.fitness:.2%}")

Best Fitness 100.00%:  46%|████▋     | 9271/20000 [00:30<00:35, 303.54it/s]

Best individual: Individual(Zeros=0, Ones=1000), fitness: 100.00%





In [38]:
# Report the `calls` attribute of the problem object
# (presumably incremented by lab9_lib on every fitness evaluation — confirm).
print(f'Number of fitness calls: {FITNESS_FUNCTION.calls:,}')

Number of fitness calls: 313,075


## Problem Instance: 5

In [85]:
# Settings for the problem-instance-5 run.
PROBLEM_SIZE = 5  # instance number handed to lab9_lib.make_problem
LOCI = 1000  # genome length
POPULATION_SIZE = 50  # individuals kept after each generation
OFFSPRING_SIZE = 25  # children generated per regular generation
TOURNAMENT_SIZE = 2  # contestants drawn for each parent selection
CROSSOVER_MUTATION = 0.35  # chance a child is built by uniform crossover followed by mutation; otherwise mutation only
GENERATIONS = 50000  # upper bound on generations; the run stops early once fitness reaches 1
SURVIVORS_PERCENTAGE = 0.15  # share of the population size kept when the population is restarted
STD = 0.0005  # fitness standard deviation below which a restart is triggered

# A fresh problem object per experiment; the report cell after the run reads its `calls` attribute.
FITNESS_FUNCTION = lab9_lib.make_problem(PROBLEM_SIZE)

In [86]:
# Problem 5: EA with a diversity-triggered restart.
population = create_population(FITNESS_FUNCTION, POPULATION_SIZE)
# The best individual is tracked separately because a restart samples survivors at
# random and may therefore drop the current best from the population.
best_individual = find_best_individual(population)

pbar = trange(GENERATIONS)
for generation in pbar:
    offspring = []

    if np.std([ind.fitness for ind in population]) < STD:
        # Restart: fitness values have collapsed. Keep a small random sample of the
        # population (drawn with replacement) and refill with brand-new random individuals.
        population = choices(population, k=math.ceil(POPULATION_SIZE * SURVIVORS_PERCENTAGE))
        offspring.extend(create_population(FITNESS_FUNCTION, math.floor(POPULATION_SIZE * (1 - SURVIVORS_PERCENTAGE))))
    else:
        for _ in range(OFFSPRING_SIZE):
            if random() < CROSSOVER_MUTATION:
                # Uniform crossover, then a single-gene mutation of the result
                # (two individuals are constructed, hence two fitness evaluations).
                parent1 = select_parent(population, TOURNAMENT_SIZE)
                parent2 = select_parent(population, TOURNAMENT_SIZE)
                child = mutate(uniform_crossover(parent1, parent2))
            else:
                parent = select_parent(population, TOURNAMENT_SIZE)
                child = mutate(parent)
            offspring.append(child)

    # update_population returns the survivors sorted best-first, so index 0 is the current best.
    population = update_population(population, offspring, POPULATION_SIZE)
    pbar.set_description(f'Best Fitness {population[0].fitness:.2%}')

    if population[0].fitness > best_individual.fitness:
        best_individual = population[0]

    # Stop as soon as the best fitness is (numerically) 1.
    if math.isclose(1, population[0].fitness):
        break

print(f"Best individual: {best_individual}, fitness: {best_individual.fitness:.2%}")

Best Fitness 67.90%: 100%|██████████| 50000/50000 [02:58<00:00, 279.97it/s]

Best individual: Individual(Zeros=275, Ones=725), fitness: 67.99%





In [88]:
# Report the `calls` attribute of the problem object
# (presumably incremented by lab9_lib on every fitness evaluation — confirm).
print(f'Number of fitness calls: {FITNESS_FUNCTION.calls:,}')

Number of fitness calls: 1,714,111


## Problem Instance: 10

In [46]:
# Settings for the problem-instance-10 run (island model).
PROBLEM_SIZE = 10  # instance number handed to lab9_lib.make_problem
LOCI = 1000  # genome length
POPULATION_SIZE = 50  # individuals kept per island after each generation
OFFSPRING_SIZE = 25  # children generated per island per regular generation
TOURNAMENT_SIZE = 2  # contestants drawn for each parent selection
CROSSOVER_MUTATION = 0.25  # chance a child is built by one-cut crossover followed by mutation; otherwise mutation only
GENERATIONS = 10000  # upper bound on generations; the run stops early once fitness reaches 1
SURVIVORS_PERCENTAGE = 0.15  # share of the population size kept when an island is restarted
STD = 0.0005  # fitness standard deviation below which an island is restarted
NUMBER_OF_ISLANDS = 50  # independent sub-populations
MIGRATION_TIME = 50  # a migration happens every MIGRATION_TIME generations
NUMBER_OF_MIGRANTS = int(POPULATION_SIZE * 0.5)  # individuals each island of a pair hands over

# A fresh problem object per experiment; the report cell after the run reads its `calls` attribute.
FITNESS_FUNCTION = lab9_lib.make_problem(PROBLEM_SIZE)

In [47]:
# Problem 10: island model. Every island evolves on its own; periodically islands are
# paired up and swap their fittest individuals.
islands = [create_population(FITNESS_FUNCTION, POPULATION_SIZE) for _ in range(NUMBER_OF_ISLANDS)]
# Best of the per-island bests.
best_individual = find_best_individual(map(find_best_individual, islands))

pbar = trange(GENERATIONS)
for generation in pbar:
    # True on every MIGRATION_TIME-th generation (counting from 1).
    migrate = (generation + 1) % MIGRATION_TIME == 0
    if migrate:
        new_islands = []
        for _ in range(int(NUMBER_OF_ISLANDS // 2)):
            # Take two islands off the end of the list and pair them.
            island1, island2 = islands.pop(), islands.pop()
            # random=False: the migrants are the NUMBER_OF_MIGRANTS fittest of each island.
            migrants1, island1 = choose_migrants(island1, NUMBER_OF_MIGRANTS, random=False)
            migrants2, island2 = choose_migrants(island2, NUMBER_OF_MIGRANTS, random=False)
            # Swap: each island receives the other island's migrants.
            island1.extend(migrants2)
            island2.extend(migrants1)
            new_islands.extend([island1, island2])
        # With an odd number of islands one is left unpaired; carry it over unchanged.
        if len(islands) == 1:
            new_islands.append(islands.pop())
        islands = new_islands
    new_islands = []
    for island in islands:
        offspring = []
        # The restart check is skipped on migration generations.
        if not migrate and np.std([ind.fitness for ind in island]) < STD:
            # Restart: keep a small random sample of the island (drawn with replacement)
            # and refill with brand-new random individuals.
            island = choices(island, k=math.ceil(POPULATION_SIZE * SURVIVORS_PERCENTAGE))
            offspring.extend(
                create_population(FITNESS_FUNCTION, math.floor(POPULATION_SIZE * (1 - SURVIVORS_PERCENTAGE)))
            )
        else:
            for _ in range(OFFSPRING_SIZE):
                if random() < CROSSOVER_MUTATION:
                    # One-cut crossover, then a single-gene mutation of the result
                    # (two individuals are constructed, hence two fitness evaluations).
                    parent1 = select_parent(island, TOURNAMENT_SIZE)
                    parent2 = select_parent(island, TOURNAMENT_SIZE)
                    child = mutate(one_cut_crossover(parent1, parent2))
                else:
                    parent = select_parent(island, TOURNAMENT_SIZE)
                    child = mutate(parent)
                offspring.append(child)
        island = update_population(island, offspring, POPULATION_SIZE)
        new_islands.append(island)
    islands = new_islands

    best_current_generation = find_best_individual(map(find_best_individual, islands))
    pbar.set_description(f'Best Fitness {best_current_generation.fitness:.2%}')

    # Keep the best individual ever seen, since restarts can remove it from its island.
    if best_current_generation.fitness > best_individual.fitness:
        best_individual = best_current_generation

    # Stop as soon as the best fitness is (numerically) 1.
    if math.isclose(1, best_current_generation.fitness):
        break

print(f"Best individual: {best_individual}, fitness: {best_individual.fitness:.2%}")

Best Fitness 53.00%: 100%|██████████| 10000/10000 [19:36<00:00,  8.50it/s]

Best individual: Individual(Zeros=470, Ones=530), fitness: 53.00%





In [48]:
# Report the `calls` attribute of the problem object
# (presumably incremented by lab9_lib on every fitness evaluation — confirm).
print(f'Number of fitness calls: {FITNESS_FUNCTION.calls:,}')

Number of fitness calls: 16,075,210
