# LAB9

Write a local-search algorithm (eg. an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on a 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned  on Monday, December 4
* You need to commit in order to be selected as a reviewer (ie. better to commit an empty work than not to commit)

In [1]:
from __future__ import annotations

from random import choices, randint, random
from dataclasses import dataclass, field
from copy import deepcopy
from typing import Callable
import numpy as np
import math
from tqdm.notebook import trange


import lab9_lib
from lab9_lib import AbstractProblem

In [2]:
PROBLEM_SIZE = 10
LOCI = 1000

In [3]:
fitness = lab9_lib.make_problem(PROBLEM_SIZE)
for n in range(10):
    ind = choices([0, 1], k=50)
    print(f"{''.join(str(g) for g in ind)}: {fitness(ind):.2%}")

ind = choices([1], k=50)
print(f"{''.join(str(g) for g in ind)}: {fitness(ind):.2%}")
print(fitness.calls)

00110000110011101000101001110001101111011110010010: 9.34%
00000010111011110111010001011011000111001101011100: 15.33%
11110111011100010100010000010101001011000010000010: 15.56%
11010011010001001001000100000011111111000000110101: 7.33%
00100110111111110100011011110101111101101111001101: 9.11%
10110000100011110101100110001001101000010011011001: 15.34%
00010001010000101001101111001011001010111001100001: 15.34%
11111101101011001000100110001010010011011111001010: 19.33%
11101001001000101011011111010110001110101010110010: 9.13%
01110000000111011100010011011100101010100001010110: 35.56%
11111111111111111111111111111111111111111111111111: 100.00%
11


In [4]:
@dataclass(frozen=True, init=False)
class Individual:
    _n_loci: int
    _genotype: tuple[int]
    _fitness: float
    _fitness_function: Callable[[Individual], float]

    def __init__(self, fitness_function: "AbstractProblem", n_loci: int, genotype: tuple[int] = None):
        object.__setattr__(self, "_n_loci", n_loci)
        if genotype is None:
            genotype = tuple(choices([0, 1], k=n_loci))
        object.__setattr__(self, "_genotype", genotype)
        assert len(self._genotype) == self._n_loci, 'n_loci field and genotype length do not match'
        object.__setattr__(self, "_fitness_function", fitness_function)
        object.__setattr__(self, "_fitness", fitness_function(self.genotype))

    @property
    def n_loci(self):
        return self._n_loci

    @property
    def genotype(self):
        return self._genotype

    @property
    def fitness(self):
        return self._fitness

    @property
    def fitness_function(self):
        return self._fitness_function

    def __str__(self):
        return f'Individual(Zeros={self.genotype.count(0)}, Ones={self.genotype.count(1)})'

    def __repr__(self) -> str:
        return f"{''.join(str(gene) for gene in self.genotype)}"

In [30]:
def mutate_all(ind: Individual, *, prob: float) -> Individual:
    fitness_function, n_loci, genotype = ind.fitness_function, ind.n_loci, list(ind.genotype)
    genotype = [1 - gene if random() < prob else gene for gene in genotype]
    return Individual(fitness_function, n_loci, tuple(genotype))


def mutate(ind: Individual) -> Individual:
    fitness_function, n_loci, genotype = ind.fitness_function, ind.n_loci, list(ind.genotype)
    index = randint(0, n_loci - 1)
    genotype[index] = 1 - genotype[index]
    return Individual(fitness_function, n_loci, tuple(genotype))


def uniform_crossover(ind1: "Individual", ind2: "Individual") -> "Individual":
    fitness_function, n_loci = ind1.fitness_function, ind1.n_loci
    randoms = [random() for _ in range(n_loci)]
    genotype = [i if r < 0.5 else o for i, o, r in zip(ind1.genotype, ind2.genotype, randoms)]
    assert len(genotype) == n_loci
    return Individual(fitness_function, n_loci, tuple(genotype))


def one_cut_crossover(ind1: "Individual", ind2: "Individual") -> "Individual":
    fitness_function, n_loci = ind1.fitness_function, ind1.n_loci
    index = randint(0, n_loci - 1)
    offspring = Individual(fitness_function, n_loci, tuple(ind1.genotype[:index]) + tuple(ind2.genotype[index:]))
    assert len(offspring.genotype) == n_loci
    return offspring


def select_parent(population: list[Individual], tournament_size: int):
    pool = choices(population, k=tournament_size)
    champ = max(pool, key=lambda ind: ind.fitness)
    return champ

## Problem Instance: 1

In [6]:
PROBLEM_SIZE = 1
LOCI = 1000
POPULATION_SIZE = 30
OFFSPRING_SIZE = 20
TOURNAMENT_SIZE = 2
MUTATION_PROBABILITY = 0.15
GENERATIONS = 5000

FITNESS_FUNCTION = lab9_lib.make_problem(PROBLEM_SIZE)

In [7]:
population = [Individual(FITNESS_FUNCTION, LOCI) for _ in range(POPULATION_SIZE)]

pbar = trange(GENERATIONS)
for generation in pbar:
    offspring = []
    for _ in range(OFFSPRING_SIZE):
        if random() < MUTATION_PROBABILITY:
            parent = select_parent(population, TOURNAMENT_SIZE)
            child = mutate(parent)
        else:
            parent1 = select_parent(population, TOURNAMENT_SIZE)
            parent2 = select_parent(population, TOURNAMENT_SIZE)
            child = one_cut_crossover(parent1, parent2)
        offspring.append(child)

    population.extend(offspring)
    population.sort(key=lambda ind: ind.fitness, reverse=True)
    population = population[:POPULATION_SIZE]
    pbar.set_description(f'Best Fitness {population[0].fitness:.2%}')

    if math.isclose(1, population[0].fitness):
        break

print(f"Best individual: {population[0]}, fitness: {population[0].fitness:.2%}")

  0%|          | 0/5000 [00:00<?, ?it/s]

Best individual: Individual(Zeros=0, Ones=1000), fitness: 100.00%


In [8]:
print(f'Number of fitness calls: {FITNESS_FUNCTION.calls}')

Number of fitness calls: 48830


## Problem Instance: 2

In [97]:
PROBLEM_SIZE = 2
LOCI = 1000
POPULATION_SIZE = 50
OFFSPRING_SIZE = 25
TOURNAMENT_SIZE = 2
CROSSOVER_MUTATION = 0.25
GENERATIONS = 20000
SURVIVORS_PERCENTAGE = 0.15
STD = 0.0005

FITNESS_FUNCTION = lab9_lib.make_problem(PROBLEM_SIZE)

In [98]:
population = [Individual(FITNESS_FUNCTION, LOCI) for _ in range(POPULATION_SIZE)]
best_individual = max(population, key=lambda ind: ind.fitness)

pbar = trange(GENERATIONS)
for generation in pbar:
    offspring = []

    if np.std([ind.fitness for ind in population]) < STD:
        population = choices(population, k=int(POPULATION_SIZE * SURVIVORS_PERCENTAGE))

        for _ in range(int(POPULATION_SIZE * (1 - SURVIVORS_PERCENTAGE))):
            offspring.append(Individual(FITNESS_FUNCTION, LOCI))
    else:
        for _ in range(OFFSPRING_SIZE):
            if random() < CROSSOVER_MUTATION:
                parent1 = select_parent(population, TOURNAMENT_SIZE)
                parent2 = select_parent(population, TOURNAMENT_SIZE)
                child = mutate(uniform_crossover(parent1, parent2))
            else:
                parent = select_parent(population, TOURNAMENT_SIZE)
                child = mutate(parent)
            offspring.append(child)

    population.extend(offspring)
    population.sort(key=lambda ind: ind.fitness, reverse=True)
    population = population[:POPULATION_SIZE]
    pbar.set_description(f'Best Fitness {population[0].fitness:.2%}')

    if population[0].fitness > best_individual.fitness:
        best_individual = population[0]

    if math.isclose(1, population[0].fitness):
        break

print(f"Best individual: {best_individual}, fitness: {best_individual.fitness:.2%}")

  0%|          | 0/20000 [00:00<?, ?it/s]

Best individual: Individual(Zeros=0, Ones=1000), fitness: 100.00%


In [99]:
print(f'Number of fitness calls: {FITNESS_FUNCTION.calls}')

Number of fitness calls: 419152


## Problem Instance: 5

In [103]:
PROBLEM_SIZE = 5
LOCI = 1000
POPULATION_SIZE = 50
OFFSPRING_SIZE = 25
TOURNAMENT_SIZE = 2
CROSSOVER_MUTATION = 0.25
GENERATIONS = 20000
SURVIVORS_PERCENTAGE = 0.15
STD = 0.0005

FITNESS_FUNCTION = lab9_lib.make_problem(PROBLEM_SIZE)

In [104]:
population = [Individual(FITNESS_FUNCTION, LOCI) for _ in range(POPULATION_SIZE)]
best_individual = max(population, key=lambda ind: ind.fitness)

pbar = trange(GENERATIONS)
for generation in pbar:
    offspring = []

    if np.std([ind.fitness for ind in population]) < STD:
        population = choices(population, k=int(POPULATION_SIZE * SURVIVORS_PERCENTAGE))

        for _ in range(int(POPULATION_SIZE * (1 - SURVIVORS_PERCENTAGE))):
            offspring.append(Individual(FITNESS_FUNCTION, LOCI))
    else:
        for _ in range(OFFSPRING_SIZE):
            if random() < CROSSOVER_MUTATION:
                parent1 = select_parent(population, TOURNAMENT_SIZE)
                parent2 = select_parent(population, TOURNAMENT_SIZE)
                child = mutate(uniform_crossover(parent1, parent2))
            else:
                parent = select_parent(population, TOURNAMENT_SIZE)
                child = mutate(parent)
            offspring.append(child)

    population.extend(offspring)
    population.sort(key=lambda ind: ind.fitness, reverse=True)
    population = population[:POPULATION_SIZE]
    pbar.set_description(f'Best Fitness {population[0].fitness:.2%}')

    if population[0].fitness > best_individual.fitness:
        best_individual = population[0]

    if math.isclose(1, population[0].fitness):
        break

print(f"Best individual: {best_individual}, fitness: {best_individual.fitness:.2%}")

  0%|          | 0/20000 [00:00<?, ?it/s]

Best individual: Individual(Zeros=475, Ones=525), fitness: 52.50%


In [105]:
print(f'Number of fitness calls: {FITNESS_FUNCTION.calls}')

Number of fitness calls: 625163
