# Lab9 - Black Box EA

Wrote a local-search algorithm (eg. an EA) able to solve the Problem instances 1, 2, 5, and 10 on a 1000-loci genomes, using a minimum number of fitness calls. That's all.


In [22]:
import os 
if "lab9_lib.py" not in os.listdir("."):
    !curl https://raw.githubusercontent.com/squillero/computational-intelligence/master/2023-24/lab9_lib.py > lab9_lib.py
    

In [23]:
from lab9_lib import make_problem
from tqdm.autonotebook import tqdm, trange
from collections import namedtuple
from copy import deepcopy
from dataclasses import dataclass, field
import random
from typing import Literal, Union, Callable
import numpy as np

In [24]:
LOCI = 1000
Gene = Literal[0,1]
Genome = tuple[Gene]

@dataclass(frozen=True, repr=False)
class Individual:

    genome: tuple[Gene] = field(default_factory=lambda: list(random.choices([0, 1], k = LOCI)), repr=False)
    _fitness: float = field(default=None, init=False, compare=False)

    def mutate(it: "Individual") -> "Individual":
        gene_to_mutate = 1
        mutated_genome = [*it.genome]
        for _ in range(gene_to_mutate):
            ind = random.randrange(LOCI)
            mutated_genome[ind] = 1-mutated_genome[ind]
        return Individual(mutated_genome)

    def crossover(it: "Individual", other: "Individual") -> "Individual":
        return Individual(
            [i if r < .5 else o for i, o, r in zip(it.genome, other.genome, [random.random() for _ in range(LOCI)])]
        )

    def evaluate(self: "Individual", fitness_fn: Callable[[Genome], float]) -> float:
        """Wrapped evaluation inside individual to allow some kind of caching

        Args:
            self (Individual): Individual
            fitness_fn (Callable[[Genome], float]): Fitness function

        Returns:
            float: fitness
        """
        if self._fitness is None:
            fitness = fitness_fn(self.genome)
            object.__setattr__(self, "_fitness", fitness)
        return self._fitness

    def __repr__(self: "Individual"):
        return f"Individual(Zeros={sum([1 for it in self.genome if it == 0])}, Ones={sum([it for it in self.genome])})"
        
    def __str__(self: "Individual"):
        return self.__repr__()
    
    @property
    def phenotype(self) -> str:
        return "".join(str(bit) for bit in self.genome)

In [40]:
EPOCHS = 10
MU = 10
LAMBDA = 10
MUT_RATE = 0.1
PROB_SIZE = 10
a = Individual()
b = Individual([*a.genome])
b.genome[1] = 1-b.genome[1]
c = Individual.crossover(a, b)
a, b, c, a == c, b == c

(Individual(Zeros=500, Ones=500),
 Individual(Zeros=499, Ones=501),
 Individual(Zeros=500, Ones=500),
 True,
 False)

In [19]:
def train(*, variant: Literal["comma", "plus"] = "comma",
          problem_size: int = None, mu: int = None, lambda_: int = None, epochs: int = None,
            mutation_rate: float = None, training_factor: float = 1.1):
    if epochs is None:
        epochs = EPOCHS
    if problem_size is None:
        problem_size = PROB_SIZE
    if lambda_ is None:
        lambda_ = LAMBDA
    if mu is None:
        mu = MU
    if mutation_rate is None:
        mutation_rate = MUT_RATE

    problem = make_problem(problem_size)

    parents = [Individual() for _ in range(mu)]
    parents_result = [problem(p.genome) for p in parents]
    pbar = trange(0, epochs, unit="epoch")
    streak_bar = tqdm(total=lambda_, desc="Evaluating offspring fitness", unit="streak", colour="gray", leave=False)
    for _ in pbar:
        pbar.set_description(f"Training - Fitness: {max(parents_result):.2%} p Calls: {problem.calls}")
        offspring = [(random.choice(parents)).mutate() for _ in range(lambda_)]
        results = []
        streak_bar.reset(total=lambda_)
        for i in offspring:
            results.append(problem(i.phenotype))
            streak_bar.update(1)

        incrate = (np.sum([res > sum(parents_result)/len(parents_result) for res in results])/lambda_)

        if incrate > 1/5:
            mutation_rate *= training_factor
        elif incrate < 1/5:
            mutation_rate /= training_factor

        
        population = list(zip(results, offspring))
        if variant == "plus":
            population.extend(list(zip(parents_result, parents)))
        population = sorted(population, key=lambda i:i[0], reverse=True)[:mu]

        parents = [it[1] for it in population]
        parents_result = [it[0] for it in population]
    streak_bar.close()
    best_ind = np.argmax(parents_result)

    return {
        "best": (parents_result[best_ind], parents[best_ind]),
        "parents": list(zip(parents_result, parents)),
        "mutation_rate": mutation_rate
    }


In [20]:
a = train(problem_size=50, epochs=100, variant='plus', mu=1, lambda_=50, mutation_rate=1)['best']
a

  0%|          | 0/100 [00:00<?, ?epoch/s]

Evaluating offspring fitness:   0%|          | 0/50 [00:00<?, ?streak/s]

(1.0, Individual(Zeros=466, Ones=534))