# Lab9 - Black Box EA

Write a local-search algorithm (e.g. an EA) able to solve the Problem instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.


In [107]:
# Fetch the lab9_lib helper module (it provides make_problem, imported below)
# unless a copy is already present in the current working directory.
import os 
if "lab9_lib.py" not in os.listdir("."):
    # IPython shell escape: download the file with curl and save it locally.
    !curl https://raw.githubusercontent.com/squillero/computational-intelligence/master/2023-24/lab9_lib.py > lab9_lib.py
    

In [108]:
from lab9_lib import make_problem
from tqdm import tqdm, trange
from collections import namedtuple
from copy import deepcopy
from dataclasses import dataclass, field
import random
from typing import Literal, Union, Callable
import numpy as np
import math

In [109]:
# Default genome length used when an Individual is created without a genome.
LOCI = 1000
Gene = Literal[0, 1]
# Variable-length tuple of genes (``tuple[Gene]`` would denote a 1-tuple).
Genome = tuple[Gene, ...]


@dataclass(frozen=True, repr=False)
class Individual:
    """Immutable bit-string individual with a cached fitness value.

    Equality and hashing consider the genome only; the cached fitness is
    excluded from comparisons.
    """

    # Bit string. Defaults to LOCI uniformly random genes; any iterable of
    # genes passed by the caller is stored as a tuple (see __post_init__).
    genome: Genome = field(default_factory=lambda: tuple(random.choices([0, 1], k=LOCI)), repr=False)
    # Cache filled by evaluate(); not an __init__ argument.
    _fitness: Union[float, None] = field(default=None, init=False, compare=False)

    def __post_init__(self) -> None:
        # Callers build individuals from lists. Normalising to a tuple keeps
        # the frozen instance really immutable and makes it hashable.
        object.__setattr__(self, "genome", tuple(self.genome))

    def mutate(it: "Individual") -> "Individual":
        """Return a new individual with 10 randomly chosen loci flipped.

        Positions are drawn with replacement, so a locus picked twice ends up
        unchanged. The original individual is not modified.
        """
        gene_to_mutate = 10
        mutated_genome = [*it.genome]
        if not mutated_genome:
            # Nothing to flip; randrange(0) would raise.
            return Individual(mutated_genome)
        for _ in range(gene_to_mutate):
            # Use the genome's own length so non-default sizes work too.
            mutated_genome[random.randrange(len(mutated_genome))] ^= 1
        return Individual(mutated_genome)

    def crossover(it: "Individual", other: "Individual", mode: Literal["uniform", "onecut"] = None) -> "Individual":
        """Recombine two individuals into a new one.

        Args:
            it (Individual): first parent
            other (Individual): second parent
            mode: "uniform" takes each gene from either parent with equal
                probability; "onecut" takes a random-length prefix from ``it``
                and the rest from ``other``. None means "uniform".

        Returns:
            Individual: the (unevaluated) child

        Raises:
            ValueError: if the parents differ in length or ``mode`` is unknown
        """
        if mode is None:
            mode = "uniform"
        if len(it.genome) != len(other.genome):
            raise ValueError(
                f"Cannot cross genomes of different length ({len(it.genome)} vs {len(other.genome)})"
            )
        if not it.genome:
            return Individual(())
        if mode == "uniform":
            return Individual(
                [mine if random.random() < .5 else theirs for mine, theirs in zip(it.genome, other.genome)]
            )
        if mode == "onecut":
            cut = random.randrange(0, len(it.genome))
            return Individual([*it.genome[:cut], *other.genome[cut:]])
        # Previously an unknown mode fell through and returned None.
        raise ValueError(f"Unknown crossover mode: {mode!r}")

    def evaluate(self: "Individual", fitness_fn: Callable[[Genome], float]) -> float:
        """Wrapped evaluation inside individual to allow some kind of caching

        The fitness function is called at most once per individual; later
        calls return the cached value regardless of ``fitness_fn``.

        Args:
            self (Individual): Individual
            fitness_fn (Callable[[Genome], float]): Fitness function

        Returns:
            float: fitness
        """
        if self._fitness is None:
            fitness = fitness_fn(self.genome)
            # The dataclass is frozen, so the cache is written through object.__setattr__.
            object.__setattr__(self, "_fitness", fitness)
        return self._fitness

    @property
    def fitness(self) -> float:
        """Cached fitness; evaluate() must have been called first."""
        assert self._fitness is not None, "Fitness has not been evaluated yet"
        return self._fitness

    def __repr__(self: "Individual"):
        zeros = sum(1 for gene in self.genome if gene == 0)
        ones = sum(self.genome)
        # Read the raw cache: the `fitness` property asserts, which made the
        # repr of an unevaluated individual raise.
        fit = "" if self._fitness is None else f", Fit={self._fitness}"
        return f"I(Zeros={zeros}, Ones={ones}{fit})"

    def __str__(self: "Individual"):
        return self.__repr__()

    @property
    def phenotype(self) -> str:
        """Genome rendered as a string of digits."""
        return "".join(str(bit) for bit in self.genome)

## Problem Size 1

In [127]:
# Run configuration. These module-level names are read by train() every time
# it is called, so later cells can override them before a run.
PROB_SIZE = 1  # argument passed to make_problem()
EPOCHS = 10000  # upper bound on the number of epochs
POP_SIZE = 50  # individuals kept after each epoch
OFFSPRING_SIZE = 25  # children generated per epoch
CROSSOVER_PROB = .2  # chance that a child comes from crossover before being mutated
TOURNAMENT_SIZE = 3  # individuals drawn for each tournament selection

# Only read when train() is called with extinction=True
SURVIVAL_RATE = .15  # fraction of the population that survives an extinction
CONVERGENCENESS_THRESHOLD = 0.0001  # an extinction triggers when the std of the convergence measure drops below this

In [128]:
# individual: best Individual, calls: problem.calls when it was recorded,
# size: problem.x, epoch: epoch in which it was recorded.
Result = namedtuple("Result", ['individual', 'calls', 'size', 'epoch'])


def train(*, crossover_mode: str = None, extinction: bool = False, convergence_measure: Callable[[list[Individual]], list[float]] = None):
    """Evolve a single population on ``make_problem(PROB_SIZE)``.

    Every epoch either
      * breeds OFFSPRING_SIZE children (tournament selection, crossover with
        probability CROSSOVER_PROB, then mutation) and keeps the POP_SIZE
        fittest of parents + children, or
      * when ``extinction`` is enabled and the standard deviation of the
        convergence measure is below CONVERGENCENESS_THRESHOLD, keeps a random
        SURVIVAL_RATE fraction of the population and refills it with new
        random individuals.
    The run ends after EPOCHS epochs, or earlier when the population contains
    an individual whose fitness is close to 1.

    Args:
        crossover_mode: forwarded to ``Individual.crossover``.
        extinction: enable the extinction mechanism described above.
        convergence_measure: maps the population to one number per
            individual. Defaults to the individuals' fitness values.

    Returns:
        Result: best individual seen during the run, together with
        ``problem.calls`` at the time it was recorded, ``problem.x`` and the
        epoch (0 also covers the initial population).
    """
    if convergence_measure is None:
        convergence_measure = lambda x: [i.fitness for i in x]

    problem = make_problem(PROB_SIZE)

    parents = [Individual() for _ in range(POP_SIZE)]
    for ind in parents:
        ind.evaluate(problem)

    def fitness_of(ind: "Individual") -> float:
        return ind.fitness

    def max_fitness(pop: list["Individual"]) -> float:
        return max(fitness_of(ind) for ind in pop)

    def best_in_list(pop: list["Individual"]) -> "Individual":
        # max() returns the first of several equally fit individuals, the same
        # pick as the former filter-then-index form, without recomputing the
        # maximum for every element.
        return max(pop, key=fitness_of)

    def tournament_selection(pop: list["Individual"]) -> "Individual":
        return best_in_list(random.choices(pop, k=TOURNAMENT_SIZE))

    epoch_bar = trange(0, EPOCHS, unit="epoch")
    extinctions = 0

    def update_epoch_bar() -> None:
        description = f"Fitness {max_fitness(parents):.2%} - #Calls: {problem.calls}"
        if extinction:
            description += f" - Extinctions: {extinctions}"
        epoch_bar.set_description(description)

    # Start from the initial population so a result exists even when the loop
    # stops before completing its first epoch.
    best: Result = Result(best_in_list(parents), problem.calls, problem.x, 0)

    for epoch in epoch_bar:
        update_epoch_bar()
        if math.isclose(1, best_in_list(parents).fitness):
            break
        # `and` short-circuits: the measure is only computed when extinction is enabled.
        if extinction and np.std(convergence_measure(parents)) < CONVERGENCENESS_THRESHOLD:
            extinctions += 1
            survivor_count = int(len(parents) * SURVIVAL_RATE)
            # Sample without replacement so that the survivors are distinct individuals.
            parents = random.sample(parents, k=survivor_count)
            for _ in range(POP_SIZE - len(parents)):
                newcomer = Individual()
                newcomer.evaluate(problem)
                parents.append(newcomer)
        else:
            offspring = []
            for _ in range(OFFSPRING_SIZE):
                new_ind: "Individual"
                if random.random() < CROSSOVER_PROB:
                    new_ind = tournament_selection(parents).crossover(tournament_selection(parents), mode=crossover_mode)
                else:
                    new_ind = tournament_selection(parents)
                # mutate() returns a new individual; the selected parent is left untouched.
                new_ind = new_ind.mutate()
                new_ind.evaluate(problem)
                offspring.append(new_ind)
            parents = sorted([*parents, *offspring], key=fitness_of, reverse=True)[:POP_SIZE]

        best_ind = best_in_list(parents)
        if best.individual.fitness < best_ind.fitness:
            best = Result(best_ind, problem.calls, problem.x, epoch)

    # Leaving the loop through `break` does not close the bar by itself.
    epoch_bar.close()
    return best

In [118]:
# Problem instance 1: set the crossover probability to 0.5, then run train()
# with its defaults (no explicit crossover mode, extinction disabled).
CROSSOVER_PROB = 0.5
best_one = train()

Fitness 57.40% - #Calls: 200:   0%|          | 4/10000 [00:00<04:28, 37.24epoch/s]

Fitness 98.50% - #Calls: 250025: 100%|██████████| 10000/10000 [02:40<00:00, 62.48epoch/s]


In [120]:
best_one

Result(individual=I(Zeros=15, Ones=985, Fit=0.985), calls=215250, size=1, epoch=8607)

## Problem Size 2
 - With Extinction based on the population fitness 

In [129]:
# Problem instance 2: one-cut crossover with extinction enabled.
PROB_SIZE = 2
CROSSOVER_PROB = .5
# Threshold on the std of the convergence measure below which an extinction triggers.
CONVERGENCENESS_THRESHOLD = 0.001
best_two = train(extinction=True, crossover_mode='onecut')

Fitness 25.13% - #Calls: 75 - Extinctions: 0:   0%|          | 0/10000 [00:00<?, ?epoch/s]

Fitness 87.80% - #Calls: 250169 - Extinctions: 8: 100%|██████████| 10000/10000 [01:58<00:00, 84.59epoch/s]


In [130]:
best_two

Result(individual=I(Zeros=122, Ones=878, Fit=0.878), calls=248594, size=2, epoch=9935)

## Problem Size 5
 - With Extinction based on the population fitness

In [161]:
# Problem instance 5: extinction enabled, no explicit crossover mode;
# CONVERGENCENESS_THRESHOLD keeps whatever value was set last.
PROB_SIZE = 5
CROSSOVER_PROB = .2
best_five = train(extinction=True)

Fitness 56.36% - #Calls: 250025 - Extinctions: 0: 100%|██████████| 10000/10000 [02:08<00:00, 78.03epoch/s]


In [163]:
best_five

Result(individual=I(Zeros=344, Ones=656, Fit=0.5636), calls=250000, size=5, epoch=9997)

In [160]:
def convergenceness_distance(pop: list["Individual"]) -> list[float]:
    """For each individual, the sum of its Hamming distances to every other individual

    Genes are assumed to be 0/1 and all genomes to have the same length.
    Instead of comparing every pair of genomes, the ones are counted once per
    locus: an individual carrying a 1 at a locus differs from every individual
    carrying a 0 there, and vice versa. This gives the same sums as the
    pairwise comparison in time linear in the population size.

    Args:
        pop (list[Individual]): population

    Returns:
        list[float]: sum of Hamming distances for each individual, in population order
    """
    if not pop:
        return []
    size = len(pop)
    # Number of individuals carrying a 1 at each locus.
    ones_per_locus = [sum(column) for column in zip(*(ind.genome for ind in pop))]
    return [
        sum(size - ones if gene else ones for gene, ones in zip(ind.genome, ones_per_locus))
        for ind in pop
    ]

## Island Implementation

In [173]:
ISLANDS = 2  # number of separately evolved populations
MIGRATION_STEP = 1000  # a migration happens every MIGRATION_STEP epochs
MIGRANT_COUNT = 5  # individuals exchanged between two paired islands


def train_with_islands(*, crossover_mode: str = None, extinction: bool = False, convergence_measure: Callable[[list[Individual]], list[float]] = None):
    """Evolve ISLANDS populations on ``make_problem(PROB_SIZE)`` with periodic migration.

    Each epoch, every island performs one step: either breeding (tournament
    selection, crossover with probability CROSSOVER_PROB, mutation, then
    truncation to the POP_SIZE fittest) or, when ``extinction`` is enabled and
    the standard deviation of the convergence measure is below
    CONVERGENCENESS_THRESHOLD, an extinction that keeps a random SURVIVAL_RATE
    fraction and refills the island with new random individuals.
    Every MIGRATION_STEP epochs the islands are shuffled, paired up, and each
    pair exchanges its first MIGRANT_COUNT individuals.
    The run ends after EPOCHS epochs, or as soon as the best individual found
    has a fitness close to 1.

    Args:
        crossover_mode: forwarded to ``Individual.crossover``.
        extinction: enable the extinction mechanism described above.
        convergence_measure: maps a population to one number per individual.
            Defaults to the individuals' fitness values.

    Returns:
        Result: best individual seen on any island, together with
        ``problem.calls`` at the time it was recorded, ``problem.x`` and the
        epoch (0 also covers the initial populations).
    """
    if convergence_measure is None:
        convergence_measure = lambda x: [i.fitness for i in x]

    problem = make_problem(PROB_SIZE)

    archipelago = [[Individual() for _ in range(POP_SIZE)] for _ in range(ISLANDS)]
    for island in archipelago:
        for ind in island:
            ind.evaluate(problem)

    def fitness_of(ind: "Individual") -> float:
        return ind.fitness

    def max_fitness(pop: list["Individual"]) -> float:
        return max(fitness_of(ind) for ind in pop)

    def best_in_list(pop: list["Individual"]) -> "Individual":
        # max() returns the first of several equally fit individuals.
        return max(pop, key=fitness_of)

    def tournament_selection(pop: list["Individual"]) -> "Individual":
        return best_in_list(random.choices(pop, k=TOURNAMENT_SIZE))

    def everyone() -> list["Individual"]:
        return [ind for island in archipelago for ind in island]

    epoch_bar = trange(0, EPOCHS, unit="epoch")
    extinctions = 0

    def update_epoch_bar() -> None:
        description = f"Fitness {max_fitness(everyone()):.2%} - #Calls: {problem.calls}"
        if extinction:
            description += f" - Extinctions: {extinctions}"
        epoch_bar.set_description(description)

    # Start from the initial populations so a result exists even when the loop
    # stops before completing its first epoch.
    best: Result = Result(best_in_list(everyone()), problem.calls, problem.x, 0)

    for epoch in epoch_bar:
        update_epoch_bar()
        # Stop the whole run once any island has produced a solution; a
        # `break` inside the island loop would only skip the remaining islands
        # of the current epoch.
        if math.isclose(1, best.individual.fitness):
            break

        if (epoch + 1) % MIGRATION_STEP == 0:
            random.shuffle(archipelago)
            # Pair islands (0, 1), (2, 3), ...; with an odd count the last one sits out.
            for idx in range(0, len(archipelago) - 1, 2):
                left, right = archipelago[idx], archipelago[idx + 1]
                # Real two-way exchange. Islands are sorted by fitness after a
                # breeding step, so the leading individuals are normally the fittest.
                left[:MIGRANT_COUNT], right[:MIGRANT_COUNT] = right[:MIGRANT_COUNT], left[:MIGRANT_COUNT]

        for ic, parents in enumerate(archipelago):
            # `and` short-circuits: the measure is only computed when extinction is enabled.
            if extinction and np.std(convergence_measure(parents)) < CONVERGENCENESS_THRESHOLD:
                extinctions += 1
                survivor_count = int(len(parents) * SURVIVAL_RATE)
                # Sample without replacement so that the survivors are distinct individuals.
                parents = random.sample(parents, k=survivor_count)
                for _ in range(POP_SIZE - len(parents)):
                    newcomer = Individual()
                    newcomer.evaluate(problem)
                    parents.append(newcomer)
            else:
                offspring = []
                for _ in range(OFFSPRING_SIZE):
                    new_ind: "Individual"
                    if random.random() < CROSSOVER_PROB:
                        new_ind = tournament_selection(parents).crossover(tournament_selection(parents), mode=crossover_mode)
                    else:
                        new_ind = tournament_selection(parents)
                    # mutate() returns a new individual; the selected parent is left untouched.
                    new_ind = new_ind.mutate()
                    new_ind.evaluate(problem)
                    offspring.append(new_ind)
                parents = sorted([*parents, *offspring], key=fitness_of, reverse=True)[:POP_SIZE]

            archipelago[ic] = parents

            best_ind = best_in_list(parents)
            if best.individual.fitness < best_ind.fitness:
                best = Result(best_ind, problem.calls, problem.x, epoch)

    # Leaving the loop through `break` does not close the bar by itself.
    epoch_bar.close()
    return best

In [174]:
# Problem instance 10, solved with the island model and smaller populations.
PROB_SIZE = 10
POP_SIZE = 20
OFFSPRING_SIZE = 10
# Assignment, not subtraction: `CROSSOVER_PROB - .2` evaluated an expression
# and discarded it, leaving the value set by an earlier cell.
CROSSOVER_PROB = .2
best_ten = train_with_islands()

Fitness 16.20% - #Calls: 200:   0%|          | 8/10000 [00:00<02:13, 75.07epoch/s]

Fitness 33.58% - #Calls: 200030: 100%|██████████| 10000/10000 [02:02<00:00, 81.60epoch/s]


### Results
As you can see, I did not obtain great results. I believe there is much room for improvement, starting with some parameter tweaking and the implementation of more advanced techniques such as the ones we saw in class.
One improvement would be to use a different "convergence measure" for extinction, based on the genome instead of the fitness.
I also tried to implement a migration policy, but I did not have enough time to test it properly.

In [175]:
best_one, best_two, best_five, best_ten

(Result(individual=I(Zeros=15, Ones=985, Fit=0.985), calls=215250, size=1, epoch=8607),
 Result(individual=I(Zeros=122, Ones=878, Fit=0.878), calls=248594, size=2, epoch=9935),
 Result(individual=I(Zeros=344, Ones=656, Fit=0.5636), calls=250000, size=5, epoch=9997),
 Result(individual=I(Zeros=478, Ones=522, Fit=0.33579005), calls=197110, size=10, epoch=9853))