# Lab 2

In [511]:
import logging
from collections import namedtuple
import random

In [512]:
def problem(N, seed=None):
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

Parameters setting

In [513]:
POPULATION_SIZE = 150
OFFSPRING_SIZE = 100

MUTATION_RATE = 0.4
TOURNAMENT_SIZE = 5
NUM_GENERATIONS = 500

In [514]:
Individual = namedtuple("Individual", ["genome", "fitness"])

def tournament(population, tournament_size=2):
    return max(random.sample(population,tournament_size), key=lambda i: i.fitness)



def cross_over(g1, g2):
    cut1 = random.randint(0, len(g1))
    cut2 = random.randint(0,len(g2))

    if random.choice([0,1]) == 1:
        return g1[:cut1] + g2[cut2:]
    else:
        return g2[:cut2] + g1[cut1:]



def mutation(g, blocks):
    point = random.randint(0, len(g))
    
    if random.choice([0,1]) == 1:
        # remove a locus
        return g[:point] + g[point + 1 :]
    else:
        # replace a locus
        return g[:point] + (random.choice(blocks),) + g[point + 1 :]
    

def compute_fitness(genome):
    # create a flatten list
    tot_elements = [x for sublist in genome for x in sublist]
    # transformed then into set in order to check the number of unique elements
    covered_list = set(tot_elements)

    n_covered =  len(covered_list)
    n_repetition = len(tot_elements) - n_covered

    # 1 is the highest value obtained by 1/n_repetition so we set the fitness with no repetition to 0.5
    # in order to also avoid the problem of divison by 0 
    if n_repetition == 0:
        return (n_covered, 0.5)       
    else:
        return (n_covered, 1/n_repetition)

In [515]:
logging.getLogger().setLevel(logging.INFO)

## Genetic Algorithm

In [516]:
def genetic_algorithm(PROBLEM_SIZE):
    # generate problem
    blocks = problem(PROBLEM_SIZE)
    # create the initial population
    population = list()
    for genome in [tuple(random.sample(blocks, random.randint(2,PROBLEM_SIZE))) for _ in range(POPULATION_SIZE)]:
        population.append(Individual(genome, compute_fitness(genome)))

    # evolution
    for _ in range(NUM_GENERATIONS):
        offspring = list()
        # offspring creation though mutation or crossover
        for _ in range(OFFSPRING_SIZE):
            if random.random() < MUTATION_RATE:
                p = tournament(population, tournament_size=TOURNAMENT_SIZE)
                o = mutation(p.genome, blocks)
            else:
                p1 = tournament(population, tournament_size=TOURNAMENT_SIZE)
                p2 = tournament(population, tournament_size=TOURNAMENT_SIZE)
                o = cross_over(p1.genome, p2.genome)
            f = compute_fitness(o)
            offspring.append(Individual(o, f))
        population += offspring
        # selection based on fittness
        population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]

    return population

In [517]:
for N in [10, 20, 100, 500, 1000]:
    population = genetic_algorithm(N)
    print(f"Found solution for N={N}: w={sum(len(_) for _ in population[0].genome)} (bloat={(sum(len(_) for _ in population[0].genome)-N)/N*100:.0f}%)")

Found solution for N=10: w=11 (bloat=10%)
Found solution for N=20: w=24 (bloat=20%)
Found solution for N=100: w=184 (bloat=84%)
Found solution for N=500: w=1461 (bloat=192%)
Found solution for N=1000: w=3440 (bloat=244%)
