In [1]:
import numpy as np
import random
import logging
from copy import copy
from collections import namedtuple

In [2]:
logging.getLogger().setLevel(logging.INFO)

In [3]:
NUM_GENERATIONS = 100
MAX_INT = 100_000_000

In [4]:
def problem(N, seed=None):
    random.seed(seed)
    return [
        tuple(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

In [5]:
genes = problem(5, 42)

# Main Methods

In [6]:
Individual = namedtuple("Individual", ["genome", "fitness"])


def w(genome, N):
    return -sum([len(g) for g in genome]) if is_valid(genome, N) else -MAX_INT


def tournament(population, tournament_size=5):
    return max(random.choices(population, k=tournament_size), key=lambda i: i.fitness)


def cross_over(g1, g2, N, genes):
    cut = random.randint(0, min([len(g1), len(g2)]))
    g_new = g1[:cut] + g2[cut:]
    g_max_old = g1 if w(g1, N) > w(g2, N) else g2
    if len(g_new) < N:
        gene = random.choice(tuple(set(genes) - set(g_new)))
        g_new = g1[:cut] + (gene,) + g2[cut:]
    return g_new if w(g_new, N) > w(g_max_old, N) else g_max_old

def distance(gen, N):
    """
    Function that assigns a penalty to a genome.
    It is based on the distance between a set on the range of the PROBLEM_SIZE
    and a generated set of the genes' elements.
    """
    dist = set(range(N))
    set_cover = set()
    [[set_cover.add(_) for _ in g] for g in gen]

    return len(dist - set_cover)

def fitness(genome, N):
    return w(genome, N)

def is_valid(genome, N):
    return distance(genome, N) == 0

def mutation(g,N, genes):
    point = random.randint(0, len(g) - 1)
    gene = random.choice(tuple(set(genes) - set(g)))
    new_genome = g[:point] + (gene,) + g[point + 1 :]

    return new_genome if w(new_genome, N) > w(g, N) else g

### Initial Population

In [11]:
def generate_population(PROBLEM_SIZE, POPULATION_SIZE):
    population = list()
    genes = problem(PROBLEM_SIZE, 42)
    for _ in range(POPULATION_SIZE):
        genome = tuple(random.sample(genes, random.randint(1, PROBLEM_SIZE)))
        population.append(Individual(genome, fitness(genome, PROBLEM_SIZE)))

    return population, genes

### Evolution

In [24]:
def evolution(PROBLEM_SIZE, POPULATION_SIZE, population, genes):

    fitness_log = [(0, i.fitness) for i in population]
    
    OFF_SIZE = int((PROBLEM_SIZE - PROBLEM_SIZE * 0.02 * np.round(np.log2(PROBLEM_SIZE)))) if PROBLEM_SIZE > 50 else PROBLEM_SIZE

    if PROBLEM_SIZE == 1000:
        OFF_SIZE = 450
    
    for g in range(NUM_GENERATIONS):
        offspring = list()
        for i in range(OFF_SIZE):
            if random.random() < 0.4:
                p = tournament(population)
                o = mutation(p.genome, i, genes)
            else:
                p1 = tournament(population)
                p2 = tournament(population)
                o = cross_over(p1.genome, p2.genome, i, genes)
            f = fitness(o, PROBLEM_SIZE)
            fitness_log.append((g, f))
            offspring.append(Individual(o, f))
        population += offspring
        population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]

    logging.info(f'Problem size: {PROBLEM_SIZE}, Population size: {POPULATION_SIZE}, offspring size: {OFF_SIZE}, best solution: {-population[0].fitness}')
    return fitness_log


In [25]:
for i in [5, 10, 20, 50, 100, 500, 1000]:
    POPULATION_SIZE = i * 10 if i < 500 else i * 5
    population, genes = generate_population(i, POPULATION_SIZE)
    fitness_log = evolution(i, POPULATION_SIZE, population, genes)
    

INFO:root:Problem size: 5, Population size: 50, offspring size: 5, best solution: 5
INFO:root:Problem size: 10, Population size: 100, offspring size: 10, best solution: 12
INFO:root:Problem size: 20, Population size: 200, offspring size: 20, best solution: 27
INFO:root:Problem size: 50, Population size: 500, offspring size: 50, best solution: 95
INFO:root:Problem size: 100, Population size: 1000, offspring size: 86, best solution: 241
INFO:root:Problem size: 500, Population size: 2500, offspring size: 410, best solution: 1690
INFO:root:Problem size: 1000, Population size: 5000, offspring size: 450, best solution: 4016
