In [15]:
import random
import math
from collections import namedtuple, deque

### Problem definition

In [16]:
def problem(N, seed=None):
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

# Reference instance used by the cells below: universe size 100, seed 42.
N = 100
random.seed(42)
# Each list becomes a tuple so it is hashable and duplicate lists collapse
# when passed through the set.
generated_problem = list(set(map(tuple, problem(N, seed=42))))

## Genetic Algorithm

### Genetic Operators  

In [17]:
# A candidate solution: its genome (tuple of 0/1 genes) and the fitness
# computed for that genome.
Individual = namedtuple("Individual", ["genome", "fitness"])

# Sizes derived from the current problem instance.
PROBLEM_SIZE = len(generated_problem)  # one gene per entry of generated_problem
POPULATION_SIZE = 2 * N
OFFSPRING_SIZE = math.ceil(1.5 * N)

# Stopping criteria for the evolution loop.
NUM_GENERATIONS = 200
STEADY_STATE = 20  # stop once the best fitness has been seen this many times

def tournament(population, tournament_size=2):
    """Pick a parent by tournament selection.

    ``tournament_size`` contenders are drawn from ``population`` with
    replacement and the one with the highest ``fitness`` is returned; on a
    tie the contender drawn first wins.
    """
    contenders = random.choices(population, k=tournament_size)
    return max(contenders, key=lambda contender: contender.fitness)

def cross_over(g1, g2):
    """One-point crossover: the head of ``g1`` joined to the tail of ``g2``.

    The cut position is drawn uniformly from ``0`` to ``len(g1)`` inclusive,
    so the child can also be an exact copy of either parent. Both genomes are
    expected to be sequences of the same length and type.

    The length is read from ``g1`` rather than from the notebook-level
    ``PROBLEM_SIZE`` so the operator cannot go out of sync with the genomes
    when that global is reassigned or cells are run out of order.
    """
    cut = random.randint(0, len(g1))
    return g1[:cut] + g2[cut:]

def cross_over2(g1, g2):
    """Uniform crossover: every gene is copied from a randomly chosen parent.

    Genes are paired position by position, so the child has as many genes as
    the parents. Pairing the genomes directly (instead of indexing up to the
    notebook-level ``PROBLEM_SIZE``) keeps the operator independent of global
    state. Returns a tuple.
    """
    return tuple(random.choice(gene_pair) for gene_pair in zip(g1, g2))

def mutation(g):
    """Return a copy of tuple ``g`` with one randomly chosen gene flipped.

    The position is drawn uniformly over the genome and the gene there is
    replaced by ``1 - gene`` (0 becomes 1, 1 becomes 0). ``g`` itself is not
    modified. An empty genome has nothing to flip and is returned unchanged.

    The position range comes from ``len(g)`` rather than the notebook-level
    ``PROBLEM_SIZE`` so the operator stays valid whatever that global holds.
    """
    if not g:
        return g
    point = random.randint(0, len(g) - 1)
    return g[:point] + (1 - g[point],) + g[point + 1:]
    


### Evaluate fitness and other useful functions

In [18]:
def evaluate_fitness(genome, subsets=None):
    """Score a genome as the tuple ``(coverage, -weight)``.

    Gene ``i`` equal to 1 means entry ``i`` of ``subsets`` is selected.

    * ``coverage`` is the number of distinct elements in the selected entries.
    * ``weight`` is the total number of elements in the selected entries,
      duplicates included; it is negated so that a larger tuple is better
      (more coverage first, then less weight).

    Args:
        genome: sequence of 0/1 genes.
        subsets: sequence of element collections indexed by gene position.
            Defaults to the notebook-level ``generated_problem``.

    Returns:
        ``(coverage, -weight)`` as a tuple of ints.
    """
    if subsets is None:
        subsets = generated_problem
    covered = set()
    weight = 0
    for index, gene in enumerate(genome):
        if gene == 1:
            chosen = subsets[index]
            covered.update(chosen)
            weight += len(chosen)
    return len(covered), -weight

### Initial Population generation

In [19]:
def initialize_population():
    """Build the starting population of ``POPULATION_SIZE`` individuals.

    Every individual gets the all-zero genome of length ``PROBLEM_SIZE``
    (nothing selected), so the search starts from the empty selection. The
    genome is evaluated once and the result is reused for every individual.

    Returns:
        A ``deque`` of ``Individual`` tuples.
    """
    empty_genome = (0,) * PROBLEM_SIZE
    empty_fitness = evaluate_fitness(empty_genome)
    return deque(
        Individual(empty_genome, empty_fitness) for _ in range(POPULATION_SIZE)
    )


### Evolution

In [20]:
def run_evolution(mutation_probability=0.3):
    """Run the genetic algorithm and return the best fitness found.

    Each generation creates ``OFFSPRING_SIZE`` children. With probability
    ``mutation_probability`` a child is a mutated copy of one tournament
    winner; otherwise it is the crossover of two tournament winners. Parents
    and children then compete together and the ``POPULATION_SIZE`` fittest
    survive, so the best fitness never decreases from one generation to the
    next.

    The loop ends after ``NUM_GENERATIONS`` generations, or earlier once the
    best fitness has stayed the same for ``STEADY_STATE`` generations.

    Args:
        mutation_probability: chance that a child is produced by mutation
            instead of crossover.

    Returns:
        The fitness of the best individual in the final population.
    """
    population = initialize_population()
    best_fitness = None
    unchanged_generations = 0

    for _generation in range(NUM_GENERATIONS):
        offspring = []
        for _ in range(OFFSPRING_SIZE):
            if random.random() < mutation_probability:
                parent = tournament(population)
                child_genome = mutation(parent.genome)
            else:
                first_parent = tournament(population)
                second_parent = tournament(population)
                child_genome = cross_over(first_parent.genome, second_parent.genome)
            offspring.append(Individual(child_genome, evaluate_fitness(child_genome)))

        # Survivor selection over parents + children; the sort is stable, so
        # on equal fitness parents stay ahead of children.
        population = sorted(
            [*population, *offspring],
            key=lambda individual: individual.fitness,
            reverse=True,
        )[:POPULATION_SIZE]

        # Because the best fitness never decreases, counting consecutive
        # repeats is the same as counting its occurrences over all generations.
        if population[0].fitness == best_fitness:
            unchanged_generations += 1
        else:
            best_fitness = population[0].fitness
            unchanged_generations = 1
        if unchanged_generations == STEADY_STATE:
            break

    return population[0].fitness


In [21]:
arr = [5, 10, 20, 100, 500, 1000]
for N in arr:
    # A fixed seed makes each instance reproducible; problem() seeds the
    # global generator, so the GA run that follows is reproducible as well.
    generated_problem = list(set(tuple(x) for x in problem(N, seed=42)))
    # The GA functions read these notebook-level globals, so they must be
    # refreshed for every instance size before the run.
    PROBLEM_SIZE = len(generated_problem)
    POPULATION_SIZE = 2 * N
    OFFSPRING_SIZE = math.ceil(1.5 * N)
    res = run_evolution()
    print(res)

(4, -4)
(10, -12)
(20, -30)
(100, -204)


KeyboardInterrupt: 