In [16]:
import numpy as np

In [17]:
import random
from collections import namedtuple
from matplotlib import pyplot as plt


def GA_set_covering(N, seed=42, population_size=30, num_generations=300,
                    offspring_size=80, tournament_size=5, verbose=True,
                    plot=False):
    """Search for a light set cover of ``range(N)`` with a (mu + lambda) GA.

    A random instance is generated first: a list of subsets of ``range(N)``.
    A genome is a tuple of 0/1 genes, one per subset; gene ``i == 1`` means
    subset ``i`` is part of the candidate solution.  Fitness is the tuple
    ``(coverage, -weight)`` compared lexicographically, where ``coverage`` is
    the number of distinct covered elements and ``weight`` is the total number
    of elements in the chosen subsets (duplicates across subsets counted).

    All randomness comes from a private ``random.Random(seed)``, so the
    module-level ``random`` generator is left untouched and a fixed ``seed``
    gives a reproducible run.

    Args:
        N: Size of the universe to cover; must be >= 1.
        seed: Seed for the private generator (``None`` -> non-deterministic).
        population_size: Number of survivors kept after each generation (>= 1).
        num_generations: Number of generations to run.
        offspring_size: Number of offspring created per generation.
        tournament_size: Number of contestants drawn (with replacement) for
            each parent selection.
        verbose: If true, print one progress line per generation.
        plot: If true, draw coverage/weight curves with matplotlib at the end.

    Returns:
        ``[fitness, weight]`` of the best individual, where ``fitness`` is the
        tuple ``(coverage, -weight)``.

    Raises:
        ValueError: If ``N`` or ``population_size`` is smaller than 1.
    """
    if N < 1:
        raise ValueError("N must be a positive integer")
    if population_size < 1:
        raise ValueError("population_size must be a positive integer")

    # Mutation is chosen with the early probability before the switch
    # generation and with the late probability afterwards; otherwise the
    # offspring is produced by crossover.
    MUTATION_SWITCH_GENERATION = 200
    EARLY_MUTATION_PROBABILITY = 0.2
    LATE_MUTATION_PROBABILITY = 0.7

    rng = random.Random(seed)
    Individual = namedtuple("Individual", ["genome", "fitness"])

    def problem():
        """Generate the random list of subsets of ``range(N)``."""
        return [
            list(set(rng.randint(0, N - 1)
                     for _ in range(rng.randint(N // 5, N // 2))))
            for _ in range(rng.randint(N, N * 5))
        ]

    def evaluate_fitness(genome):
        """Return ``(coverage, -weight)`` for the subsets selected by ``genome``."""
        chosen = [subsets[i] for i, gene in enumerate(genome) if gene == 1]
        coverage = len(set().union(*chosen))
        weight = sum(len(subset) for subset in chosen)
        return coverage, -weight

    def tournament(population):
        """Pick the fittest of ``tournament_size`` individuals drawn with replacement."""
        contestants = rng.choices(population, k=tournament_size)
        return max(contestants, key=lambda individual: individual.fitness)

    def uniform_crossover(g1, g2):
        """Take each gene from either parent with equal probability."""
        return tuple(rng.choice(pair) for pair in zip(g1, g2))

    def mutation(genome):
        """Flip one gene chosen uniformly at random."""
        point = rng.randint(0, problem_size - 1)
        return genome[:point] + (1 - genome[point],) + genome[point + 1:]

    def plot_performance():
        """Plot coverage of every logged individual and per-generation bests."""
        best_by_generation = {}
        for generation, fitness in fitness_log:
            current = best_by_generation.get(generation)
            if current is None or fitness > current:
                best_by_generation[generation] = fitness
        generations = sorted(best_by_generation)
        generation_best = [best_by_generation[x] for x in generations]

        best_so_far = []
        for fitness in generation_best:
            previous = best_so_far[-1] if best_so_far else fitness
            best_so_far.append(max(previous, fitness))

        fig, (ax_coverage, ax_weight) = plt.subplots(1, 2, figsize=(15, 6))
        ax_coverage.scatter([generation for generation, _ in fitness_log],
                            [fitness[0] for _, fitness in fitness_log],
                            marker=".")
        ax_coverage.plot(generations, [fitness[0] for fitness in generation_best],
                         label="generation best")
        ax_coverage.plot(generations, [fitness[0] for fitness in best_so_far],
                         label="best so far")
        ax_coverage.set(title="Coverage", xlabel="generation",
                        ylabel="covered elements")
        ax_coverage.legend()
        ax_weight.plot(generations, [-fitness[1] for fitness in generation_best],
                       label="generation best")
        ax_weight.plot(generations, [-fitness[1] for fitness in best_so_far],
                       label="best so far")
        ax_weight.set(title="Weight", xlabel="generation",
                      ylabel="total elements in chosen subsets")
        ax_weight.legend()

    subsets = problem()
    problem_size = len(subsets)

    # Start from the empty selection so that weight only grows when it buys
    # coverage; every initial individual is the same all-zero genome.
    empty_genome = tuple(0 for _ in range(problem_size))
    empty_fitness = evaluate_fitness(empty_genome)
    population = [Individual(empty_genome, empty_fitness)
                  for _ in range(population_size)]

    fitness_log = [(0, individual.fitness) for individual in population]

    for g in range(num_generations):
        if g < MUTATION_SWITCH_GENERATION:
            mutation_probability = EARLY_MUTATION_PROBABILITY
        else:
            mutation_probability = LATE_MUTATION_PROBABILITY

        offspring = []
        for _ in range(offspring_size):
            if rng.random() < mutation_probability:
                parent = tournament(population)
                child = mutation(parent.genome)
            else:
                first = tournament(population)
                second = tournament(population)
                child = uniform_crossover(first.genome, second.genome)
            fitness = evaluate_fitness(child)
            fitness_log.append((g + 1, fitness))
            offspring.append(Individual(child, fitness))

        # (mu + lambda) survival: parents and offspring compete together.
        population = sorted(population + offspring,
                            key=lambda individual: individual.fitness,
                            reverse=True)[:population_size]

        if verbose:
            best = population[0]
            print(f"gen: {g}, fitness: {best.fitness}, weight: {-best.fitness[1]}, solves? {best.fitness[0] == N}")

    if plot:
        plot_performance()

    best = population[0]
    return [best.fitness, -best.fitness[1]]



In [18]:

# Run the GA on the N = 500 instance; `res` is [best fitness, best weight].
res = GA_set_covering(N=500)

print(f"fitness: {res[0]}, weight: {res[1]}")


gen: 0, fitness: (188, -188), weight: 188, solves? False
gen: 1, fitness: (310, -365), weight: 365, solves? False
gen: 2, fitness: (424, -701), weight: 701, solves? False
gen: 3, fitness: (454, -898), weight: 898, solves? False
gen: 4, fitness: (490, -1458), weight: 1458, solves? False
gen: 5, fitness: (495, -1626), weight: 1626, solves? False
gen: 6, fitness: (498, -1790), weight: 1790, solves? False
gen: 7, fitness: (500, -2118), weight: 2118, solves? True
gen: 8, fitness: (500, -1921), weight: 1921, solves? True
gen: 9, fitness: (500, -1921), weight: 1921, solves? True
gen: 10, fitness: (500, -1921), weight: 1921, solves? True
gen: 11, fitness: (500, -1921), weight: 1921, solves? True
gen: 12, fitness: (500, -1921), weight: 1921, solves? True
gen: 13, fitness: (500, -1921), weight: 1921, solves? True
gen: 14, fitness: (500, -1921), weight: 1921, solves? True
gen: 15, fitness: (500, -1921), weight: 1921, solves? True
gen: 16, fitness: (500, -1921), weight: 1921, solves? True
gen: 17,

KeyboardInterrupt: 