In [49]:
import numpy as np

In [50]:
import random
from collections import namedtuple
from matplotlib import pyplot as plt


def GA_set_covering(N):

    def problem(N, seed=None):
        random.seed(seed)
        return [
            list(set(random.randint(0, N - 1)
                 for n in range(random.randint(N // 5, N // 2))))
            for n in range(random.randint(N, N * 5))
        ]

    def evaluate_weight( genome):
        indexes = [i for i, x in enumerate(genome) if x == 1]
        all_elements = [element for sublist in [generated_problem[x]
                                                for x in indexes] for element in sublist]
        return len(all_elements)

    def is_solving( genome):
        indexes = [i for i, x in enumerate(genome) if x == 1]
        all_elements = [element for sublist in [generated_problem[x]
                                                for x in indexes] for element in sublist]
        coverage = len(set(all_elements))
        return coverage == N

    def evaluate_fitness( genome):
        # fitness considers how many numbers are already covered and the weight of the solution
        indexes = [i for i, x in enumerate(genome) if x == 1]
        all_elements = [element for sublist in [generated_problem[x]
                                                for x in indexes] for element in sublist]
        weight = len(all_elements)
        coverage = len(set(all_elements))
        boost_for_correct_solution = 10 if coverage == N else 0
        return boost_for_correct_solution + 0.3*coverage - 0.1*weight

    def tournament( population, tournament_size=8):
        return max(random.choices(population, k=tournament_size), key=lambda i: i.fitness)

    def cross_over( g1, g2):
        cut = random.randint(0, PROBLEM_SIZE)
        return g1[:cut] + g2[cut:]

    def mutation( g):
        point = random.randint(0, PROBLEM_SIZE-1)
        return g[:point] + (1 - g[point],) + g[point + 1:]
    
    def plot_performance(self):
        off_line = [max(f[1] for f in fitness_log if f[0] == x) / (x + 1)
                    for x in range(NUM_GENERATIONS)]
        on_line = [max(f[1] for f in fitness_log if f[0] <= x) / (x + 1)
                   for x in range(NUM_GENERATIONS)]
        gen_best = [max(f[1] for f in fitness_log if f[0] == x)
                    for x in range(NUM_GENERATIONS)]

        plt.figure(figsize=(15, 6))
        plt.scatter([x for x, _ in fitness_log], [
                    y for _, y in fitness_log], marker=".")
        plt.plot([x for x, _ in enumerate(gen_best)],
                 [y for _, y in enumerate(gen_best)])
        plt.plot([x for x, _ in enumerate(on_line)],
                 [y for _, y in enumerate(on_line)])
        plt.plot([x for x, _ in enumerate(off_line)],
                 [y for _, y in enumerate(off_line)])


    Individual = namedtuple("Individual", ["genome", "fitness"])
    generated_problem = problem(N)
    PROBLEM_SIZE = len(generated_problem)
    POPULATION_SIZE = 30
    NUM_GENERATIONS = 300
    OFFSPRING_SIZE = 80
    population = list()
    for genome in [tuple([random.choice([1, 0]) for _ in range(PROBLEM_SIZE)]) for _ in range(POPULATION_SIZE)]:
        population.append(Individual(genome, evaluate_fitness(genome)))
    fitness_log = [(0, i.fitness) for i in population]

    for g in range(NUM_GENERATIONS):
        offspring = list()
        for i in range(OFFSPRING_SIZE):
            if random.random() < 0.5:
                p = tournament(population)
                o = mutation(p.genome)
            else:
                p1 = tournament(population)
                p2 = tournament(population)
                o = cross_over(p1.genome, p2.genome)
            f = evaluate_fitness(o)
            fitness_log.append((g + 1, f))
            offspring.append(Individual(o, f))
        population += offspring
        population = sorted(population, key=lambda i: i.fitness, reverse=True)[
            :POPULATION_SIZE]

    indexes = [i for i, x in enumerate(population[0].genome) if x == 1]
    corresponding = [generated_problem[x] for x in indexes]
    #print(f"genome: {population[0].genome},\n corr: {corresponding},\n fitness: {population[0].fitness},\n weight: {evaluate_weight(population[0].genome)},\n solves? {is_solving(population[0].genome)}")
    return [population[0].fitness, evaluate_weight(population[0].genome)]



In [51]:
ts = np.zeros((50, 2))

for i in range(0,1):
    ts[i]+=(GA_set_covering(500))


In [53]:
#f_mean, w_mean = ts.mean(axis=0)
#print(f"f_mean: {f_mean}, w_mean: {w_mean}")
ts

array([[-7888.5, 80485. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
       [    0. ,     0. ],
 