In [1]:
import logging
import random
from copy import copy
from collections import namedtuple
from matplotlib import pyplot as plt

## Problem instances generator

In [2]:
def problem(N, seed=42):
    """Creates an instance of the problem"""

    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

## Genetic Algorithm

In [3]:
N = 5
all_lists = problem(N)

PROBLEM_SIZE = len(all_lists)
POPULATION_SIZE = 20
OFFSPRING_SIZE = 30

NUM_GENERATIONS = 10000

GOAL = set(range(N))

In [4]:
def flatten(l):
    return [item for sublist in l for item in sublist]

def fitness(genome, all_lists):
    x = list()
    for i, g in enumerate(genome):
        x.append(g * all_lists[i])
    return(1 - int(set(flatten(x)) == GOAL), sum(len(_) for _ in x))

def tournament(population, tournament_size=2):
    return max(random.choices(population, k=tournament_size), key=lambda i: i.fitness)

def cross_over(g1, g2):
    cut = random.randint(0, PROBLEM_SIZE)
    return g1[:cut] + g2[cut:]

def mutation(g):
    point = random.randint(0, PROBLEM_SIZE - 1)
    return g[:point] + (1 - g[point],) + g[point + 1 :] 

def check_duplicates(genome, population):
    population_genome = [p.genome for p in population]
    return(genome in population_genome)

def print_sol(genome, all_lists):
    sol = list()
    for i, g in enumerate(genome):
        sol.append(g * all_lists[i])
    return [s for s in sol if s != []]

## Initial Population

In [5]:
population = list()
Individual = namedtuple("Individual", ["genome", "fitness"])

i = 0
while i < POPULATION_SIZE:
    genome = tuple([random.choice([1, 0]) for _ in range(PROBLEM_SIZE)])
    
    if check_duplicates(genome, population):
        i -= 1
    else:
        population.append(Individual(genome, fitness(genome, all_lists))) 
    i += 1
       

for p in population:
    print(p.genome)

(1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0)
(0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1)
(1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1)
(1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1)
(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0)
(0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1)
(1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1)
(0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0)
(1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0)
(0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0)
(1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1)
(1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1)
(0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0)
(0, 0, 1, 1,

## Evolution

In [6]:
fitness_log = [(0, i.fitness) for i in population]
best_res_log = [(0,(sorted(population, key = lambda i: i.fitness)[0])[1])]

for g in range(NUM_GENERATIONS):
    offspring = list()
    i = 0
    while i < OFFSPRING_SIZE:
        
        if random.random() < 0.3:
            p = tournament(population)
            o = mutation(p.genome)
            #o = mutation(o)
        else:
            p1 = tournament(population)
            p2 = tournament(population)
            o = cross_over(p1.genome, p2.genome)
        
        if check_duplicates(o, population) or check_duplicates(o, offspring):
            i -= 1
        else:
            f = fitness(o, all_lists)
            fitness_log.append((g + 1, f))
            offspring.append(Individual(o, f))
        i += 1

    population += offspring
    population = sorted(population, key = lambda i: i.fitness)[:POPULATION_SIZE]
    best_res_log.append((g+1, population[0][1]))  
    if population[0][1] == (0, N):
        break

w = population[0][1][1]
print("N =", N, "| w =", w, " | num.gen. =", g + 1)

N = 5 | w = 5  | num.gen. = 17


In [7]:
for p in population:
    print(p.genome)

(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0)
(0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)
(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0)
(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0)
(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0)
(0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)
(0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0)
(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)
(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0)
(1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)
(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0)
(0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0)
(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0)
(0, 1, 0, 1,

In [8]:
for p in population:
    print(print_sol(p.genome, all_lists), p.fitness)

[[0], [3], [2, 4], [1]] (0, 5)
[[1], [4], [0], [0, 2], [3]] (0, 6)
[[4], [0], [0, 2], [3], [1]] (0, 6)
[[4], [0], [3], [2, 4], [1]] (0, 6)
[[1, 3], [0], [3], [2, 4]] (0, 6)
[[1], [4], [0], [0, 2], [3]] (0, 6)
[[1], [4], [0], [3], [2, 4]] (0, 6)
[[4], [1], [0], [2, 4], [3]] (0, 6)
[[4], [0], [2, 4], [3], [1]] (0, 6)
[[0], [1], [4], [0, 2], [3]] (0, 6)
[[4], [0], [3], [2, 4], [1]] (0, 6)
[[1], [4], [0, 2], [3], [0]] (0, 6)
[[0], [0], [3], [2, 4], [1]] (0, 6)
[[1], [4], [0], [3], [2, 4], [1]] (0, 7)
[[0], [1], [4], [0], [0, 2], [3]] (0, 7)
[[0], [4], [0], [3], [2, 4], [1]] (0, 7)
[[0], [1, 3], [0], [3], [2, 4]] (0, 7)
[[4], [0, 1], [0], [0, 2], [3]] (0, 7)
[[1], [4], [0], [0, 2], [3], [0]] (0, 7)
[[4], [1, 3], [0], [0, 2], [3]] (0, 7)


### Final solution

In [9]:
def ga(N):
    
    def flatten(l):
        return [item for sublist in l for item in sublist]

    def fitness(genome, all_lists):
        x = list()
        for i, g in enumerate(genome):
            x.append(g * all_lists[i])
        return(1 - int(set(flatten(x)) == GOAL), sum(len(_) for _ in x))

    def tournament(population, tournament_size=2):
        return max(random.choices(population, k=tournament_size), key=lambda i: i.fitness)

    def cross_over(g1, g2):
        cut = random.randint(0, PROBLEM_SIZE)
        return g1[:cut] + g2[cut:]

    def mutation(g):
        point = random.randint(0, PROBLEM_SIZE - 1)
        return g[:point] + (1 - g[point],) + g[point + 1 :] 

    def check_duplicates(genome, population):
        population_genome = [p.genome for p in population]
        return(genome in population_genome)
    
    import time
    start_time = time.time()


    all_lists = problem(N)

    PROBLEM_SIZE = len(all_lists)
    POPULATION_SIZE = 20
    OFFSPRING_SIZE = 30

    NUM_GENERATIONS = 10000

    GOAL = set(range(N))


    population = list()
    Individual = namedtuple("Individual", ["genome", "fitness"])

    i = 0
    while i < POPULATION_SIZE:
        genome = tuple([random.choice([1, 0]) for _ in range(PROBLEM_SIZE)])
        
        if check_duplicates(genome, population):
            i -= 1
        else:
            population.append(Individual(genome, fitness(genome, all_lists))) 
        i += 1
    

    fitness_log = [(0, i.fitness) for i in population]
    best_res_log = [(0,(sorted(population, key = lambda i: i.fitness)[0])[1])]

    for g in range(NUM_GENERATIONS):
        offspring = list()


        i = 0
        while i < OFFSPRING_SIZE:
            
            if random.random() < 0.3:
                p = tournament(population)
                o = mutation(p.genome)
                #o = mutation(o)
            else:
                p1 = tournament(population)
                p2 = tournament(population)
                o = cross_over(p1.genome, p2.genome)
            
            if check_duplicates(o, population) or check_duplicates(o, offspring):
                i -= 1
            else:
                f = fitness(o, all_lists)
                fitness_log.append((g + 1, f))
                offspring.append(Individual(o, f))
            i += 1
        
        
        population += offspring
        population = sorted(population, key = lambda i: i.fitness)[:POPULATION_SIZE]
        best_res_log.append((g+1, population[0][1]))  
        if population[0][1] == (0, N):
            break
        end_time = time.time()

        if end_time - start_time > 600:
            break


        print (round(100*g/NUM_GENERATIONS, 3), " %", " time: ", round(end_time - start_time, 1), " s |||", sep='', end="\r")
        

    w = population[0][1][1]
    print('', end="\r")
    print("N =", N, "| w =", w, "| num.gen. =", g + 1, "\t\t\t\t\t\t")
    

In [10]:
for N in [5, 10, 20, 100, 500, 1000]:
    ga(N)

N = 5 | w = 5 | num.gen. = 17 						
N = 10 | w = 11 | num.gen. = 10000 						 % time: 4.4 s ||| % time: 5.5 s |||
N = 20 | w = 24 | num.gen. = 10000 						
N = 100 | w = 229 | num.gen. = 10000 						1 s ||| % time: 35.9 s ||| % time: 36.5 s |||
N = 500 | w = 1651 | num.gen. = 10000 						
N = 1000 | w = 335162 | num.gen. = 662 						
