In [671]:
#Copyright (c) 2022 Giovanni Squillero
#https://github.com/squillero/computational-intelligence
#Free for personal or classroom use; see LICENSE.md for details.

import random
import sys
from collections import namedtuple

In [672]:
#SETTING THE PARAMETERS OF THE PROBLEM
#for each of them we have tried to tune it in order to get the best possible results and these are the optimal value that we found at the end

N = 1000

POPULATION_SIZE = N         
OFFSPRING_SIZE = N*2        

NUM_GENERATIONS = 200        

TOURNAMENT_SIZE =int(N/2)
GENETIC_OPERATOR_RANDOMNESS = 0.3

MAX_STASIS=60
ARTIFICIAL_MUTATION=50000

best_fit = sys.float_info.min, sys.float_info.min

In [673]:
def problem(N, seed=42):
    #state = random.getstate()
    random.seed(seed)
    p = [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]
    #random.setstate(state)
    return p

In [674]:
GOAL={i for i in range(N)}
#print(GOAL)
list_of_lists = problem(N)
tmp =  {tuple(x) for x in list_of_lists}    # optimization: remove repeated inner list
list_of_lists = list(tmp)                   
#print(list_of_lists)
PROBLEM_SIZE = len(list_of_lists)           

In [675]:
Individual = namedtuple("Individual", ["genome", "fitness"])

#converte genome into a singol list
def gen2List(genome):
    list = []
    for i, g in enumerate(genome):
        if g:
            list += list_of_lists[i]
    return list

#implementation of the parent selection
def tournament(population, tournament_size=TOURNAMENT_SIZE):          
    return max(random.choices(population, k=tournament_size), key=lambda i: i.fitness) 

#implementation of 2 different types of cross_over: 
# we decided to choose the uniform one the allowed us to obtain best results

def one_cut_cross_over(g1, g2):                      
    cut = random.randint(0, PROBLEM_SIZE)
    return g1[:cut] + g2[cut:]

def uniform_cross_over(g1, g2):
    new_genoma = []
    for i in range(PROBLEM_SIZE):
        if i%2:
            new_genoma.append(g1[i])
        else:
            new_genoma.append(g2[i])
    return tuple(new_genoma)

#implementation of 2 different types of mutation: 
# in the initial part of the algorithm is preferable to do more exploration than exploitation and only at the end
# of the algorithm reverse this trend for this reason in the first part we use the normal one mutation function
# while at the end when we have a situation of steady state we focus only on the best individual and try to
# modify it so we use the artificial_mutation function

def mutation(g):                                
    point = random.randint(0, PROBLEM_SIZE - 1)      
    return g[:point] + (1 - g[point],) + g[point + 1 :]

def artificial_mutation(g):
    N=sum(g)
    turn_off = random.randint(1, N)
    count=0
    list_g=list(g)
    for i,el in enumerate(list_g):
        if el:
            count+=1
            if count==turn_off:
                list_g[i]=0
    return mutation(tuple(list_g))

#implementation of the fitness: 
#our fitness consists of a tuple, the first term indicates how far the current genome is from the solution 
#while the second is a regularization term that favors genomes with a smaller number of repetitions

def compute_fitness(genome):                                
    list = gen2List(genome)
    repetitions = len(list) - len(set(list))
    return N - len(GOAL - set(list)), -repetitions
    

def check_goal(genoma):
    if set(gen2List(genoma)) == GOAL:
        return True
    else:
        return False
        

EVOLUTION

In [676]:
for initial_population in range(4):
    print(best_fit)
    population = set()

    # for genome in [tuple([1 if random.random() < 0.3 else 0 for _ in range(PROBLEM_SIZE)]) for _ in range(POPULATION_SIZE)]:
    #     population.append(Individual(genome, compute_fitness(genome))) 
    while len(population)<POPULATION_SIZE:    
        genome=tuple(0 for _ in range(PROBLEM_SIZE))
        genome = mutation(genome)    #the initial genomes of the population are created by setting randomly only one element to 1 
        population.add(Individual(genome, compute_fitness(genome))) 
    population=list(population)
    #print(len(population))
    #print(population)
    stasis=0
    for g in range(NUM_GENERATIONS):
        offspring = list()
        for i in range(OFFSPRING_SIZE):
            if random.random() < GENETIC_OPERATOR_RANDOMNESS:                         
                p = tournament(population)                  
                o = mutation(p.genome)                    
            else:                                          
                p1 = tournament(population)                 
                p2 = tournament(population)
                o = uniform_cross_over(p1.genome, p2.genome)            
            f = compute_fitness(o)                                                          
            offspring.append(Individual(o, f))                 
        population += offspring 
        population=list(set(population))     
        population = sorted(population, key=lambda i: i[1], reverse=True)[:POPULATION_SIZE]
    
        #steady state situation => we change a bit only the best individual
        stasis+=1
        if stasis>(MAX_STASIS*(initial_population+1)):
            artificial_population=[]
            artificial_population.append(best_individual)
            for ind in artificial_population:
                for a in range(ARTIFICIAL_MUTATION):
                    o=artificial_mutation(tuple(ind.genome))
                    f = compute_fitness(o)
                    frankenstein=Individual(o,f)
                    if best_fit < frankenstein[1] and check_goal(frankenstein[0]):
                        artificial_population.append(frankenstein)
                        best_individual=Individual(frankenstein[0], frankenstein[1])
                        best_fit = tuple(frankenstein[1])
                        w = len(gen2List(frankenstein[0]))
                        gen=g
                        gen_a=a+1
            break
        
        #checking for the best individual 
        if best_fit < population[0][1] and check_goal(population[0][0]):
            stasis=0
            best_individual=Individual(population[0][0],population[0][1])
            best_fit = tuple(population[0][1])
            w = len(gen2List(population[0][0]))
            gen = g
            gen_a=0
            
    print(f"conv_fit {best_fit} found at {gen}+{gen_a} with N : {N} and w : {w}")

(2.2250738585072014e-308, 2.2250738585072014e-308)
conv_fit (1000, -2040) found at 68+20538 with N : 1000 and w : 3040
(1000, -2040)
conv_fit (1000, -2040) found at 68+20538 with N : 1000 and w : 3040
(1000, -2040)
conv_fit (1000, -2040) found at 68+20538 with N : 1000 and w : 3040
(1000, -2040)
conv_fit (1000, -2040) found at 68+20538 with N : 1000 and w : 3040
