In [1]:
import numpy as np
import random
import logging
from copy import copy
from collections import namedtuple

In [2]:
logging.getLogger().setLevel(logging.INFO)

In [3]:
PROBLEM_SIZE = 5
POPULATION_SIZE = 5
OFFSPRING_SIZE = 5

NUM_GENERATIONS = 100
MAX_INT = 100_000_000

In [4]:
def problem(N, seed=None):
    random.seed(seed)
    return [
        tuple(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

In [5]:
genes = problem(5, 42)

In [6]:
from traitlets import Integer


Individual = namedtuple("Individual", ["genome", "fitness"])


def onemax(genome, N):
    return -sum([len(g) for g in genome]) if is_valid(genome, N) else -MAX_INT


def tournament(population, tournament_size=2):
    return max(random.choices(population, k=tournament_size), key=lambda i: i.fitness)


def cross_over(g1, g2):
    cut = random.randint(0, PROBLEM_SIZE)
    return g1[:cut] + g2[cut:]

def distance(gen, N):
    """
    Function that assigns a penalty to a genome.
    It is based on the distance between a set on the range of the PROBLEM_SIZE
    and a generated set of the genes' elements.
    """
    dist = set(range(N))
    set_cover = set()
    [[set_cover.add(_) for _ in g] for g in gen]
    return -len(dist - set_cover)

def fitness(genome, N):
    #return (onemax(genome), -repetitions(genome, N), -distance(genome, N))
    return (onemax(genome, N), -distance(genome, N))

def repetitions(genome, N):
    '''
    count the repetition of the alleles and retrieve their sum
    in order to give the genome a "malus"
    '''
    set_cover = set()
    [[set_cover.add(_) for _ in g ]for g in genome]
    set_rep = dict.fromkeys(set_cover, -1)
    
    for gen in genome:
        for g in gen:
            set_rep[g] += 1
    
    return -sum(set_rep.values())

def is_valid(genome, N):
    return distance(genome, N) == 0

def mutation(g):
    point = random.randint(0, PROBLEM_SIZE - 1)
    point_1 = random.randint(0, PROBLEM_SIZE - 1)
    point_2 = random.randint(0, PROBLEM_SIZE - 1)
    swap_1 = g[point_1]
    swap_2 = g[point_2]
    return g[:point_1] + (swap_2) + g[point_1+1:point_2] + (swap_1) + g[point_2+1:]
    #return tuple(sorted(g))
    #return g[:point] + tuple(random.choices(genes)) + g[point + 1 :]
    # better solution can be found in swapping two genes

### Initial Population

### My code

In [7]:
def generate_population(PROBLEM_SIZE):
    population = list()
    genes = problem(PROBLEM_SIZE, 42)
    for genome in [tuple([random.choice(genes) for _ in range(PROBLEM_SIZE)]) for _ in range(POPULATION_SIZE)]:
        population.append(Individual(genome, fitness(genome, PROBLEM_SIZE)))

    #logging.info(f"init: pop_size={len(population)}; max={max(population, key=lambda i: i.fitness)[1]}")
    return population

### Evolution

In [8]:
def evolution(PROBLEM_SIZE, POPULATION_SIZE, population):

    fitness_log = [(0, i.fitness) for i in population]

    for g in range(NUM_GENERATIONS):
        offspring = list()
        for i in range(OFFSPRING_SIZE):
            if random.random() < 0.3:
                p = tournament(population)
                o = mutation(p.genome)
            else:
                p1 = tournament(population)
                p2 = tournament(population)
                o = cross_over(p1.genome, p2.genome)
            #f = onemax(o)
            f = fitness(o, PROBLEM_SIZE)
            fitness_log.append((g + 1, f))
            offspring.append(Individual(o, f))
        population += offspring
        population = sorted(population, key=lambda i: i.fitness, reverse=False)[:POPULATION_SIZE]

    logging.info(f'Population size: {PROBLEM_SIZE}, best solution: {population[0].fitness}')


In [9]:
for i in [5,10,20,50, 100, 500, 1000]:
    population = generate_population(i)
    evolution(i, i, population)

TypeError: 'int' object is not iterable