# Lab 2

In [371]:
import logging
from collections import namedtuple
import random
from matplotlib import pyplot as plt

In [372]:
def problem(N, seed=None):
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

In [373]:
PROBLEM_SIZE = 1000
POPULATION_SIZE = 200
OFFSPRING_SIZE = 150

MUTATION_RATE = 0.4
# TODO: try to increase TOURNAMENT_SIZE 
TOURNAMENT_SIZE = 2
NUM_GENERATIONS = 1000

In [374]:
#logging.getLogger().setLevel(logging.INFO)

blocks = problem(PROBLEM_SIZE)
goal = [x for x in range(PROBLEM_SIZE)]

In [375]:
Individual = namedtuple("Individual", ["genome", "fitness"])


def onemax(genome):
    return sum(genome)


def tournament(population, tournament_size=2):
    return max(random.choices(population, k=tournament_size), key=lambda i: i.fitness)


def cross_over(g1, g2):
    cut1 = random.randint(0, len(g1))
    cut2 = random.randint(0,len(g2))

    if random.choice([0,1]) == 1:
        return g1[:cut1] + g2[cut2:]
    else:
        return g2[:cut2] + g1[cut1:]



def mutation(g):
    point = random.randint(0, len(g))
    
    if random.choice([0,1]) == 1:
        # remove a locus
        return g[:point] + g[point + 1 :]
    else:
        # replace a locus
        return g[:point] + (random.choice(blocks),) + g[point + 1 :]
    

def compute_fitness(genome):
    # create a flatten list
    tot_elements = [x for sublist in genome for x in sublist]
    # transformed then into set in order to check the number of unique elements
    covered_list = set(tot_elements)

    n_covered =  len(covered_list)
    n_repetition = len(tot_elements) - n_covered

    # avoid problem given by divison to 0 
    # 1 is the highest value obtained by 1/n_repetition so we set the fitness with no repetition to 2
    if n_repetition == 0:
        return (n_covered, 2)       
    else:
        return (n_covered, 1/n_repetition)

# Genetic Algorithm

In [376]:
logging.getLogger().setLevel(logging.INFO)

## Initial Population

In [377]:
import pprint
population = list()

for genome in [tuple(random.sample(blocks, random.randint(2,PROBLEM_SIZE))) for _ in range(POPULATION_SIZE)]:
    population.append(Individual(genome, compute_fitness(genome)))

#logging.info(f"init: pop_size={len(population)}; max={max(population, key=lambda i: i.fitness)[1]}")

## Evolution

In [378]:
for g in range(NUM_GENERATIONS):
    offspring = list()
    old_population = population.copy()

    for i in range(OFFSPRING_SIZE):
        if random.random() < MUTATION_RATE:
            p = tournament(population, tournament_size=TOURNAMENT_SIZE)
            o = mutation(p.genome)
        else:
            p1 = tournament(population, tournament_size=TOURNAMENT_SIZE)
            p2 = tournament(population, tournament_size=TOURNAMENT_SIZE)
            o = cross_over(p1.genome, p2.genome)
        f = compute_fitness(o)
        #fitness_log.append((g + 1, f))
        offspring.append(Individual(o, f))
    population += offspring
    population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]



In [379]:
N = PROBLEM_SIZE
print(f"Found solution for N={N}: w={sum(len(_) for _ in population[0].genome)} (bloat={(sum(len(_) for _ in population[0].genome)-N)/N*100:.0f}%)")

Found solution for N=1000: w=3352 (bloat=235%)


## Performance Evaluation

In [380]:
# off_line = [max(f[1] for f in fitness_log if f[0] == x) / (x + 1) for x in range(NUM_GENERATIONS)]
# on_line = [max(f[1] for f in fitness_log if f[0] <= x) / (x + 1) for x in range(NUM_GENERATIONS)]
# gen_best = [max(f[1] for f in fitness_log if f[0] == x) for x in range(NUM_GENERATIONS)]

# plt.figure(figsize=(15, 6))
# plt.scatter([x for x, _ in fitness_log], [y for _, y in fitness_log], marker=".")
# plt.plot([x for x, _ in enumerate(gen_best)], [y for _, y in enumerate(gen_best)])
# plt.plot([x for x, _ in enumerate(on_line)], [y for _, y in enumerate(on_line)])
# plt.plot([x for x, _ in enumerate(off_line)], [y for _, y in enumerate(off_line)])