In [427]:
import numpy as np
import random
import pandas as pd

In [428]:
np.random.seed(2)
random.seed(2)

In [429]:
def init_population(pop_size, roll_size):
    
    return [np.random.randint(-1, 4, size=(roll_size,)) for _ in range(pop_size)]

In [430]:
def respect_defects(threshold, start, end, roll_defects):
    pos_defects = {"a": 0, "b": 0, "c": 0}
    for pos in range(start, end):
        for key in roll_defects[pos]:
            pos_defects[key] += (roll_defects[pos][key])
    return all(threshold[key] >= pos_defects[key] for key in pos_defects.keys())

In [431]:
def fitness(ind, biscuits_list, roll_defects):
    
    score = 0
    size = 0
    last_elem = -2  # So we don't have to check if we test the first value of the list each iteration doing ind[i-1]
    for i, elem in enumerate(ind):
        if elem == -1:  # If no biscuit value no need to test anymore
            score -= (1 + size)
            size = 0
        else:
            if elem != last_elem:
                score -= size
                size = 1
            else:
                size += 1
            if size == biscuits_list[elem]["size"]:  # Test the defects only if the biscuit reach its required size
                if respect_defects(biscuits_list[elem]["threshold"], i - size + 1, i + 1, roll_defects):
                    score += biscuits_list[elem]["value"]
                    size = 0
                else:
                    score -= 1
                    size -= 1
        last_elem = elem
    score -= size  # Don't forget to remove last biscuit pieces that are not whole
    return score

In [440]:
def get_slice_score(position, size, lb_size, lb_value, lb_threshold):  # lb stands for last biscuit
    score = 0
    unassigned = [i for i in range(position - size, position)]  # Ensure we give a value to every biscuit of the last biscuit type 
    last_size = min(lb_size, size)
    while unassigned and last_size != 0:  # While we don't have assigned a value for every last position
        j = 0
        assigned = []
        while j <= len(unassigned) - last_size:
            pos = unassigned[j]
            start = pos  # Start of the continuation of the previous value
            end = pos + last_size
            if all(elem in unassigned for elem in range(start, end)):
                if respect_defects(lb_threshold, start, end, roll_defects):
                    score += (last_size/lb_size)**2 * lb_value  # the closer the biscuit is to its full size, the more importance is given to it
                    for rem in range(start, end):
                        assigned.append(rem)
                    j += last_size - 1  # Put -1 because there is a j+=1 at the end of the loop anw 
            j+=1
        for assi in  assigned:
            unassigned.remove(assi) 
        last_size -= 1
    score -= len(unassigned)  # -1 For all element that do not respect defects even alone
    return score

In [433]:
# We can try to give more importants to biscuits that aren't full but almost
def fitness_2(ind, biscuits_list, roll_defects):
    
    score = 0
    size = 0
    last_elem = ind[0]  # So we don't have to check if we test the first value of the list each iteration doing ind[i-1]
    for i, elem in enumerate(ind):
        if elem == last_elem:
            size += 1
        else:
            if last_elem == -1:
                score -= size
            else:
                slice_score = get_slice_score(i, size, biscuits_list[last_elem]["size"], biscuits_list[last_elem]["value"], biscuits_list[last_elem]["threshold"])
                score += slice_score
            size = 1
        last_elem = elem
    score += get_slice_score(len(ind), size, biscuits_list[last_elem]["size"], biscuits_list[last_elem]["value"], biscuits_list[last_elem]["threshold"])
                
    return score

In [434]:
def evolve_pop(population, mutation_rate, elite_ratio, biscuits_list, roll_defects, fitness):
    fitness_values = [fitness(ind, biscuits_list, roll_defects) for ind in population]

    min_fitness = min(fitness_values)
    shifted_fitness = [f - min_fitness + 1 for f in fitness_values]  # Add 1 to avoid zero fitness

    fitness_sum = sum(shifted_fitness)
    probabilities = [f / fitness_sum for f in shifted_fitness]
    
    elite_idx = np.argsort(fitness_values)[int(-len(population) * elite_ratio):]
    
    elites = [population[i] for i in elite_idx]

    new_population = []
    while len(new_population) < len(population) - len(elites):
        
        # Cross over
        # Select two parents based on fitness probabilities
        parents_indices = np.random.choice(len(population), size=2, replace=False, p=probabilities)
        parent1 = population[parents_indices[0]]
        parent2 = population[parents_indices[1]]

        child = np.zeros(parent1.shape)
        break_points = np.random.choice(len(parent1), size=3, replace=False)
        break_points = np.insert(break_points, 0, [0, len(parent1)])
        break_points.sort()
        #slices = [(break_points[i], break_points[i+1]) for i in range(len(break_points)-1)]
        for i in range(len(break_points)-1):
            start = break_points[i]
            end = break_points[i+1]
            chosen_p = parent1 if random.random() < 0.5 else parent2
            child[start:end] = chosen_p[start:end]

        # Mutation
        muted_child = np.array([gene if random.random() > mutation_rate else random.randint(-1, 3) for gene in child] )

        new_population.append(muted_child)
        
    return new_population + elites

In [435]:
def genetic_algorithm(pop_size, mutation_rate, elite_ratio, biscuits_list, roll_defects, roll_size, max_iter, display, fitness):
    
    population = init_population(pop_size, roll_size)
    
    for i in range(max_iter):
        population = evolve_pop(population, mutation_rate, elite_ratio, biscuits_list, roll_defects, fitness)
        
        if (i+1) % display == 0:
            # Metric computation, remove 4 lines below to go faster
            fitness_values = [fitness(ind, biscuits_list, roll_defects) for ind in population]
            elite_idx = np.argsort(fitness_values)[-1]
            elite = population[elite_idx]
            print(f'Generation {i+1}: Best fitness {fitness_values[elite_idx]}')

    fitness_values = [fitness(ind, biscuits_list, roll_defects) for ind in population]
    elite_idx = np.argsort(fitness_values)[-1]
    elite = population[elite_idx]
    
    return elite
    

In [436]:
df = pd.read_csv("defects.csv")
print(df.shape)
df = df.sort_values(by="x")
df.head(1)

(500, 2)


Unnamed: 0,x,class
479,0.700561,a


In [437]:
# dict format -> id : (value, size, defects_threshold)

biscuits_list = {
    -1: ({"value": -1, "size": 1, "threshold": {"a":9, "b":9, "c":9}}),
     0: ({"value":  3, "size": 4, "threshold": {"a":4, "b":2, "c":3}}),
     1: ({"value": 12, "size": 8, "threshold": {"a":5, "b":4, "c":4}}),
     2: ({"value":  1, "size": 2, "threshold": {"a":1, "b":2, "c":1}}),
     3: ({"value":  8, "size": 5, "threshold": {"a":4, "b":2, "c":3}}),
}

In [438]:
roll_size = 500
roll_defects = {i: {"a": 0, "b": 0, "c": 0} for i in range(roll_size)}
for _, row in df.iterrows():
    if int(row["x"]) >= roll_size:
        break
    roll_defects[int(row["x"])][row["class"]] += 1

In [441]:
pop_size = 1000
mutation_rate = 0.02
elite_ratio = 0.1
max_iter = 200
display = max_iter // 10

In [442]:
result = genetic_algorithm(pop_size, mutation_rate, elite_ratio, biscuits_list, roll_defects, roll_size, max_iter, display, fitness_2)

Generation 20: Best fitness 164.14249999999984
Generation 40: Best fitness 232.14499999999984
Generation 60: Best fitness 276.4324999999997
Generation 80: Best fitness 315.6924999999998
Generation 100: Best fitness 343.7649999999998
Generation 120: Best fitness 371.32749999999976
Generation 140: Best fitness 401.56999999999977
Generation 160: Best fitness 404.19499999999977
Generation 180: Best fitness 408.69249999999994
Generation 200: Best fitness 415.0874999999998


In [443]:
result

array([ 3.,  2.,  0.,  2.,  3.,  3.,  3.,  0.,  3.,  3.,  3.,  3.,  0.,
        3.,  3.,  3.,  3.,  2.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  0.,  2.,  1.,  3.,  3.,  3.,  2.,  2.,  1.,  3.,  3.,  2.,
        1.,  2.,  0.,  3.,  3.,  3.,  3.,  1.,  0.,  1.,  2.,  3.,  3.,
        2.,  3.,  3.,  1.,  2.,  3.,  3.,  3.,  3.,  3.,  3.,  1.,  3.,
        3.,  0.,  3.,  1.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,
        3.,  0.,  2.,  3.,  3.,  3.,  3.,  3.,  3., -1.,  0.,  3.,  1.,
        0.,  3.,  3., -1.,  3.,  2.,  1.,  1.,  1.,  3.,  1.,  1.,  2.,
        2.,  1.,  3.,  3.,  3.,  0.,  0.,  1.,  3.,  3.,  3.,  3.,  3.,
        0.,  0.,  3., -1.,  3.,  3.,  3.,  3.,  3.,  2.,  1.,  1.,  0.,
        3.,  3.,  3.,  3.,  3.,  2.,  2.,  0.,  0.,  2.,  3.,  3.,  3.,
        3.,  3.,  1.,  1.,  1.,  3.,  3.,  3.,  3.,  3.,  0.,  2.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  3.,
        3.,  3.,  0., -1.,  3.,  3.,  2.,  3.,  3.,  3.,  3.,  3

In [None]:
# Example fitness_2
fitness_2([-1, 3, 3, 3, 3, 2, 0, 0, 0, -1], biscuits_list, roll_defects)

get_slice_score from 1 to 5 with size: 4
try pos : 1 for size: 4
assigned :  1  in a biscuit size:   4
assigned :  2  in a biscuit size:   4
assigned :  3  in a biscuit size:   4
assigned :  4  in a biscuit size:   4
get_slice_score from 5 to 6 with size: 1
try pos : 5 for size: 1
assigned :  5  in a biscuit size:   1
get_slice_score from 6 to 9 with size: 3
try pos : 6 for size: 3
assigned :  6  in a biscuit size:   3
assigned :  7  in a biscuit size:   3
assigned :  8  in a biscuit size:   3
get_slice_score from 9 to 10 with size: 1
try pos : 9 for size: 1
assigned :  9  in a biscuit size:   1


5.057500000000001