In [1]:
import numpy as np
import random
import pandas as pd

In [2]:
np.random.seed(42)
random.seed(42)

In [2]:
def init_population(pop_size, roll_size):
    
    return [np.random.randint(-1, 4, size=(roll_size,)) for _ in range(pop_size)]

In [3]:
def fitness(ind, biscuits_list, roll_defects):
    
    score = 0
    size = 0
    last_elem = -2  # So we don't have to check if we test the first value of the list each iteration doing ind[i-1]
    for i, elem in enumerate(ind):
        if elem == -1:  # If no biscuit value no need to test anymore
            score -= (1 + size)
            size = 0
        else:
            if elem != last_elem:
                score -= size
                size = 1
            else:
                size += 1
            if size == biscuits_list[elem][1]:  # Test the defects only if the biscuit reach its required size
                pos_defects = {"a": 0, "b": 0, "c": 0}
                for pos in range(i - size + 1, i):
                    for key in roll_defects[pos]:
                        pos_defects[key] += (roll_defects[pos][key])
                biscuit_defects = biscuits_list[elem][2]
                if any(b_value < p_value for b_value, p_value in zip(biscuit_defects.values(), pos_defects.values())):
                    score -= 1
                    size -= 1
                else:
                    score += biscuits_list[elem][0]
                    size = 0
        last_elem = elem
    score -= size  # Don't forget to remove last biscuit pieces that are not whole
    return score

In [4]:
def evolve_pop(population, mutation_rate, elite_ratio, biscuits_list, roll_defects):
    
    fitness_values = [fitness(ind, biscuits_list, roll_defects) for ind in population]
    
    # Shift fitness to make all values positive
    min_fitness = min(fitness_values)
    shifted_fitness = [f - min_fitness + 1 for f in fitness_values]  # Add 1 to avoid zero fitness

    fitness_sum = sum(shifted_fitness)
    probabilities = [f / fitness_sum for f in shifted_fitness]
    
    elite_idx = np.argsort(fitness_values)[int(-len(population) * elite_ratio):]
    
    elites = [population[i] for i in elite_idx]
    
    new_population = []
    while len(new_population) < len(population) - len(elites):
        
        # Cross over
        # Select two parents based on fitness probabilities
        parents_indices = np.random.choice(len(population), size=2, replace=False, p=probabilities)
        parent1 = population[parents_indices[0]]
        parent2 = population[parents_indices[1]]
        child = np.zeros(parent1.shape)
        break_points = np.random.choice(500, size=3, replace=False)
        break_points.sort
        start=0
        for bp in break_points:
            chosen_p = parent1 if random.random() < 0.5 else parent2
            child[start:bp] = chosen_p[start:bp]
            start = bp
            
        # Mutation
        muted_child = np.array([gene if random.random() > mutation_rate else random.randint(-1, 3) for gene in child] )
        
        new_population.append(muted_child)
    return new_population + elites

In [5]:
def genetic_algorithm(pop_size, mutation_rate, elite_ratio, biscuits_list, roll_defects, roll_size, max_iter, display):
    
    population = init_population(pop_size, roll_size)
    
    for i in range(max_iter):
        population = evolve_pop(population, mutation_rate, elite_ratio, biscuits_list, roll_defects)
        
        if i % display == 0:
            # Metric computation, remove 4 lines below to go faster
            fitness_values = [fitness(ind, biscuits_list, roll_defects) for ind in population]
            elite_idx = np.argsort(fitness_values)[-1]
            elite = population[elite_idx]
            print(f'Generation {i+1}: Best fitness {fitness_values[elite_idx]}')

    fitness_values = [fitness(ind, biscuits_list, roll_defects) for ind in population]
    elite_idx = np.argsort(fitness_values)[-1]
    elite = population[elite_idx]
    
    return elite
    

In [6]:
df = pd.read_csv("defects.csv")
print(df.shape)
df.head(1)

(500, 2)


Unnamed: 0,x,class
0,355.449335,c


In [14]:
# dict format -> id : (value, size, defects_threshold)

biscuits_list = {
    -1: (-1, 1, {"a":9, "b":9, "c":9}),
     0: ( 3, 4, {"a":4, "b":2, "c":3}),
     1: (12, 8, {"a":5, "b":4, "c":4}),
     2: ( 1, 2, {"a":1, "b":2, "c":1}),
     3: ( 8, 5, {"a":2, "b":3, "c":2}),
}

In [8]:
roll_size = 500
roll_defects = {i: {"a": 0, "b": 0, "c": 0} for i in range(roll_size)}
for _, row in df.iterrows():
    roll_defects[int(row["x"])][row["class"]] += 1

In [21]:
pop_size = 10000
mutation_rate = 0.01
elite_ratio = 0.1
max_iter = 100
display = 20

In [22]:
result = genetic_algorithm(pop_size, mutation_rate, elite_ratio, biscuits_list, roll_defects, roll_size, max_iter, display)

Generation 1: Best fitness 301
Generation 21: Best fitness 336
Generation 41: Best fitness 343


KeyboardInterrupt: 

In [20]:
result

array([-1.,  2.,  2.,  3.,  2.,  0.,  0.,  0.,  0., -1.,  2.,  0.,  0.,
        2.,  2.,  0.,  2.,  2.,  3.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0

In [15]:
fitness(np.full((500,), 3), biscuits_list, roll_defects)

618