# YOLO
**/!\ This version of YOLO does not include class prediction or multi-scale prediction.**

## Imports, Constants and Global Variables

In [None]:
'''
IMPORTS
'''
import numpy as np
import tensorflow as tf
from PIL import Image
import os
import pandas as pd
from tensorflow.keras import layers, models
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import absl.logging
import logging
from tensorflow.keras.utils import Sequence, plot_model
import time
from sklearn.cluster import KMeans
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
from PIL import Image
from tqdm import tqdm # progress bar
from datetime import datetime
from ipywidgets import Dropdown, Layout, Button, VBox, HBox, Output
# Seed NumPy's global RNG, which is used further down by np.random.rand / np.random.shuffle
np.random.seed(42)



# Only show absl messages at ERROR level and TensorFlow logger messages at WARNING level or above
absl.logging.set_verbosity(absl.logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.WARNING)
#tf.config.run_functions_eagerly(True)
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Turn on memory growth for every physical GPU before listing the logical devices
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(gpus, "Physical GPUs,", logical_gpus, "Logical GPUs")
    except RuntimeError as e:
        # NOTE(review): presumably raised when the GPUs were already initialized
        # before memory growth was set - confirm against the TensorFlow docs
        print(e)

'''
CONSTANTS
'''

# CONSTANTS
DATASET_FOLDER = 'datasets/yolo' # Dataset folder with annotations and images
MODEL_FOLDER = 'models/yolo' # Folder to save and load models
DATASET_SPLIT = (0.75,0.20,0.05) # Split between train, val and test
LABELS_FOLDER = DATASET_FOLDER + '/labels'
IMAGES_FOLDER = DATASET_FOLDER + '/images'
IMAGE_SIZE = (576, 1024) # Height, Width
MODEL_EPOCHS = 1000 # Number of epochs
MODEL_CELLULES = (18,32) # Number of cells: height, width (rows, columns)
MODEL_BATCH_SIZE = 4 # Batch size
MODEL_LEARNING_RATE = 1e-4
MODEL_PATIENCE = 10
BOK_FACTOR = 3 # Number of boxes to keep for the BoK

# Anchor boxes as (width, height), expressed in grid cells
MODEL_ANCHOR_BOXES = [
    (1.0583118277729953, 1.049340222907784),
    (8.59272329748259, 2.6783446036374645),
    (18.042821164159008, 10.97661303553199),
    (2.988349659829224, 6.10604402643905),
    (2.1024247923276076, 2.270861681701132),
    (14.716370144570341, 3.3927061962834175),
    (10.948179662161893, 6.239582909394387),
    (6.07272101508836, 3.739122200908468),
    (8.002992148445207, 15.429423262078899),
    (29.46776909562174, 3.8386150181052767),
    (3.4639113280451594, 1.7175792011783335),
    (6.309270699179344, 7.352531098636265),
    (20.385446192500307, 2.959357080172756),
    (5.362068949615781, 2.033512908678979),
    (4.161533434208954, 11.930562829810054),
    (1.0825721324291453, 2.166483835080819),
    (3.5172166305716233, 3.310328528940259),
    (1.987218503609958, 1.2044695947380342),
]

MODEL_LAMBDA_COORD = 0.9
MODEL_LAMBDA_IOU = 0.6
MODEL_LAMBDA_NOOBJ = 0.3

THRESHOLD_CONFIDENCE = 0.8 # Threshold for confidence score
MODEL_MINIMAL_IOU = 0.4 # Minimal IOU for a box to be considered as a match
MODEL_GRID_SENSIBILITY_COEF = 1.2 # Coefficient for the grid expansion
MODEL_SIGMOID_MULTIPLIER = 1 # Multiplier for the sigmoid for w,h
MODEL_SIGMOID_ADDER = 0.5 # Adder for the sigmoid for w,h

MODEL_CELLULES_SIZE = (IMAGE_SIZE[0]/MODEL_CELLULES[0], IMAGE_SIZE[1]/MODEL_CELLULES[1]) # Size of a cell in pixels (height, width)
MODEL_ANCHOR_BOXES_PIXELSIZE = [(MODEL_CELLULES_SIZE[0]*anchor[1], MODEL_CELLULES_SIZE[1]*anchor[0]) for anchor in MODEL_ANCHOR_BOXES] # Size of the anchor boxes in pixels (height, width)
MODEL_ANCHOR_BOXES_PIXELSIZE_2 = [(anchor[0]/2,anchor[1]/2) for anchor in MODEL_ANCHOR_BOXES_PIXELSIZE] # Half size of the anchor boxes in pixels
MODEL_CELLULES_SIZE_INV = (1/MODEL_CELLULES_SIZE[0], 1/MODEL_CELLULES_SIZE[1]) # Inverse of the size of a cell in pixels (height, width)
MODEL_ANCHOR_BOXES_COLOR = [(np.random.rand(),np.random.rand(),np.random.rand()) for _ in MODEL_ANCHOR_BOXES] # Random RGB color for each anchor box

# Index the dataset as (image file, label file) pairs.
# os.listdir returns entries in arbitrary order, so both listings are sorted
# before being zipped; otherwise an image could be paired with the wrong label.
# NOTE(review): this assumes image and label files share the same base names - confirm.
_image_files = sorted(os.listdir(IMAGES_FOLDER))
_label_files = sorted(os.listdir(LABELS_FOLDER))
if len(_image_files) != len(_label_files):
    # zip() silently truncates to the shorter listing, which would hide a broken dataset
    print(f'WARNING : {len(_image_files)} images but {len(_label_files)} labels, pairs may be misaligned')
dataset_filepath = np.array(list(zip(_image_files, _label_files)))

# Shuffle the dataset (through an index array, the file pairs stay in place)
dataset_indices = np.arange(len(dataset_filepath))
np.random.shuffle(dataset_indices)

'''
UTILS
'''

def select_generator():
    """Ask on stdin which dataset split to use and return its generator.

    An empty answer selects the train split.

    Raises:
        ValueError: if the answer is not '', 'train', 'val' or 'test'.
    """
    choice = input('Choose the generator (train, val, test, default : train) : ')
    if choice in ('', 'train'):
        return train_generator
    if choice == 'val':
        return val_generator
    if choice == 'test':
        return test_generator
    raise ValueError('Unknown subset (train, val, test)')

def sigmoid(x):
    """Logistic function, with the input clipped to [-50, 50] before the exponential."""
    clipped = np.clip(x, -50, 50)
    return 1 / (1 + np.exp(-clipped))

def softmax(x):
    """Softmax of `x`, normalized along axis 0.

    The global maximum is subtracted before exponentiating so that large
    inputs do not overflow.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum(axis=0)

def compute_iou(boxA, boxB):
    """Return the intersection-over-union of two center-format boxes.

    Args:
        boxA, boxB: indexable boxes laid out as
            [x_center, y_center, width, height, ...]; only the first four
            entries are read.

    Returns:
        The IOU as a float. When the union area is not positive (for example
        two zero-sized boxes), 0.0 is returned instead of dividing by zero.
    """
    # Corners of the intersection rectangle
    x_left = max(boxA[0] - boxA[2] / 2, boxB[0] - boxB[2] / 2)
    y_top = max(boxA[1] - boxA[3] / 2, boxB[1] - boxB[3] / 2)
    x_right = min(boxA[0] + boxA[2] / 2, boxB[0] + boxB[2] / 2)
    y_bottom = min(boxA[1] + boxA[3] / 2, boxB[1] + boxB[3] / 2)

    # Clamp at 0 so that disjoint boxes yield an empty intersection
    inter_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
    union_area = boxA[2] * boxA[3] + boxB[2] * boxB[3] - inter_area
    if union_area <= 0:
        return 0.0
    return inter_area / float(union_area)

def calculate_bok(grouped_boxes):
    """Merge each group of candidate boxes into a single weighted box.

    Args:
        grouped_boxes: iterable of groups; every box in a group is laid out as
            (x_center, y_center, width, height, confidence, anchor_index).

    Returns:
        One list per group: the softmax-weighted average of the four
        coordinates, followed by the highest confidence of the group and the
        anchor index of that highest-confidence box (used for the color).
    """
    def merge(group):
        confidences = np.array([candidate[4] for candidate in group])
        coordinates = np.array([candidate[:4] for candidate in group])
        # Confidence scores become averaging weights through a softmax
        weights = softmax(confidences)
        # weights[:, None] adds an axis so each weight scales a whole box
        averaged = np.sum(weights[:, None] * coordinates, axis=0)
        strongest = group[np.argmax(confidences)]
        return averaged.tolist() + [np.max(confidences)] + [strongest[5]]

    return [merge(group) for group in grouped_boxes]

def non_maximum_suppression(boxes):
    """Greedy non-maximum suppression on center-format boxes.

    Boxes are visited by decreasing confidence (index 4). Each visited box is
    kept, and every remaining box whose IOU with it exceeds MODEL_MINIMAL_IOU
    is discarded.

    Args:
        boxes: sequence of boxes laid out as
            (x_center, y_center, width, height, confidence, ...).

    Returns:
        The kept boxes (the original objects), by decreasing confidence.
    """
    if len(boxes) == 0:
        return []

    confidences = np.array([candidate[4] for candidate in boxes])
    coordinates = np.array([candidate[:4] for candidate in boxes])

    # Indices of the boxes still in competition, best score first
    remaining = np.argsort(-confidences)

    kept = []
    while remaining.size > 0:
        leader, challengers = remaining[0], remaining[1:]
        kept.append(boxes[leader])

        # A challenger survives unless it overlaps the leader too much
        survives = np.array(
            [
                not compute_iou(coordinates[leader], coordinates[other]) > MODEL_MINIMAL_IOU
                for other in challengers
            ],
            dtype=bool,
        )
        remaining = challengers[survives]

    return kept

def parse_prediction(y_pred, bok=BOK_FACTOR):
    """Decode the raw model output of ONE image into pixel-space boxes.

    Args:
        y_pred: raw prediction indexed as [row, column, anchor, channel],
            channels 0-3 being the box parameters and channel 4 the
            confidence logit.
        bok: how many of the most confident anchors are examined per cell.

    Returns:
        A list with one entry per grid cell that produced at least one box.
        Each entry is a list of tuples
        (x_center, y_center, width, height, confidence, anchor_index),
        keeping only boxes whose confidence exceeds THRESHOLD_CONFIDENCE.
    """
    raw_boxes = y_pred[..., 0:4]
    confidences = tf.sigmoid(y_pred[..., 4])
    n_rows, n_cols = MODEL_CELLULES[0], MODEL_CELLULES[1]

    grouped = []
    for i in range(n_rows):
        for j in range(n_cols):
            # Anchors of this cell sorted by decreasing confidence, top `bok` only
            top_anchors = np.argsort(confidences[i, j])[::-1][:bok]
            kept = []
            for k in top_anchors:
                box = raw_boxes[i, j, k]
                if not confidences[i, j, k].numpy() > THRESHOLD_CONFIDENCE:
                    continue
                # Center: cell index plus a stretched sigmoid offset, scaled to pixels
                x_center = (j + MODEL_GRID_SENSIBILITY_COEF*sigmoid(box[0]) - (MODEL_GRID_SENSIBILITY_COEF-1)/2) * MODEL_CELLULES_SIZE[1]
                y_center = (i + MODEL_GRID_SENSIBILITY_COEF*sigmoid(box[1]) - (MODEL_GRID_SENSIBILITY_COEF-1)/2) * MODEL_CELLULES_SIZE[0]
                # Size: bounded multiple of the anchor size, scaled to pixels
                w_box = (MODEL_SIGMOID_MULTIPLIER*sigmoid(box[2]) + MODEL_SIGMOID_ADDER ) * MODEL_ANCHOR_BOXES[k][0] * MODEL_CELLULES_SIZE[1]
                h_box = (MODEL_SIGMOID_MULTIPLIER*sigmoid(box[3]) + MODEL_SIGMOID_ADDER ) * MODEL_ANCHOR_BOXES[k][1] * MODEL_CELLULES_SIZE[0]
                conf = confidences[i, j, k].numpy()
                kept.append((x_center, y_center, w_box, h_box, conf, k))
            if kept:
                grouped.append(kept)
    return grouped

def parse_label(y_true):
    """Decode the label tensor of ONE image into pixel-space boxes.

    Args:
        y_true: label indexed as [row, column, anchor, channel] with channels
            (x offset in cell, y offset in cell, width relative to the anchor,
            height relative to the anchor, objectness).

    Returns:
        A list of tuples
        (x_center, y_center, width, height, 1, anchor_index), in pixels, one
        per (cell, anchor) slot whose objectness equals 1.
    """
    # MODEL_CELLULES_SIZE is (height, width): x must be scaled by the cell
    # WIDTH and y by the cell HEIGHT (the previous code had them swapped,
    # which only went unnoticed because the cells are currently square).
    cell_height, cell_width = MODEL_CELLULES_SIZE
    boxes = []
    for i in range(MODEL_CELLULES[0]):
        for j in range(MODEL_CELLULES[1]):
            # Anchor pixel sizes are stored as (height, width)
            for anchor, (anchor_height_px, anchor_width_px) in enumerate(MODEL_ANCHOR_BOXES_PIXELSIZE):
                box = y_true[i, j, anchor]
                if box[4] == 1:
                    x_center_abs = (j + box[0]) * cell_width
                    y_center_abs = (i + box[1]) * cell_height
                    width_abs = box[2] * anchor_width_px
                    height_abs = box[3] * anchor_height_px
                    boxes.append((x_center_abs, y_center_abs, width_abs, height_abs, 1, anchor))
    return boxes

def calculate_iou_metrics(predicted_boxes, true_boxes):
    """For every predicted box, return its best IOU against the true boxes.

    Returns:
        A list aligned with `predicted_boxes`; an entry is 0 when no true box
        overlaps the prediction (or when `true_boxes` is empty).
    """
    # The leading 0 reproduces the "start from 0, keep strictly greater" scan
    return [
        max([0] + [compute_iou(pred_box, true_box) for true_box in true_boxes])
        for pred_box in predicted_boxes
    ]

def draw_predict(image, y_pred=None, y_true=None, bok=BOK_FACTOR, title=None):
    """Show an image with its predicted and/or ground-truth boxes.

    Args:
        image: image passed to `imshow`.
        y_pred: optional raw model output for this image; when given, the
            grid is drawn and the boxes go through parse_prediction,
            calculate_bok and non_maximum_suppression before being drawn.
        y_true: optional label tensor for this image, decoded by parse_label.
        bok: forwarded to parse_prediction.
        title: optional axes title.
    """
    def outline(axes, box):
        # Draw the rectangle and its center dot in the color of the box's anchor
        x_center, y_center, width, height, _, anchor_index = box
        color = MODEL_ANCHOR_BOXES_COLOR[anchor_index]
        top_left = (x_center - width / 2, y_center - height / 2)
        axes.add_patch(patches.Rectangle(top_left, width, height, linewidth=1, edgecolor=color, facecolor='none'))
        axes.scatter(x_center, y_center, color=color, s=2)  # `s` is the dot size

    fig, ax = plt.subplots(1)

    # Predicted boxes, if available
    if y_pred is not None:
        # Grid: vertical lines first, then horizontal lines
        for column in range(MODEL_CELLULES[1] + 1):
            ax.axvline(x=column * MODEL_CELLULES_SIZE[1], color='w', linestyle='-', linewidth=0.1)
        for row in range(MODEL_CELLULES[0] + 1):
            ax.axhline(y=row * MODEL_CELLULES_SIZE[0], color='w', linestyle='-', linewidth=0.1)

        detections = non_maximum_suppression(calculate_bok(parse_prediction(y_pred, bok)))
        for detection in detections:
            outline(ax, detection)

    # Ground-truth boxes, if available
    if y_true is not None:
        for truth in parse_label(y_true):
            outline(ax, truth)

    ax.imshow(image)
    ax.axis('off')
    if title is not None:
        ax.set_title(title)
    plt.show()

def plot_training_curves(history, val_history):
    """Plot train and validation curves for each tracked metric.

    Args:
        history: dict mapping a metric name to one entry per epoch; each
            entry is averaged with np.mean before plotting.
        val_history: same layout, with keys prefixed by 'val_'.
    """
    # (history key, subplot title, y-axis label)
    panels = [
        ('loss', 'Loss', 'Loss'),
        ('loss_coord', 'IOU Loss', 'Loss'),
        ('loss_pred_iou', 'Conf Loss', 'Loss'),
        ('loss_pred_noobj', 'No Object Loss', 'Loss'),
        ('best_iou_mean', 'Best IOU Mean', 'IOU'),
    ]
    n_epochs = len(history['loss'])
    x_axis = range(1, n_epochs + 1)

    plt.figure(figsize=(16, 12))

    for position, (metric_key, panel_title, y_label) in enumerate(panels, start=1):
        plt.subplot(3, 2, position)
        train_means = [np.mean(values) for values in history[metric_key]]
        val_means = [np.mean(values) for values in val_history['val_' + metric_key]]
        plt.plot(x_axis, train_means, 'b', label='Train Mean', linewidth=2)
        plt.plot(x_axis, val_means, 'r', label='Validation Mean', linewidth=2)
        plt.title(panel_title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

def select_learning_rate():
    """Ask on stdin for a learning rate and return the one in effect.

    A non-empty answer is converted with float() and stored in the global
    MODEL_LEARNING_RATE; an empty answer keeps the current value.
    """
    global MODEL_LEARNING_RATE
    answer = input(f'Choose the learning rate (default : {MODEL_LEARNING_RATE}) : ')
    if answer:
        MODEL_LEARNING_RATE = float(answer)
    return MODEL_LEARNING_RATE

def save_model_with_config(model):
    """Save `model` and the constants needed to decode its output.

    Files are written under MODEL_FOLDER/yolo_<day>-<month>/: the model as an
    .h5 file and a config.py holding one `NAME = value` line per constant.
    """
    date_now = datetime.now().strftime("%d-%m")

    # Destination folder, created on first use
    model_folder = f"{MODEL_FOLDER}/yolo_{date_now}"
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)

    # Model weights and architecture
    model_path = f"{model_folder}/yolo_{date_now}.h5"
    model.save(model_path)

    # Constants exported next to the model, looked up by name in the module globals
    CONSTANTES_LIST = [
        'MODEL_ANCHOR_BOXES',
        'MODEL_CELLULES',
        'IMAGE_SIZE',
        'MODEL_CELLULES_SIZE',
        'THRESHOLD_CONFIDENCE',
        'MODEL_MINIMAL_IOU',
        'MODEL_GRID_SENSIBILITY_COEF',
        'MODEL_SIGMOID_MULTIPLIER',
        'MODEL_SIGMOID_ADDER',
    ]
    config_path = f"{model_folder}/config.py"
    with open(config_path, 'w') as f:
        f.writelines(f"{constante} = {globals()[constante]}\n" for constante in CONSTANTES_LIST)

    print(f"Model saved at {model_path}")

def print_statistics(data, label):
    """Print max, min, mean, median and standard deviation of `data`.

    Args:
        data: array-like of numbers.
        label: name shown in the header line.
    """
    values = np.array(data)
    mean_val, median_val, std_dev = values.mean(), np.median(values), values.std()
    min_val, max_val = values.min(), values.max()

    print(f"Statistics for {label}:")
    print(f"  Max: {max_val:.4f}")
    print(f"  Min: {min_val:.4f}")
    print(f"  Mean: {mean_val:.4f}")
    print(f"  Median: {median_val:.4f}")
    print(f"  Standard Deviation: {std_dev:.4f}\n")

'''
STATS
'''
# Report how many (image, label) pairs were indexed from the dataset folders
print(f'Number of images/labels in the dataset : {len(dataset_indices)}')




## Cluster

### Extract labels

In [None]:
# Collect the (width, height) of every annotated object, expressed in grid cells.
# Only the label files are needed: the annotated sizes are relative to the image,
# so the image itself does not have to be opened (it used to be decoded once per
# label line without its size ever being used).
bounding_boxes = []
# os.listdir order is arbitrary; sorting keeps the clustering input reproducible
for label in sorted(os.listdir(LABELS_FOLDER)):
    with open(os.path.join(LABELS_FOLDER, label), 'r') as f:
        for line in f:
            fields = line.split()
            if len(fields) < 5:
                # Blank or truncated line: nothing to parse
                continue
            # Fields are: class, x_center, y_center, width, height
            w, h = fields[3:5]
            # Relative size -> size in grid cells
            w = float(w) * MODEL_CELLULES[1]
            h = float(h) * MODEL_CELLULES[0]
            bounding_boxes.append((w, h))

# reshape keeps a (n, 2) array even when no box was found
kmean_data = np.array(bounding_boxes, dtype=float).reshape(-1, 2)
print(f'Number of items in the dataset : {kmean_data.shape[0]}')

### K-means

In [None]:
k = 9 # Number of clusters

# Cluster the (width, height) pairs; the cluster centers become the anchor boxes
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(kmean_data)
anchor_boxes = kmeans.cluster_centers_

# Scatter plot: boxes colored by cluster, centers as red crosses
box_widths, box_heights = kmean_data[:, 0], kmean_data[:, 1]
plt.scatter(box_widths, box_heights, c=kmeans.labels_, cmap='viridis', marker='o', label='Bounding box')
plt.scatter(anchor_boxes[:, 0], anchor_boxes[:, 1], c='red', marker='x', label='Cluster center')
plt.title('Anchor Boxes Clustering')
plt.xlabel('Width')
plt.ylabel('Height')
plt.legend()

plt.show()

# Clustering quality: inertia and silhouette score
inertia = kmeans.inertia_
silhouette_avg = silhouette_score(kmean_data, kmeans.labels_)
print(f'Model Inertia: {inertia}')
print(f'Silhouette Score: {silhouette_avg}')

# Centers as a plain list of (width, height) tuples, ready to paste into the constants
anchor_boxes = [tuple(center) for center in kmeans.cluster_centers_]
print(f'Anchor boxes : {anchor_boxes}')

## Dataset

### Load dataset

In [None]:
# Generator
class CustomDatasetLoaderYOLO(Sequence):
    """Keras Sequence yielding (images, labels) batches for one dataset split.

    The split is a contiguous slice of the shuffled global `dataset_indices`,
    sized according to DATASET_SPLIT. Images are resized to IMAGE_SIZE and
    scaled to [0, 1]; labels have shape
    (batch, MODEL_CELLULES[0], MODEL_CELLULES[1], len(MODEL_ANCHOR_BOXES), 5)
    with channels (x offset in cell, y offset in cell, width relative to the
    anchor, height relative to the anchor, objectness).
    """

    def __init__(self, subset):
        """Select the indices of `subset` ('train', 'val' or 'test').

        Raises:
            ValueError: if `subset` is not one of the three known names.
        """
        super().__init__()
        start, stop = self._subset_bounds(subset, len(dataset_indices))
        self.indices = dataset_indices[start:stop]

    @staticmethod
    def _subset_bounds(subset, length):
        """Return the (start, stop) slice bounds of `subset` among `length` items."""
        train_end = int(DATASET_SPLIT[0] * length)
        val_end = train_end + int(length * DATASET_SPLIT[1])
        if subset == 'train':
            return 0, train_end
        if subset == 'val':
            return train_end, val_end
        if subset == 'test':
            return val_end, length
        raise ValueError('Unknown subset (train, val, test)')

    def __len__(self):
        """Number of batches; the last one may be smaller than MODEL_BATCH_SIZE."""
        return int(np.ceil(len(self.indices) / MODEL_BATCH_SIZE))

    def __getitem__(self, idx):
        """Load batch number `idx` and return (images, labels) as float arrays."""
        batch_indices = self.indices[idx * MODEL_BATCH_SIZE : (idx + 1) * MODEL_BATCH_SIZE]

        batch_images = []
        batch_labels = []
        for image, label in dataset_filepath[batch_indices]:
            batch_images.append(self._load_image(image))
            batch_labels.append(self._load_label(label))

        batch_images = np.asarray(batch_images) / 255
        batch_labels = np.asarray(batch_labels).astype(np.float32)

        return batch_images, batch_labels

    @staticmethod
    def _load_image(image_name):
        """Read one image as an RGB array resized to IMAGE_SIZE (height, width)."""
        with Image.open(os.path.join(IMAGES_FOLDER, image_name)) as img:
            # PIL's resize expects (width, height)
            return np.array(img.convert('RGB').resize((IMAGE_SIZE[1], IMAGE_SIZE[0])))

    @staticmethod
    def _load_label(label_name):
        """Encode one label file into a (rows, cols, anchors, 5) float32 array."""
        n_rows, n_cols = MODEL_CELLULES
        cell_height, cell_width = MODEL_CELLULES_SIZE
        img_label = np.zeros((n_rows, n_cols, len(MODEL_ANCHOR_BOXES), 5), dtype=np.float32)

        # `with` guarantees the file is closed (it used to be left open)
        with open(os.path.join(LABELS_FOLDER, label_name)) as label_file:
            for line in label_file:
                fields = line.split()
                if len(fields) < 5:
                    # Blank or truncated line
                    continue
                # fields[0] is the class index, which this model does not use
                x_center, y_center, width, height = map(float, fields[1:5])

                # Box size in pixels of the resized image
                box_width, box_height = width * IMAGE_SIZE[1], height * IMAGE_SIZE[0]
                if box_width <= 0 or box_height <= 0:
                    # Degenerate box: no anchor can match it
                    continue

                # Cell containing the center (x runs along the width, y along the height).
                # The index is clamped so that a center lying exactly on the right or
                # bottom border (coordinate 1.0) stays inside the grid.
                grid_x, grid_y = n_cols * x_center, n_rows * y_center
                cell_x = min(max(int(grid_x), 0), n_cols - 1)
                cell_y = min(max(int(grid_y), 0), n_rows - 1)
                rest_x, rest_y = grid_x - cell_x, grid_y - cell_y

                # Best anchor: highest ratio between the area of the smallest and of the
                # largest extents, a cheap shape-similarity score (first best one wins)
                best_anchor, best_score = 0, -1.0
                for i, (anchorbox_height, anchorbox_width) in enumerate(MODEL_ANCHOR_BOXES_PIXELSIZE):
                    score = (min(anchorbox_width, box_width) * min(anchorbox_height, box_height)
                             / (max(anchorbox_width, box_width) * max(anchorbox_height, box_height)))
                    if score > best_score:
                        best_score, best_anchor = score, i

                # Size relative to the chosen anchor. Anchors are (width, height) in cells,
                # so the width is scaled by the cell WIDTH and the height by the cell HEIGHT
                # (the two cell dimensions used to be swapped here).
                anchor_width, anchor_height = MODEL_ANCHOR_BOXES[best_anchor]
                width_cell = box_width / (anchor_width * cell_width)
                height_cell = box_height / (anchor_height * cell_height)

                # Last channel = 1 marks an object at this cell and anchor
                img_label[cell_y, cell_x, best_anchor] = (rest_x, rest_y, width_cell, height_cell, 1)
        return img_label

    def on_epoch_end(self):
        """Shuffle the order of the samples of this split, in place."""
        np.random.shuffle(self.indices)

train_generator = CustomDatasetLoaderYOLO('train')
val_generator = CustomDatasetLoaderYOLO('val')
test_generator = CustomDatasetLoaderYOLO('test')

print(f'train dataset lenght : {len(train_generator)}')
print(f'Number of images in train dataset : {len(train_generator.indices)}')

### Testing dataset

#### Show random image+label from the dataset

In [None]:
def show_sample_image_with_boxes_and_grid(generator, figsize=(12, 12)):
    """Display a random image of `generator` with the grid and its labelled boxes.

    Args:
        generator: indexable batch provider returning (images, labels); a
            random batch, then a random image inside that batch, is picked.
        figsize: matplotlib figure size.
    """
    # MODEL_CELLULES_SIZE is (height, width)
    cell_height, cell_width = MODEL_CELLULES_SIZE

    # Pick a random batch, then a random image inside it
    random_id = np.random.randint(0, len(generator))
    images, labels = generator[random_id]
    idx = np.random.choice(images.shape[0])

    fig, ax = plt.subplots(1, figsize=figsize)
    ax.imshow(images[idx])

    # Grid: vertical lines, then horizontal lines
    for i in range(MODEL_CELLULES[1] + 1):
        ax.axvline(x=i * cell_width, color='w', linestyle='-', linewidth=0.1)
    for j in range(MODEL_CELLULES[0] + 1):
        ax.axhline(y=j * cell_height, color='w', linestyle='-', linewidth=0.1)

    # Bounding boxes
    label = labels[idx]
    for i in range(MODEL_CELLULES[0]):
        for j in range(MODEL_CELLULES[1]):
            # Anchor pixel sizes are stored as (height, width)
            for anchor, (anchor_height_px, anchor_width_px) in enumerate(MODEL_ANCHOR_BOXES_PIXELSIZE):
                box = label[i, j, anchor]
                if box[4] == 1:  # An object is labelled in this slot
                    # Absolute center: the column index j is scaled by the cell WIDTH and
                    # the row index i by the cell HEIGHT (they used to be swapped, which
                    # only went unnoticed because the cells are currently square)
                    x_center_abs = (j + box[0]) * cell_width
                    y_center_abs = (i + box[1]) * cell_height
                    width_abs = box[2] * anchor_width_px
                    height_abs = box[3] * anchor_height_px

                    x_min = x_center_abs - width_abs / 2
                    y_min = y_center_abs - height_abs / 2
                    rect = patches.Rectangle((x_min, y_min), width_abs, height_abs, linewidth=1, edgecolor=MODEL_ANCHOR_BOXES_COLOR[anchor], facecolor='none')
                    ax.add_patch(rect)
                    ax.scatter(x_center_abs, y_center_abs, color='r', s=1)  # small dot at the center

    plt.show()

show_sample_image_with_boxes_and_grid(select_generator())

#### Measure processing time

----
**Reference timings on an average computer**

Mean batch time for train set: 85.7 ms +- 41.9 ms

Mean batch time for val set: 86.0 ms +- 45.7 ms

Mean batch time for test set: 86.7 ms +- 32.8 ms

----


In [None]:
def measure_generator_speed(generator):
    """Time how long it takes to load each batch of `generator`.

    Args:
        generator: object supporting len() and integer indexing; every index
            in range(len(generator)) is loaded once.

    Returns:
        (mean, standard deviation) of the per-batch loading time, in
        milliseconds. Both are NaN when the generator has no batch.
    """
    times = []

    for i in range(len(generator)):
        # perf_counter is monotonic and high resolution, unlike the wall
        # clock returned by time.time()
        start_time = time.perf_counter()
        generator[i]
        times.append(time.perf_counter() - start_time)

    if not times:
        return float('nan'), float('nan')

    avg_time = np.mean(times) * 1000
    std_time = np.std(times) * 1000

    return avg_time, std_time

# Time one full pass over each split, in the order train, val, test;
# every measurement unpacks into (mean, standard deviation)
(
    (avg_time_train, std_time_train),
    (avg_time_val, std_time_val),
    (avg_time_test, std_time_test),
) = (
    measure_generator_speed(split_generator)
    for split_generator in (train_generator, val_generator, test_generator)
)

print(f"Mean batch time for train set: {avg_time_train:.1f} ms +- {std_time_train:.1f} ms")
print(f"Mean batch time for val set: {avg_time_val:.1f} ms +- {std_time_val:.1f} ms")
print(f"Mean batch time for test set: {avg_time_test:.1f} ms +- {std_time_test:.1f} ms")


## Model

### Model Definition

In [None]:
def YOLO():
    """Build the residual convolutional detector.

    The network stacks six stages of residual blocks (16 to 512 filters).
    Every stage after the first starts with a stride-2 convolution, so the
    spatial resolution is halved five times. A final convolution emits 5
    values per anchor, reshaped to
    (MODEL_CELLULES[0], MODEL_CELLULES[1], len(MODEL_ANCHOR_BOXES), 5).

    Returns:
        The uncompiled Keras model, named 'MiniYOLO'.
    """
    def res_block(tensor, filters, size=2, kernel_size=3, dropout=0.05):
        # `size` conv/BN/LeakyReLU/dropout layers, then a skip connection
        shortcut = tensor
        for _ in range(size):
            tensor = layers.Conv2D(filters, kernel_size, padding='same')(tensor)
            tensor = layers.BatchNormalization()(tensor)
            tensor = layers.LeakyReLU(alpha=0.1)(tensor)
            tensor = layers.Dropout(dropout)(tensor)
        tensor = layers.Add()([shortcut, tensor])
        tensor = layers.BatchNormalization()(tensor)
        return layers.LeakyReLU(alpha=0.1)(tensor)

    def stage_entry(tensor, filters, **conv_options):
        # 3x3 convolution (optionally strided) followed by BN and LeakyReLU
        tensor = layers.Conv2D(filters, 3, padding='same', **conv_options)(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.LeakyReLU(alpha=0.1)(tensor)

    # Input
    input_img = layers.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

    x = layers.BatchNormalization()(input_img)
    x = layers.Dropout(0.10)(x)

    # First stage keeps the input resolution
    x = stage_entry(x, 16)
    for _ in range(4):
        x = res_block(x, 16, 3)

    # Following stages: (filters, number of residual blocks), each one halving the resolution
    for filters, block_count in ((32, 4), (64, 4), (128, 4), (256, 4), (512, 5)):
        x = stage_entry(x, filters, strides=2)
        for _ in range(block_count):
            x = res_block(x, filters, 3)

    # Output: 5 raw values (x, y, w, h, confidence) per anchor and per cell
    anchor_count = len(MODEL_ANCHOR_BOXES)
    output = layers.Conv2D(5*anchor_count, (3, 3), padding='same')(x)
    output = layers.Reshape((MODEL_CELLULES[0], MODEL_CELLULES[1], anchor_count, 5))(output)

    # Model
    return models.Model(inputs=input_img, outputs=output, name='MiniYOLO')

model = YOLO()
model.summary()

### Load existing model

In [None]:
# Every .h5 file found anywhere under MODEL_FOLDER
model_files = [
    os.path.join(root, file)
    for root, dirs, files in os.walk(MODEL_FOLDER)
    for file in files
    if file.endswith('.h5')
]

# Dropdown widget to select the model
dropdown = Dropdown(
    options=model_files,
    description='Select Model:',
    layout=Layout(width='50%', height='30px'),
    style={'description_width': 'initial'}
)

# Button to load the model
load_button = Button(description="Load Model")

def button_load_model(b):
    """Click handler: replace the global `model` by the selected file, then remove the widgets."""
    global model
    model_path = dropdown.value
    model = load_model(model_path)
    print(f"Model loaded from {model_path}")
    load_button.close()
    dropdown.close()

load_button.on_click(button_load_model)

# Show the dropdown above the button
display(VBox([dropdown, load_button]))


### Plot model

In [None]:
# Draw the layer graph of the current `model`, with shapes shown for each layer
plot_model(model, show_shapes=True)

### Model Training

In [None]:
# DIOU / PRED_IOU (loss_coord, loss_conf_iou, loss_noobj)
@tf.function
def yolo_loss(y_true, y_pred):
    """Compute the detection loss between labels and raw predictions.

    Both tensors are split into 5 channels on the last axis:
    (x, y, w, h, confidence). They are expected to be shaped
    (batch, rows, columns, anchors, 5), which is what the dataset loader and
    the model's final Reshape produce.

    The loss is the sum of three weighted terms:
      * loss_coord: box regression term built from `giouloss`
        (MODEL_LAMBDA_COORD),
      * loss_pred_iou: cross-entropy between confidence and IOU
        (MODEL_LAMBDA_IOU),
      * loss_pred_noobj: cross-entropy on the confidence for the no-object
        term (MODEL_LAMBDA_NOOBJ).

    Returns:
        (total_loss, (loss_coord, loss_pred_iou, loss_pred_noobj, best_iou_mean))
    """
  
    # Loss functions (no reduction; the second one applies label smoothing)
    binary_crossentropy = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
    binary_crossentropy_ls = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE, label_smoothing=0.01)

    # Constants
    anchor_sizes = tf.constant(MODEL_ANCHOR_BOXES, dtype=tf.float32)
    pattern_ascending = tf.tile(tf.range(MODEL_CELLULES[1], dtype=tf.float32)[tf.newaxis, :], [MODEL_CELLULES[0], 1]) # Column index of every cell, for the x coordinates (0,1,2,..,MODEL_CELLULES[1]-1)
    pattern_row_index = tf.tile(tf.range(MODEL_CELLULES[0], dtype=tf.float32)[:, tf.newaxis], [1, MODEL_CELLULES[1]]) # Row index of every cell, for the y coordinates (0,0,0,..,MODEL_CELLULES[0]-1)
    pattern_ascending = tf.expand_dims(pattern_ascending, axis=0)  # Add a leading axis so the grid broadcasts against the predictions
    pattern_ascending = tf.expand_dims(pattern_ascending, axis=-1)  # Add a trailing axis so the grid broadcasts against the predictions
    pattern_row_index = tf.expand_dims(pattern_row_index, axis=0)  # Add a leading axis so the grid broadcasts against the predictions
    pattern_row_index = tf.expand_dims(pattern_row_index, axis=-1)  # Add a trailing axis so the grid broadcasts against the predictions

    # Splitting the prediction
    pred_x, pred_y, pred_w, pred_h, pred_conf= tf.split(y_pred, (1, 1, 1, 1, 1), axis=-1)
    pred_x = tf.squeeze(pred_x, axis=-1)
    pred_y = tf.squeeze(pred_y, axis=-1)
    pred_w = tf.squeeze(pred_w, axis=-1)
    pred_h = tf.squeeze(pred_h, axis=-1)
    pred_conf = tf.squeeze(pred_conf, axis=-1)
    # Decode the raw outputs into grid-cell units (same formulas as parse_prediction,
    # without the final scaling to pixels)
    converted_pred_x = pattern_ascending + MODEL_GRID_SENSIBILITY_COEF*tf.sigmoid(pred_x) - (MODEL_GRID_SENSIBILITY_COEF-1)/2
    converted_pred_y = pattern_row_index + MODEL_GRID_SENSIBILITY_COEF*tf.sigmoid(pred_y) - (MODEL_GRID_SENSIBILITY_COEF-1)/2
    converted_pred_w = (MODEL_SIGMOID_MULTIPLIER*tf.sigmoid(pred_w) + MODEL_SIGMOID_ADDER )* anchor_sizes[:, 0]
    converted_pred_h = (MODEL_SIGMOID_MULTIPLIER*tf.sigmoid(pred_h) + MODEL_SIGMOID_ADDER )* anchor_sizes[:, 1]
    converted_pred_conf = tf.sigmoid(pred_conf)
    
    # Splitting the true values
    true_x, true_y, true_w, true_h, true_conf = tf.split(y_true, (1, 1, 1, 1, 1), axis=-1)
    # Summing over the last axis collapses the anchors into one value per cell
    # NOTE(review): this presumably relies on at most one labelled anchor per cell - confirm
    true_x = tf.math.reduce_sum(tf.squeeze(true_x, axis=-1), axis=-1, keepdims=True)
    true_y = tf.math.reduce_sum(tf.squeeze(true_y, axis=-1), axis=-1, keepdims=True)
    converted_true_x = true_x + pattern_ascending
    converted_true_y = true_y + pattern_row_index
    true_w = tf.squeeze(true_w, axis=-1)
    true_h = tf.squeeze(true_h, axis=-1)
    converted_true_w = tf.math.reduce_sum(true_w * anchor_sizes[:, 0], axis=-1, keepdims=True)  # True width, scaled by the anchor width
    converted_true_h = tf.math.reduce_sum(true_h * anchor_sizes[:, 1], axis=-1, keepdims=True)  # True height, scaled by the anchor height
    true_conf = tf.squeeze(true_conf, axis=-1)

    # Masking
    obj = tf.cast(true_conf == 1, tf.float32)
    # Per-cell counts (sum over the last axis) of slots with / without an object
    obj_area = tf.reduce_sum(obj, axis=-1, keepdims=True)
    noobj_area = tf.reduce_sum(1-obj, axis=-1, keepdims=True)
    noobj = tf.cast(true_conf == 0, tf.float32)  # not used below
    nb_obj = tf.reduce_sum(obj)



    """
     Calculates GIOU and DIOU between predicted anchor boxes and ground truths,
    taking into account different anchor box sizes.
    """

    # Convert center/size boxes to corner coordinates
    true_x_min, true_y_min = converted_true_x - converted_true_w / 2, converted_true_y - converted_true_h / 2
    true_x_max, true_y_max = converted_true_x + converted_true_w / 2, converted_true_y + converted_true_h / 2
        
    pred_x_min, pred_y_min = converted_pred_x - converted_pred_w / 2, converted_pred_y - converted_pred_h / 2
    pred_x_max, pred_y_max = converted_pred_x + converted_pred_w / 2, converted_pred_y + converted_pred_h / 2
        
    # Intersection coordinates
    inter_x_min = tf.maximum(true_x_min, pred_x_min)
    inter_y_min = tf.maximum(true_y_min, pred_y_min)
    inter_x_max = tf.minimum(true_x_max, pred_x_max)
    inter_y_max = tf.minimum(true_y_max, pred_y_max)
        
    # Enclosing box coordinates
    englob_x_min = tf.minimum(true_x_min, pred_x_min)
    englob_y_min = tf.minimum(true_y_min, pred_y_min)
    englob_x_max = tf.maximum(true_x_max, pred_x_max)
    englob_y_max = tf.maximum(true_y_max, pred_y_max)
        
    # Intersection area
    inter_area = tf.maximum(inter_x_max - inter_x_min, 0) * tf.maximum(inter_y_max - inter_y_min, 0)
    # True and predicted areas
    true_area = (true_x_max - true_x_min) * (true_y_max - true_y_min)
    pred_area = (pred_x_max - pred_x_min) * (pred_y_max - pred_y_min)
    # Union area
    union_area = true_area + pred_area - inter_area
    # Center distance
    center_distance = tf.square(converted_true_x - converted_pred_x) + tf.square(converted_true_y - converted_pred_y)
    # Calculate the "smallest enclosing box covering the two boxes diagonal line squared"
    max_distance = tf.square( englob_x_max - englob_x_min) + tf.square(englob_y_max - englob_y_min)
    max_box_area = (englob_x_max - englob_x_min) * (englob_y_max - englob_y_min)
    # IOU
    iou = inter_area / (union_area + 1e-6) # CAREFUL WITH DIVISION BY ZERO
    # DIoU
    distance_factor = center_distance / (max_distance + 1e-6) # CAREFUL WITH DIVISION BY ZERO
    diou = iou - distance_factor
    diouloss = (1 - diou) * obj_area  # computed but not used: loss_coord below is built from giouloss
    # GIoU
    # NOTE(review): since union_area = true_area + pred_area - inter_area, the numerator
    # below equals (max_box_area - inter_area); the usual GIoU penalty is
    # (max_box_area - union_area) / max_box_area - confirm which one is intended
    giou = iou - (max_box_area - true_area - pred_area + union_area) / (max_box_area + 1e-6)
    giouloss = (1 - giou) * obj_area
    

    """
    Loss calculation
    """
    
    # Loss from the coordinates
    loss_coord = tf.reduce_sum(giouloss)
    loss_coord = MODEL_LAMBDA_COORD * loss_coord / (nb_obj*len(MODEL_ANCHOR_BOXES)+1e-6)

    # loss from the confidence for cells with objects (we want the model to learn to predict its IOU)
    # NOTE(review): Keras losses are documented as loss(y_true, y_pred); here the predicted
    # confidence is passed first and the IOU second - confirm this order is intended
    loss_pred_iou = MODEL_LAMBDA_IOU * tf.reduce_sum(binary_crossentropy(converted_pred_conf*obj_area, iou)) / (nb_obj+1e-6)

    # Loss from the confidence for cells without objects
    # NOTE(review): the normalisation uses MODEL_BATCH_SIZE, not the actual batch size, so a
    # smaller last batch is presumably normalised as if it were full - confirm
    loss_pred_noobj = MODEL_LAMBDA_NOOBJ * tf.reduce_sum(binary_crossentropy_ls(converted_pred_conf*noobj_area, true_conf*noobj_area)) / (MODEL_BATCH_SIZE * (MODEL_CELLULES[0] * MODEL_CELLULES[1] * len(MODEL_ANCHOR_BOXES)) - nb_obj+1e-6)

    # Total loss
    total_loss = loss_coord + loss_pred_iou + loss_pred_noobj
    
    # Useful metrics: best IOU over the last axis of each cell, summed and divided by the object count
    best_iou_mean = tf.reduce_sum(tf.reduce_max(iou, axis=-1)) / (nb_obj+1e-6)
    
    return total_loss, (loss_coord, loss_pred_iou, loss_pred_noobj, best_iou_mean)

optimizer=tf.keras.optimizers.Adam(learning_rate=select_learning_rate())
print(f'Learning rate : {optimizer.learning_rate.numpy()}')

@tf.function
def train_step(inputs, labels):
    """Run one optimisation step on a single batch.

    The forward pass (training mode) and the loss are recorded on a gradient
    tape; the gradients of the total loss with respect to the model's
    trainable variables are then applied with the module-level optimizer.

    Args:
        inputs: batch fed to the model.
        labels: targets forwarded to yolo_loss together with the predictions.

    Returns:
        The (total_loss, loss_data) pair exactly as returned by yolo_loss.
    """
    with tf.GradientTape() as grad_tape:
        batch_loss, batch_loss_data = yolo_loss(labels, model(inputs, training=True))
    # Read the variable list after the forward pass, as the original code did
    trainable = model.trainable_variables
    grads = grad_tape.gradient(batch_loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return batch_loss, batch_loss_data

# Training loop
# Early-stopping bookkeeping: index and mean validation loss of the best epoch so far
best_loss_epoch = 0
best_loss = float('inf')
# One entry per epoch; each entry is the list of per-batch values of that epoch
history = {
    'loss' : [],
    'loss_coord' : [],
    'loss_pred_iou' : [],
    'loss_pred_noobj' : [],
    'best_iou_mean' : [],
}
val_history = {
    'val_loss' : [],
    'val_loss_coord' : [],
    'val_loss_pred_iou' : [],
    'val_loss_pred_noobj' : [],
    'val_best_iou_mean' : [],
}
for epoch in range(MODEL_EPOCHS):
    found_better_loss = False
    # Train loop
    with tqdm(total=len(train_generator), desc=f'Training {epoch+1}/{MODEL_EPOCHS}', unit='batch') as pbar:
        epoch_losses = {
            'loss' : [],
            'loss_coord' : [],
            'loss_pred_iou' : [],
            'loss_pred_noobj' : [],
            'best_iou_mean' : [],
        }
        for step in range(len(train_generator)) :
            inputs, labels = train_generator[step]
            # Batch training
            loss, loss_data = train_step(inputs, labels)
            loss_coord, loss_pred_iou, loss_pred_noobj, best_iou_mean = loss_data
            # Metrics
            epoch_losses['loss'].append(loss.numpy())
            epoch_losses['loss_coord'].append(loss_coord.numpy())
            epoch_losses['loss_pred_iou'].append(loss_pred_iou.numpy())
            epoch_losses['loss_pred_noobj'].append(loss_pred_noobj.numpy())
            epoch_losses['best_iou_mean'].append(best_iou_mean.numpy())

            # Running means over the batches seen so far in this epoch (progress bar only)
            mean_epoch_loss = np.mean(epoch_losses['loss'])
            mean_epoch_loss_coord = np.mean(epoch_losses['loss_coord'])
            mean_epoch_loss_pred_iou = np.mean(epoch_losses['loss_pred_iou'])
            mean_epoch_loss_pred_noobj = np.mean(epoch_losses['loss_pred_noobj'])
            mean_epoch_best_iou_mean = np.mean(epoch_losses['best_iou_mean'])
            pbar.set_postfix({'Loss' :f"{mean_epoch_loss:.6f}",
                              'Loss coord' :f"{mean_epoch_loss_coord:.6f}",
                              'Loss pred iou' :f"{mean_epoch_loss_pred_iou:.6f}",
                              'Loss noobj' :f"{mean_epoch_loss_pred_noobj:.6f}",
                              'Best IOU Mean' :f"{mean_epoch_best_iou_mean:.3f}",
            }
            )

            pbar.update()
    # The generator is indexed manually, so its end-of-epoch hook is called by hand
    train_generator.on_epoch_end()
    history['loss'].append(epoch_losses['loss'])
    history['loss_coord'].append(epoch_losses['loss_coord'])
    history['loss_pred_iou'].append(epoch_losses['loss_pred_iou'])
    history['loss_pred_noobj'].append(epoch_losses['loss_pred_noobj'])
    history['best_iou_mean'].append(epoch_losses['best_iou_mean'])

    # Val loop
    with tqdm(total=len(val_generator), desc=f'Validation {epoch+1}/{MODEL_EPOCHS}', unit='batch') as pbar:
        val_epoch_losses = {
            'val_loss' : [],
            'val_loss_coord' : [],
            'val_loss_pred_iou' : [],
            'val_loss_pred_noobj' : [],
            'val_best_iou_mean' : [],
        }
        for step in range(len(val_generator)) :
            inputs, labels = val_generator[step]
            # Batch inference
            predictions = model(inputs, training=False) # No dropout
            loss, loss_data = yolo_loss(labels, predictions)
            loss_coord, loss_pred_iou, loss_pred_noobj, best_iou_mean = loss_data
            # Metrics
            val_epoch_losses['val_loss'].append(loss.numpy())
            val_epoch_losses['val_loss_coord'].append(loss_coord.numpy())
            val_epoch_losses['val_loss_pred_iou'].append(loss_pred_iou.numpy())
            val_epoch_losses['val_loss_pred_noobj'].append(loss_pred_noobj.numpy())
            val_epoch_losses['val_best_iou_mean'].append(best_iou_mean.numpy())

            # Running means over the validation batches seen so far (progress bar only)
            mean_val_epoch_loss = np.mean(val_epoch_losses['val_loss'])
            mean_val_epoch_loss_coord = np.mean(val_epoch_losses['val_loss_coord'])
            mean_val_epoch_loss_pred_iou = np.mean(val_epoch_losses['val_loss_pred_iou'])
            mean_val_epoch_loss_pred_noobj = np.mean(val_epoch_losses['val_loss_pred_noobj'])
            mean_val_epoch_best_iou_mean = np.mean(val_epoch_losses['val_best_iou_mean'])

            pbar.set_postfix({'Loss' :f"{mean_val_epoch_loss:.6f}",
                              'Loss coord' :f"{mean_val_epoch_loss_coord:.6f}",
                              'Loss pred iou' :f"{mean_val_epoch_loss_pred_iou:.6f}",
                              'Loss noobj' :f"{mean_val_epoch_loss_pred_noobj:.6f}",
                              'Best IOU Mean' :f"{mean_val_epoch_best_iou_mean:.3f}",
            }
            )

            pbar.update()
    # Mean validation loss of the epoch: the criterion for "best model" and early stopping
    val_epoch_loss = np.mean(val_epoch_losses['val_loss'])
    val_history['val_loss'].append(val_epoch_losses['val_loss'])
    val_history['val_loss_coord'].append(val_epoch_losses['val_loss_coord'])
    val_history['val_loss_pred_iou'].append(val_epoch_losses['val_loss_pred_iou'])
    val_history['val_loss_pred_noobj'].append(val_epoch_losses['val_loss_pred_noobj'])
    val_history['val_best_iou_mean'].append(val_epoch_losses['val_best_iou_mean'])

    # Compare epoch with the best epoch yet
    if val_epoch_loss < best_loss :
        best_loss_epoch = epoch
        best_loss = val_epoch_loss
        found_better_loss = True
        print(f'Found a better validation epoch with a mean loss of {best_loss:.6f}')

    # Save model and plot a preview
    if found_better_loss:
        # Save first so that a failure while plotting cannot lose the best weights
        save_model_with_config(model)
        # Pick one random validation image for a visual sanity check
        random_id = np.random.randint(0, len(val_generator))
        selected_images = val_generator[random_id][0]
        random_idx = np.random.randint(0, selected_images.shape[0])
        selected_image = np.expand_dims(selected_images[random_idx], axis=0)
        # Inference: training=False, like the validation loop, so the preview
        # reflects the behaviour of the model that was just saved
        predictions = model(selected_image, training=False)[0]
        draw_predict(selected_image[0], y_pred=predictions, bok=BOK_FACTOR)

    # Early stopping
    if epoch - best_loss_epoch >= MODEL_PATIENCE:
        print(f"Training stopped. No improvement was seen in the last {MODEL_PATIENCE} epochs.")
        break
    print('-'*50)


## Evaluation

### Show training metrics

In [None]:
# Plot the metrics collected by the training loop (history) and the validation loop (val_history)
plot_training_curves(history, val_history)

### Test on a random image from a generator

In [None]:
# Draw one random sample from the selected generator and run the model on it
generator = select_generator()
# First a random batch, then a random sample inside that batch
batch_index = np.random.randint(0, len(generator))
images, labels = generator[batch_index]
idx = np.random.choice(images.shape[0])
# Indexing drops the batch axis: add it back for both the image and its label
image, label = (np.expand_dims(batch[idx], axis=0) for batch in (images, labels))
y_pred = model(image, training=False)

draw_predict(image[0], y_pred[0], bok=1)


### Comparison of BoK factors

In [None]:
# BoK factors to compare (factor 1 is labelled "NMS" on the plot)
numbers = [1, 2, 3, 4, 5, 6, 7]
generator = select_generator()
# One list of IOU scores per BoK factor, in the same order as `numbers`
ious = []
for number in numbers:
    ious_number = []
    with tqdm(total=len(generator), desc=f'BoK {number}', unit='batch') as pbar:
        for step in range(len(generator)) :
            inputs, labels = generator[step]
            predictions = model(inputs, training=False)
            for i in range(len(inputs)):
                parsed_labels = parse_label(labels[i])
                parsed_predictions = parse_prediction(predictions[i], bok=number)
                final_boxes = non_maximum_suppression(calculate_bok(parsed_predictions))

                # Accumulate for EVERY image of the batch. The accumulation used
                # to sit outside this loop, so only the last image of each batch
                # contributed to the statistics.
                # NOTE(review): the BoK-vs-NMS comparison cell calls
                # calculate_iou_metrics(predicted, labels), the opposite order —
                # confirm which argument order the helper expects.
                ious_number.extend(calculate_iou_metrics(parsed_labels, final_boxes))
            pbar.update()
    ious.append(ious_number)

fig, ax = plt.subplots(figsize=(12, 8))
positions = range(1, len(numbers) + 1)
ax.boxplot(ious, positions=positions, patch_artist=True, boxprops=dict(facecolor='lightblue'))

# Add statistics as text on the graph
stats = {
    'mean': [np.mean(scores) for scores in ious],
    'std': [np.std(scores) for scores in ious],
    'median': [np.median(scores) for scores in ious],
}

y_offset = 0.01
text_color = 'black'
for pos, mean, std, median in zip(positions, stats['mean'], stats['std'], stats['median']):
    ax.text(pos, median - y_offset, f'Median: {median:.2f}', horizontalalignment='center', color=text_color, weight='bold', verticalalignment='top')
    ax.text(pos, mean - 2*y_offset, f'Mean: {mean:.2f}\nSTD: {std:.2f}', horizontalalignment='center', color=text_color, verticalalignment='top')

# Change the labels for the BoK configurations
# (separate name so the batch `labels` variable is not reused for tick labels)
tick_labels = [f'BoK {num}' for num in numbers]
tick_labels[0] = 'BoK 1 = NMS'

plt.xticks(ticks=positions, labels=tick_labels)
plt.xlabel('Configuration BoK')
plt.ylabel('IOU Score')
plt.title('Boxplot of IOU Scores for Different BoK Configurations')
plt.grid(True)
plt.show()

### Unit test on a specific image

In [None]:
# Run the model on one hand-picked image of the dataset
image_path = "datasets/yolo/images/2505_inventory26.png"
with Image.open(image_path) as source:
    # PIL's resize takes (width, height) while IMAGE_SIZE is (height, width)
    resized = source.convert('RGB').resize((IMAGE_SIZE[1],IMAGE_SIZE[0]))
    # Scale pixel values by 1/255 and add the batch axis
    img = np.expand_dims(np.array(resized)/255, axis=0)

y_pred = model(img,training=False)
draw_predict(img[0], y_pred[0], bok=BOK_FACTOR)



### Unit test dashboard

In [None]:
# Load the image and display it
def load_image(b):
    """Button callback: load the image selected in the dropdown and show it.

    The image is converted to RGB, resized to IMAGE_SIZE and displayed in
    `output_image`. A batched copy scaled by 1/255 is stored in the global
    `img_array`, which predict() reads.

    Args:
        b: the clicked button (unused).
    """
    global img_array  # Use it later for prediction
    with Image.open(image_dropdown.value) as opened:
        # PIL's resize takes (width, height) while IMAGE_SIZE is (height, width)
        resized = opened.convert('RGB').resize((IMAGE_SIZE[1], IMAGE_SIZE[0]))
        output_image.clear_output()
        with output_image:
            plt.figure(figsize=(8, 8))
            plt.imshow(resized)
            plt.axis('off')
            plt.show()
        # Only published once the image has been displayed successfully
        img_array = np.expand_dims(np.array(resized) / 255.0, axis=0)

# Predict and display the bounding boxes
def predict(b):
    """Button callback: run the model on the loaded image and draw the boxes.

    Does nothing when no image has been loaded yet (the global `img_array`
    does not exist).

    Args:
        b: the clicked button (unused).
    """
    if 'img_array' not in globals():
        return
    y_pred = model(img_array, training=False)
    output_prediction.clear_output()
    with output_prediction:
        draw_predict(img_array[0], y_pred[0], bok=BOK_FACTOR)

# Output areas the two callbacks draw into
output_image = Output()
output_prediction = Output()

# Dropdown listing every .jpg / .png file found in the images folder
image_files = [
    os.path.join(IMAGES_FOLDER, name)
    for name in os.listdir(IMAGES_FOLDER)
    if name.endswith(('.jpg', '.png'))
]
image_dropdown = Dropdown(options=image_files, description='Select Image:')

# Buttons, each wired to its callback
button_load = Button(description='Load Image')
button_load.on_click(load_image)
button_predict = Button(description='Predict')
button_predict.on_click(predict)

# Layout: buttons on one row, the two outputs side by side on the next
box_layout = Layout()
button_box = HBox([button_load, button_predict], layout=box_layout)
output_box = HBox([output_image, output_prediction], layout=box_layout)

# Display
display(image_dropdown, button_box, output_box)


### Comparison BoK vs NMS

In [None]:
generator = select_generator()

# IOU scores accumulated over every image of every batch, one list per method
ious_nms = []
ious_bok = []
with tqdm(total=len(generator), desc='Processing Batches', unit='batch') as pbar:
    for step in range(len(generator)):
        inputs, labels = generator[step]

        predictions = model(inputs, training=False)

        for i in range(len(inputs)):
            parsed_labels = parse_label(labels[i])  # [[x_center, y_center, width, height, 1, anchor_index], ..]
            predicted_boxes_bok = parse_prediction(predictions[i], bok=BOK_FACTOR)  # [[(x_center, y_center, width, height, confidence, anchor_index)], ..]
            predicted_boxes_nms = [tuple(box[0]) for box in predicted_boxes_bok]  # [(x_center, y_center, width, height, confidence, anchor_index), ..] Keep only the best box

            # Calculate the final boxes
            final_boxes_nms = non_maximum_suppression(predicted_boxes_nms)
            final_boxes_bok = non_maximum_suppression(calculate_bok(predicted_boxes_bok))

            # Calculate IOU metrics and accumulate them for EVERY image. The
            # accumulation used to sit outside this loop, so only the last
            # image of each batch was counted.
            ious_nms.extend(calculate_iou_metrics(final_boxes_nms, parsed_labels))
            ious_bok.extend(calculate_iou_metrics(final_boxes_bok, parsed_labels))

        pbar.update(1)

# Plot the IOU distributions
plt.figure(figsize=(12, 6))
plt.hist(ious_nms, bins=500, alpha=0.7, label='NMS IOU', color='blue')
plt.hist(ious_bok, bins=500, alpha=0.7, label='BoK IOU', color='green')
plt.xlabel('IOU Score')
plt.ylabel('Frequency')
plt.title('Distribution of IOU Scores for NMS vs. BoK')
plt.legend()
plt.grid(True)

plt.show()

print_statistics(ious_nms, 'NMS IOU')
print_statistics(ious_bok, 'BoK IOU')