## Imports, Constants and Global Variables

In [None]:
'''
IMPORTS
'''
import cv2
import numpy as np
import tensorflow as tf
from PIL import Image
import os
import keras
import pandas as pd
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence , plot_model
from tensorflow.keras.models import load_model
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import absl.logging
import logging
import math
import time
import json
from sklearn.cluster import KMeans
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
from PIL import Image
from tqdm import tqdm # progress bar
from datetime import datetime
# Fix the NumPy seed: the dataset shuffle and the random anchor colors defined
# further down both draw from np.random, so this keeps them reproducible.
np.random.seed(42)



# Only show errors from absl and warnings (or worse) from the TensorFlow logger.
absl.logging.set_verbosity(absl.logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.WARNING)
#tf.config.run_functions_eagerly(True)
# Turn on memory growth for every visible physical GPU, then report the devices.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(gpus, "Physical GPUs,", logical_gpus, "Logical GPUs")
    except RuntimeError as e:
        # NOTE(review): presumably raised when the GPUs were already initialized
        # before memory growth was requested - confirm against the TensorFlow docs.
        print(e)

'''
CONSTANTS
'''

# CONSTANTS
DATASET_FOLDER = 'datasets/yolo' # Dataset folder with annotations and images
DATASET_SPLIT = (0.85,0.15,0.00) # Split between train, val and test
LABELS_FOLDER = DATASET_FOLDER + '/labels'
IMAGES_FOLDER = DATASET_FOLDER + '/images'
IMAGE_SIZE = (576, 1024) # Height, Width
MODEL_EPOCHS = 1000 # Number of epochs
MODEL_CELLULES = (18,32) # Number of grid cells: (rows, columns), i.e. along (height, width)
MODEL_BATCH_SIZE = 8 # Batch size
MODEL_LEARNING_RATE = 1e-5 #2e-4 # 9e-5
MODEL_PATIENCE = 8 # NOTE(review): presumably the early-stopping patience - confirm where it is used

#MODEL_ANCHOR_BOXES = [(7.771702728797546, 11.099425402414909), (16.233133106872444, 4.676295486695418), (26.65295286429116, 2.322343759539281), (9.476821452651933, 3.7753047321480744), (3.642066698646129, 5.355965495956248), (5.794567115957937, 2.5241804259133427), (3.5527940911190563, 2.2905150829398315), (1.7457785313564547, 2.229531520410109), (1.2712882556016014, 1.0891510755676512)] # Heigth, Width with k=9 in k-means

#MODEL_ANCHOR_BOXES = [(1.2593828217608534, 1.0782512065998489), (5.811058907896213, 2.5475515470119525), (25.958305961937022, 2.7822420043362435), (3.7260445560310216, 5.284102175173011), (3.5137867478357787, 2.28769758866406), (16.055079462929534, 4.655634908321043), (1.739537078536119, 2.2008474914889478), (7.646170972878436, 11.903683805213458), (9.540802666027334, 3.8905800088637648)]

# Active anchor boxes. The code below reads each entry as (width, height) in
# grid-cell units: index 0 is scaled by the cell width and index 1 by the cell height.
MODEL_ANCHOR_BOXES = [(3.5366531315524656, 2.551379744762401), (1.7480454891308626, 2.2060517178004098), (17.2081367020786, 4.359324877329534), (5.738342136432118, 2.592679799959159), (3.84775641160288, 6.251749903227729), (1.281164791131598, 1.0737017588530986), (9.372440051183187, 3.760986000885806), (28.522303642597052, 3.649796399274048), (7.544125627267754, 12.321541481062685)]

# NOTE(review): presumably the weights of the loss terms (coordinates, IOU
# confidence, no-object) - confirm against the loss function.
MODEL_LAMBDA_COORD = 1.5
MODEL_LAMBDA_IOU = 0.8
MODEL_LAMBDA_NOOBJ = 0.2

THRESHOLD_CONFIDENCE = 0.7 # Confidence threshold above which a prediction is drawn as a detection
MODEL_MINIMAL_IOU = 0.5 # Minimum IOU for a detection to be considered correct
MODEL_GRID_SENSIBILITY_COEF = 1.2 # Stretch coefficient applied to the sigmoid for x,y
MODEL_SIGMOID_MULTIPLIER = 1 # Multiplier applied to the sigmoid for w,h
MODEL_SIGMOID_ADDER = 0.5 # Offset added to the sigmoid for w,h

MODEL_CELLULES_SIZE = (IMAGE_SIZE[0]/MODEL_CELLULES[0], IMAGE_SIZE[1]/MODEL_CELLULES[1]) # Size of one cell in pixels (height, width)
MODEL_ANCHOR_BOXES_PIXELSIZE = [(MODEL_CELLULES_SIZE[0]*anchor[1], MODEL_CELLULES_SIZE[1]*anchor[0]) for anchor in MODEL_ANCHOR_BOXES] # Anchor box sizes in pixels (height, width)
MODEL_ANCHOR_BOXES_PIXELSIZE_2 = [(anchor[0]/2,anchor[1]/2) for anchor in MODEL_ANCHOR_BOXES_PIXELSIZE] # Anchor box sizes in pixels, divided by 2
MODEL_CELLULES_SIZE_INV = (1/MODEL_CELLULES_SIZE[0], 1/MODEL_CELLULES_SIZE[1]) # Inverse cell size: fraction of a cell per pixel (handy for coordinate computations)
MODEL_ANCHOR_BOXES_COLOR = [(np.random.rand(),np.random.rand(),np.random.rand()) for i in range(len(MODEL_ANCHOR_BOXES))] # One random RGB color per anchor box

# Index the dataset as (image filename, label filename) pairs.
# os.listdir returns entries in arbitrary order, so both listings are sorted
# before zipping; otherwise an image could be paired with another image's label.
# This relies on an image and its label sharing the same file stem.
dataset_filepath = np.array(list(zip(sorted(os.listdir(IMAGES_FOLDER)), sorted(os.listdir(LABELS_FOLDER)))))
# Shuffled row indices into dataset_filepath; the train/val/test subsets are
# contiguous slices of this array.
dataset_indices = np.arange(len(dataset_filepath))
np.random.shuffle(dataset_indices)

'''
UTILS
'''

# Select generator
def select_generator():
    """Prompt on stdin for a subset name and return the matching generator.

    An empty answer or 'train' selects the train generator, 'val' the
    validation generator and 'test' the test generator.

    Raises:
        ValueError: if the answer is none of the accepted names.
    """
    choice = input('Choose the generator (train, val, test, default : train) : ')
    if choice in ('', 'train'):
        return train_generator
    if choice == 'val':
        return val_generator
    if choice == 'test':
        return test_generator
    raise ValueError('Unknown subset (train, val, test)')
                           
# Show inference
def draw_predict(image, y_pred=None, y_true=None, showprederrors=True, nms=False, only_true=False):
    """Display an image with predicted and/or ground-truth boxes drawn on it.

    Args:
        image: Image passed to ``ax.imshow``.
        y_pred: Raw network output for one image, indexed as
            ``[row, column, anchor, 0:5]`` with channels (x, y, w, h, confidence
            logit). A sigmoid is applied to the confidence before thresholding
            against ``THRESHOLD_CONFIDENCE``. ``None`` skips the predictions.
        y_true: Label tensor indexed the same way; channel 4 equals 1 where an
            object is assigned. ``None`` skips the ground truth.
        showprederrors: Unused; kept for backward compatibility.
        nms: If True, only the most confident anchor of each cell is considered
            (a per-cell arg-max, not a full non-maximum suppression).
        only_true: If True, predictions are drawn only in cells that contain a
            ground-truth object, and the ground-truth boxes are not drawn.

    Returns:
        None. The figure is shown with ``plt.show()``. The title holds the number
        of drawn predictions and their count per anchor.
    """
    def sigmoid(x):
        # Clip to avoid overflow in exp for extreme logits.
        x = np.clip(x, -50, 50)
        return 1/(1+np.exp(-x))

    def draw_box(x_center, y_center, width, height, color):
        # Draw a box outline plus a dot at its centre, both in the anchor's color.
        rect = patches.Rectangle((x_center - width / 2, y_center - height / 2), width, height, linewidth=1, edgecolor=color, facecolor='none')
        ax.add_patch(rect)
        ax.scatter(x_center, y_center, color=color, s=2)

    if only_true and y_true is None:
        print("Erreur : only_true est True mais y_true est None")
        return
    fig, ax = plt.subplots(1)

    # Initialised up front so the title below also works when y_pred is None
    # (previously this raised NameError for ground-truth-only calls).
    nb_anchor = 0
    distribution_anchor_conf = [0] * len(MODEL_ANCHOR_BOXES)

    n_rows, n_cols = MODEL_CELLULES
    cell_height, cell_width = MODEL_CELLULES_SIZE
    # Offset that re-centres the stretched sigmoid on the cell.
    grid_offset = (MODEL_GRID_SENSIBILITY_COEF - 1) / 2

    if y_pred is not None:
        # Split the predictions
        pred_boxes = y_pred[..., 0:4]
        pred_conf = tf.sigmoid(y_pred[..., 4])
        # Draw the vertical and horizontal cell boundaries
        for col in range(n_cols + 1):
            ax.axvline(x=col * cell_width, color='w', linestyle='-', linewidth=0.1)
        for row in range(n_rows + 1):
            ax.axhline(y=row * cell_height, color='w', linestyle='-', linewidth=0.1)
        # Add the predicted bounding boxes
        for i in range(n_rows):
            for j in range(n_cols):
                if only_true and np.sum(y_true[i, j, :, 4]) == 0:
                    continue
                if nms:
                    candidates = [int(np.argmax(pred_conf[i, j]))]
                else:
                    candidates = range(len(MODEL_ANCHOR_BOXES))
                for k in candidates:
                    if pred_conf[i, j, k] > THRESHOLD_CONFIDENCE:
                        box = pred_boxes[i, j, k]
                        # Decode the predicted box into pixel coordinates
                        x_center = (j + MODEL_GRID_SENSIBILITY_COEF*sigmoid(box[0]) - grid_offset) * cell_width
                        y_center = (i + MODEL_GRID_SENSIBILITY_COEF*sigmoid(box[1]) - grid_offset) * cell_height
                        w_box = (MODEL_SIGMOID_MULTIPLIER*sigmoid(box[2]) + MODEL_SIGMOID_ADDER) * MODEL_ANCHOR_BOXES[k][0] * cell_width
                        h_box = (MODEL_SIGMOID_MULTIPLIER*sigmoid(box[3]) + MODEL_SIGMOID_ADDER) * MODEL_ANCHOR_BOXES[k][1] * cell_height
                        # Update the per-anchor statistics shown in the title
                        nb_anchor += 1
                        distribution_anchor_conf[k] += 1
                        draw_box(x_center, y_center, w_box, h_box, MODEL_ANCHOR_BOXES_COLOR[k])

    # Add the ground-truth boxes
    if y_true is not None and not only_true:
        for i in range(n_rows):
            for j in range(n_cols):
                for anchor in range(len(MODEL_ANCHOR_BOXES)):
                    box = y_true[i, j, anchor]
                    if box[4] == 1:
                        # Absolute pixel coordinates: x scales with the cell width and
                        # y with the cell height (the two indices used to be swapped).
                        x_center_abs = (j + box[0]) * cell_width
                        y_center_abs = (i + box[1]) * cell_height
                        width_abs = box[2] * MODEL_ANCHOR_BOXES_PIXELSIZE[anchor][1]
                        height_abs = box[3] * MODEL_ANCHOR_BOXES_PIXELSIZE[anchor][0]
                        draw_box(x_center_abs, y_center_abs, width_abs, height_abs, MODEL_ANCHOR_BOXES_COLOR[anchor])

    plt.title(f"{nb_anchor}       {distribution_anchor_conf}")
    ax.imshow(image)
    plt.show()

# Show 2D or 3D tensor
def show_tensor_nd(name, tensor, norm=False):
    """Display a 2D or 3D tensor as a color-mapped grid.

    Every (row, column) cell is expanded horizontally into one sub-cell per
    channel, so the channel values of a cell sit side by side.

    Args:
        name: Title of the figure.
        tensor: Array-like of shape (height, width) or (height, width, channels).
            Anything ``np.asarray`` accepts works (NumPy arrays, eager tensors).
        norm: If True, each channel is min-max normalised to [0, 1] before the
            colormap is applied. A constant channel maps to 0 rather than NaN.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    values = np.asarray(tensor)
    if values.ndim == 2:
        # Treat a 2D input as a single-channel 3D tensor.
        values = values[:, :, np.newaxis]
    height, width, channels = values.shape

    # Widened image in which each cell is replaced by `channels` sub-cells
    decomposed_image = np.zeros((height, width * channels, 3))
    # Looked up once, outside the loop, so it is also defined with zero channels.
    cmap = plt.get_cmap('viridis')

    for i in range(channels):
        channel_values = values[:, :, i]
        if norm:
            low = channel_values.min()
            span = channel_values.max() - low
            if span == 0:
                # Constant channel: avoid a 0/0 division
                normalized_values = np.zeros(channel_values.shape)
            else:
                normalized_values = (channel_values - low) / span
        else:
            normalized_values = channel_values
        # Keep the RGB components only (drop alpha) and interleave the channels
        decomposed_image[:, i::channels, :] = cmap(normalized_values)[:, :, :3]

    # Show the resulting image
    fig, ax = plt.subplots(figsize=(12, 10))
    ax.imshow(decomposed_image, aspect='auto')

    # Separator lines between rows and between cells
    for y in range(1, height):
        ax.axhline(y=y - 0.5, color='white', linestyle='-', linewidth=1)
    for x in range(channels, width * channels, channels):
        ax.axvline(x=x - 0.5, color='white', linestyle='-', linewidth=1)

    # Color bar for the gradient in use (named so it does not shadow `norm`)
    color_norm = plt.Normalize(vmin=0, vmax=1)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=color_norm)
    sm.set_array([])
    cbar = plt.colorbar(sm, ax=ax, orientation='vertical', fraction=0.02, pad=0.04)
    cbar.set_label('Normalized Channel Intensity')

    # Title; axes are hidden for clarity
    ax.set_title(name)
    ax.axis('off')

    plt.show()

# Get info from tensor
def tensor_stats(name,tensor, simple = False, print_tensor = False):
    """Print summary statistics of a tensor with ``tf.print``.

    Args:
        name: Label printed in front of the statistics.
        tensor: Tensor to summarise.
        simple: If True, print the tensor value instead of the statistics.
        print_tensor: If True, additionally print the whole tensor.

    The statistics (min, max, mean, standard deviation, median) are always
    computed, even when ``simple`` is True.
    """
    smallest = tf.reduce_min(tensor)
    largest = tf.reduce_max(tensor)

    # Mean and standard deviation are computed on a float32 copy
    average = tf.reduce_mean(tf.cast(tensor, tf.float32))
    spread = tf.math.reduce_std(tf.cast(tensor, tf.float32))

    # Median: flatten, sort, then take the middle element (odd count) or the
    # mean of the two middle elements (even count)
    ordered = tf.sort(tf.reshape(tensor, [-1]))
    count = tf.size(ordered)
    if count % 2 != 0:
        median = ordered[count // 2]
    else:
        median = (ordered[count // 2 - 1] + ordered[count // 2]) / 2.0

    if simple:
        tf.print(name, ":", 'Value:', tensor)
    else:
        tf.print(name, ":", 'Min:', smallest, 'Max:', largest, 'Mean:', average, 'Stddev:', spread, 'Median:', median, 'Shape:', tf.shape(tensor))
    if print_tensor:
        tf.print(tensor, summarize=-1)

# Plot metrics
def plot_training_curves(history, val_history):
    """Plot the train and validation curves of every tracked metric.

    Args:
        history: Mapping from metric name to one entry per epoch; every entry
            is reduced with ``np.mean`` before plotting.
        val_history: Same mapping for validation, with keys prefixed by 'val_'.
    """
    n_epochs = len(history['loss'])  # Total number of epochs
    x_axis = range(1, n_epochs + 1)

    plt.figure(figsize=(16, 12))

    # (metric key, subplot title, y-axis label)
    panels = (
        ('loss', 'Loss', 'Loss'),
        ('loss_coord', 'IOU Loss', 'Loss'),
        ('loss_pred_iou', 'Conf Loss', 'Loss'),
        ('loss_pred_noobj', 'No Object Loss', 'Loss'),
        ('best_iou_mean', 'Best IOU Mean', 'IOU'),
    )

    for position, (key, title, y_label) in enumerate(panels, start=1):
        plt.subplot(3, 2, position)
        train_curve = [np.mean(epoch_values) for epoch_values in history[key]]
        val_curve = [np.mean(epoch_values) for epoch_values in val_history['val_' + key]]
        plt.plot(x_axis, train_curve, 'b', label='Train Mean', linewidth=2)
        plt.plot(x_axis, val_curve, 'r', label='Validation Mean', linewidth=2)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

'''
STATS
'''
# Report how many dataset entries were indexed (one index per row of dataset_filepath).
print(f'Number of images in the dataset : {len(dataset_indices)}')




In [None]:
# Build the path with os.path.join: the previous backslash-separated literal relied
# on an invalid escape sequence ('\y') and only resolved on Windows.
model = load_model(os.path.join('models', 'yolo', 'yolo_12-16.h5'))

## Cluster

### Extract labels

In [None]:
# Collect the (width, height) of every labelled bounding box, in grid-cell units.
# The label files alone are enough: each line ends with the box width and height,
# which are scaled by the grid size below. The images used to be opened once per
# label line only to compute values that were never used, so that was dropped.
bounding_boxes = []
for label in sorted(os.listdir(LABELS_FOLDER)):
    with open(os.path.join(LABELS_FOLDER, label), 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # skip blank lines
            # Same column convention as the dataset loader: fields 3 and 4 are width and height
            w, h = fields[3:5]
            # Convert the sizes to grid-cell units
            w = float(w) * MODEL_CELLULES[1]
            h = float(h) * MODEL_CELLULES[0]
            # Add the bounding box to the list
            bounding_boxes.append((w, h))

# Convert the list to a NumPy array for use with scikit-learn
kmean_data = np.array(bounding_boxes)
print(f'Number of items in the dataset : {kmean_data.shape[0]}')

### K-means

In [None]:

k = 9 # Number of clusters

# Build the k-means model and fit it on the bounding-box sizes
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(kmean_data)

# The cluster centers are the candidate anchor boxes
anchor_boxes = kmeans.cluster_centers_

# Scatter plot: every bounding box colored by its cluster, centers as red crosses
plt.scatter(kmean_data[:, 0], kmean_data[:, 1], c=kmeans.labels_, cmap='viridis', marker='o', label='Bounding box')
plt.scatter(anchor_boxes[:, 0], anchor_boxes[:, 1], c='red', marker='x', label='Cluster center')
plt.title('Anchor Boxes Clustering')
plt.xlabel('Width')
plt.ylabel('Height')
plt.legend()
plt.show()

# Clustering quality metrics
inertia = kmeans.inertia_
silhouette_avg = silhouette_score(kmean_data, kmeans.labels_)
print(f'Model Inertia: {inertia}')
print(f'Silhouette Score: {silhouette_avg}')

# Final anchors as a list of (width, height) tuples
anchor_boxes = [tuple(center) for center in kmeans.cluster_centers_]
print(f'Anchor boxes : {anchor_boxes}')

## Dataset

### Load dataset

In [None]:
# Generator
class CustomDatasetLoaderYOLO(Sequence):
    """Keras Sequence yielding (images, labels) batches for one dataset subset.

    Images are converted to RGB, resized to IMAGE_SIZE and scaled to [0, 1].
    Labels are arrays of shape (rows, columns, anchors, 5) whose last axis holds
    (x offset in the cell, y offset in the cell, width / anchor width,
    height / anchor height, objectness flag).

    Label files are read one box per line as "class x_center y_center width height".
    The coordinates are multiplied by the grid and image sizes, so they are
    presumably normalised to [0, 1] (YOLO format) - confirm against the dataset.
    """

    def __init__(self, subset):
        """Select the index range of ``subset`` ('train', 'val' or 'test').

        Raises:
            ValueError: if ``subset`` is not one of the accepted names.
        """
        super().__init__()

        def get_subset(subset, length):
            train_end = int(DATASET_SPLIT[0] * length)
            val_end = train_end + int(length * DATASET_SPLIT[1])
            if subset == 'train':
                return 0, train_end
            elif subset == 'val':
                return train_end, val_end
            elif subset == 'test':
                return val_end, length
            else:
                raise ValueError('Unknown subset (train, val, test)')

        start, stop = get_subset(subset, len(dataset_indices))
        # Copy the slice: on_epoch_end shuffles in place and a plain slice is a
        # view, so it would silently reorder the shared global index array.
        self.indices = dataset_indices[start:stop].copy()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.indices) / MODEL_BATCH_SIZE))

    def __getitem__(self, idx):
        """Load and return batch ``idx`` as ``(images, labels)`` NumPy arrays."""
        batch_indices = self.indices[idx * MODEL_BATCH_SIZE : (idx + 1) * MODEL_BATCH_SIZE]
        n_rows, n_cols = MODEL_CELLULES

        batch_images = []
        batch_labels = []
        for image, label in dataset_filepath[batch_indices]:
            # Load the image
            with Image.open(os.path.join(IMAGES_FOLDER, image)) as img:
                img = img.convert('RGB')
                img = img.resize((IMAGE_SIZE[1], IMAGE_SIZE[0]))  # PIL expects (width, height)
                batch_images.append(np.array(img))

            # Load the label
            img_label = np.zeros((n_rows, n_cols, len(MODEL_ANCHOR_BOXES), 5), dtype=np.float32)
            # Context manager so the label file is closed (it used to leak)
            with open(os.path.join(LABELS_FOLDER, label)) as label_file:
                for line in label_file:
                    fields = line.split()
                    if not fields:
                        continue  # skip blank lines
                    # fields[0] is the class index, which is not used
                    x_center, y_center, width, height = map(float, fields[1:5])

                    # Find the cell holding the box centre and the offset inside it
                    # (x runs along the width / columns, y along the height / rows)
                    cell_x, rest_x = divmod(n_cols * x_center, 1)
                    cell_y, rest_y = divmod(n_rows * y_center, 1)
                    # A centre exactly on the right/bottom border would index one
                    # past the grid; keep it in the last cell with offset 1
                    if cell_x >= n_cols:
                        cell_x, rest_x = n_cols - 1, 1.0
                    if cell_y >= n_rows:
                        cell_y, rest_y = n_rows - 1, 1.0
                    cell_x, cell_y = int(cell_x), int(cell_y)

                    # Box size in pixels
                    box_width, box_height = width * IMAGE_SIZE[1], height * IMAGE_SIZE[0]

                    # Pick the anchor with the best IOU (boxes compared as if they
                    # shared a corner). Starting at -1 guarantees best_anchor is
                    # always bound, even for a zero-area box.
                    best_iou = -1.0
                    best_anchor = 0
                    for i, (anchorbox_height, anchorbox_width) in enumerate(MODEL_ANCHOR_BOXES_PIXELSIZE):
                        iou = min(anchorbox_width, box_width) * min(anchorbox_height, box_height) / (max(anchorbox_width, box_width) * max(anchorbox_height, box_height))
                        if iou > best_iou:
                            best_iou = iou
                            best_anchor = i

                    # Size relative to the chosen anchor. MODEL_ANCHOR_BOXES_PIXELSIZE
                    # is (height, width) in pixels, which pairs the anchor width with
                    # the cell width (the indices used to be swapped).
                    anchor_height_px, anchor_width_px = MODEL_ANCHOR_BOXES_PIXELSIZE[best_anchor]
                    width_cell = box_width / anchor_width_px
                    height_cell = box_height / anchor_height_px

                    # Write the box into the label; the 1 flags an object for this cell and anchor
                    img_label[cell_y, cell_x, best_anchor] = (rest_x, rest_y, width_cell, height_cell, 1)
            batch_labels.append(img_label)

        batch_images = np.asarray(batch_images) / 255
        batch_labels = np.asarray(batch_labels).astype(np.float32)

        return batch_images, batch_labels

    def on_epoch_end(self):
        """Reshuffle this subset's indices in place."""
        np.random.shuffle(self.indices)
        
# Build one loader per subset, in train / val / test order
train_generator, val_generator, test_generator = [
    CustomDatasetLoaderYOLO(subset_name) for subset_name in ('train', 'val', 'test')
]

# Report the number of batches and of images in the train subset
print(f'train dataset lenght : {len(train_generator)}')
print(f'Number of images in train dataset : {len(train_generator.indices)}')

### Testing dataset

#### Show random image+label from the dataset

In [None]:
def show_sample_image_with_boxes_and_grid(generator, figsize=(12, 12)):
    """Show one random image of ``generator`` with the grid and its label boxes.

    Args:
        generator: Indexable batch provider with ``len()``; ``generator[i]``
            must return ``(images, labels)`` with labels indexed as
            ``[image, row, column, anchor, 0:5]``.
        figsize: Matplotlib figure size.
    """
    # Pick a random batch, then a random image inside it
    random_id = np.random.randint(0, len(generator))
    images, labels = generator[random_id]
    idx = np.random.choice(images.shape[0])
    image = images[idx]
    label = labels[idx]

    fig, ax = plt.subplots(1, figsize=figsize)
    ax.imshow(image)

    n_rows, n_cols = MODEL_CELLULES
    cell_height, cell_width = MODEL_CELLULES_SIZE

    # Plot the cell boundaries
    for col in range(n_cols + 1):  # vertical lines
        ax.axvline(x=col * cell_width, color='w', linestyle='-', linewidth=0.1)
    for row in range(n_rows + 1):  # horizontal lines
        ax.axhline(y=row * cell_height, color='w', linestyle='-', linewidth=0.1)

    # Plot the bounding boxes
    for i in range(n_rows):
        for j in range(n_cols):
            for anchor in range(len(MODEL_ANCHOR_BOXES_PIXELSIZE)):
                box = label[i, j, anchor]
                if box[4] != 1:  # no object for this cell and anchor
                    continue
                # Absolute pixel coordinates: x scales with the cell width and
                # y with the cell height (the two indices used to be swapped)
                x_center_abs = (j + box[0]) * cell_width
                y_center_abs = (i + box[1]) * cell_height
                # MODEL_ANCHOR_BOXES_PIXELSIZE entries are (height, width)
                width_abs = box[2] * MODEL_ANCHOR_BOXES_PIXELSIZE[anchor][1]
                height_abs = box[3] * MODEL_ANCHOR_BOXES_PIXELSIZE[anchor][0]

                x_min = x_center_abs - width_abs / 2
                y_min = y_center_abs - height_abs / 2
                rect = patches.Rectangle((x_min, y_min), width_abs, height_abs, linewidth=1, edgecolor=MODEL_ANCHOR_BOXES_COLOR[anchor], facecolor='none')
                ax.add_patch(rect)
                ax.scatter(x_center_abs, y_center_abs, color='r', s=1)  # small dot at the box centre

    plt.show()

show_sample_image_with_boxes_and_grid(select_generator())

#### Measure processing time

----
**Avg computer**

Mean batch time for train set: 85.7 ms +- 41.9 ms

Mean batch time for val set: 86.0 ms +- 45.7 ms

Mean batch time for test set: 86.7 ms +- 32.8 ms

----


In [None]:
def measure_generator_speed(generator):
    """Time how long it takes to produce each batch of ``generator``.

    Args:
        generator: Indexable batch provider with ``len()``; every index from 0
            to ``len(generator) - 1`` is fetched once and the result discarded.

    Returns:
        Tuple ``(mean, standard deviation)`` of the per-batch time in
        milliseconds. Both are NaN when the generator has no batch.
    """
    times = []

    for i in range(len(generator)):
        # perf_counter is monotonic and high-resolution, unlike time.time
        start_time = time.perf_counter()
        generator[i]  # produce the batch and discard it
        times.append(time.perf_counter() - start_time)

    if not times:
        # Empty subset (e.g. a 0 % split): return NaN without the NumPy
        # "mean of empty slice" warning
        return float('nan'), float('nan')

    avg_time = np.mean(times) * 1000
    std_time = np.std(times) * 1000

    return avg_time, std_time

# Mesurer la vitesse des générateurs
# Time every subset's generator, in train / val / test order
(avg_time_train, std_time_train), (avg_time_val, std_time_val), (avg_time_test, std_time_test) = [
    measure_generator_speed(subset_generator)
    for subset_generator in (train_generator, val_generator, test_generator)
]

print(f"Mean batch time for train set: {avg_time_train:.1f} ms +- {std_time_train:.1f} ms")
print(f"Mean batch time for val set: {avg_time_val:.1f} ms +- {std_time_val:.1f} ms")
print(f"Mean batch time for test set: {avg_time_test:.1f} ms +- {std_time_test:.1f} ms")


## Model

### Model Definition

In [None]:
def YOLO():
    """Build the full-size YOLO-style Keras model.

    The network takes an (IMAGE_SIZE[0], IMAGE_SIZE[1], 3) image, applies five
    stride-2 convolutions interleaved with residual blocks, and outputs a
    tensor reshaped to (MODEL_CELLULES[0], MODEL_CELLULES[1], anchors, 5).

    Returns:
        The Keras model, named 'YOLO'; it is not compiled here.
    """
    def conv_bn_act(tensor, filters, kernel_size, strides=1):
        # Conv2D -> BatchNormalization -> LeakyReLU(0.1)
        tensor = layers.Conv2D(filters, kernel_size, strides=strides, padding='same')(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.LeakyReLU(alpha=0.1)(tensor)

    def res_block(tensor, filters, size=2, kernel_size=3):
        # 1x1 shortcut branch, built first to keep the layer creation order
        shortcut = conv_bn_act(tensor, filters, (1, 1))
        # Main branch: `size` stacked convolutions
        for _ in range(size):
            tensor = conv_bn_act(tensor, filters, kernel_size)
        merged = layers.Add()([shortcut, tensor])
        merged = layers.BatchNormalization()(merged)
        return layers.LeakyReLU(alpha=0.1)(merged)

    # Input
    input_img = layers.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

    # Stem at full resolution
    x = layers.BatchNormalization()(input_img)
    x = res_block(x, 64, 3, kernel_size=5)

    # Each stage: one stride-2 convolution followed by two residual blocks.
    # (downsampling filters, residual filters, convolutions per residual block)
    stages = (
        (64, 128, 3),
        (128, 256, 3),
        (256, 256, 3),
        (256, 512, 3),
        (512, 1024, 2),
    )
    for down_filters, block_filters, block_size in stages:
        x = conv_bn_act(x, down_filters, 3, strides=2)
        x = res_block(x, block_filters, block_size)
        x = res_block(x, block_filters, block_size)

    # Output head: 5 values per anchor and per cell
    n_anchors = len(MODEL_ANCHOR_BOXES)
    output = layers.Conv2D(5*n_anchors, (3, 3), padding='same')(x)
    output = layers.Reshape((MODEL_CELLULES[0], MODEL_CELLULES[1], n_anchors, 5))(output)

    return models.Model(inputs=input_img, outputs=output, name='YOLO')

def MiniYOLO():
    """Build the lighter YOLO-style Keras model.

    Same layout as the full model (five stride-2 convolutions interleaved with
    residual blocks) but with fewer filters and more, shallower residual
    blocks. The output is reshaped to
    (MODEL_CELLULES[0], MODEL_CELLULES[1], anchors, 5).

    Returns:
        The Keras model, named 'MiniYOLO'; it is not compiled here.
    """
    def conv_bn_act(tensor, filters, kernel_size, strides=1):
        # Conv2D -> BatchNormalization -> LeakyReLU(0.1)
        tensor = layers.Conv2D(filters, kernel_size, strides=strides, padding='same')(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.LeakyReLU(alpha=0.1)(tensor)

    def res_block(tensor, filters, size=2, kernel_size=3):
        # 1x1 shortcut branch, built first to keep the layer creation order
        shortcut = conv_bn_act(tensor, filters, (1, 1))
        # Main branch: `size` stacked convolutions
        for _ in range(size):
            tensor = conv_bn_act(tensor, filters, kernel_size)
        merged = layers.Add()([shortcut, tensor])
        merged = layers.BatchNormalization()(merged)
        return layers.LeakyReLU(alpha=0.1)(merged)

    # Input
    input_img = layers.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

    # Stem at full resolution
    x = layers.BatchNormalization()(input_img)
    x = res_block(x, 16, 1, kernel_size=5)
    x = res_block(x, 16, 2)
    x = res_block(x, 16, 2)

    # Each stage: one stride-2 convolution, then residual blocks with the same
    # number of filters. (filters, number of residual blocks)
    stages = (
        (32, 3),
        (64, 4),
        (128, 5),
        (256, 4),
        (512, 3),
    )
    for filters, n_blocks in stages:
        x = conv_bn_act(x, filters, 3, strides=2)
        for _ in range(n_blocks):
            x = res_block(x, filters, 2)

    # Output head: 5 values per anchor and per cell
    n_anchors = len(MODEL_ANCHOR_BOXES)
    output = layers.Conv2D(5*n_anchors, (3, 3), padding='same')(x)
    output = layers.Reshape((MODEL_CELLULES[0], MODEL_CELLULES[1], n_anchors, 5))(output)

    return models.Model(inputs=input_img, outputs=output, name='MiniYOLO')

model = MiniYOLO()
model.summary()

### Model Training

In [None]:
# GIoU / PRED_IOU loss -> total_loss, (loss_coord, loss_pred_iou, loss_pred_noobj, best_iou_mean)
@tf.function
def yolo_loss(y_true, y_pred):
    """Compute the MiniYOLO loss for one batch.

    The total loss is the sum of three weighted terms:
      * ``loss_coord``: GIoU box-regression loss on the cells that hold an object,
      * ``loss_pred_iou``: cross-entropy between the predicted confidence and the box IoU,
      * ``loss_pred_noobj``: cross-entropy on the confidence of the slots without object.

    Box centres are expressed in grid-cell units (cell index + offset inside the cell).

    Args:
        y_true: ground-truth tensor, split into (x, y, w, h, conf) along its last axis.
            The x/y/w/h channels are summed over the anchor axis, so the box of a cell
            is assumed to be stored in a single anchor slot with zeros in the other
            slots - TODO confirm against the data generator.
        y_pred: raw network output of shape
            (batch, MODEL_CELLULES[0], MODEL_CELLULES[1], len(MODEL_ANCHOR_BOXES), 5)
            holding the (x, y, w, h, conf) logits.

    Returns:
        ``(total_loss, (loss_coord, loss_pred_iou, loss_pred_noobj, best_iou_mean))``,
        all scalar tensors. ``best_iou_mean`` is a monitoring value only; it is not
        part of ``total_loss``.
    """
    # Unreduced binary cross-entropies (the second one applies label smoothing)
    binary_crossentropy = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
    binary_crossentropy_ls = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE, label_smoothing=0.01)

    # Constant tensors.
    # NOTE(review): column 0 of the anchors is used as a width and column 1 as a height
    # below - confirm that MODEL_ANCHOR_BOXES is stored as (width, height).
    anchor_sizes = tf.constant(MODEL_ANCHOR_BOXES, dtype=tf.float32)
    # Column index of every cell (0, 1, ..., MODEL_CELLULES[1]-1 on each row): x offset of the cell
    pattern_ascending = tf.tile(tf.range(MODEL_CELLULES[1], dtype=tf.float32)[tf.newaxis, :], [MODEL_CELLULES[0], 1])
    # Row index of every cell (0, ..., MODEL_CELLULES[0]-1 down each column): y offset of the cell
    pattern_row_index = tf.tile(tf.range(MODEL_CELLULES[0], dtype=tf.float32)[:, tf.newaxis], [1, MODEL_CELLULES[1]])
    # Add a leading (batch) and a trailing (anchor) axis so the grids broadcast against the boxes
    pattern_ascending = tf.expand_dims(tf.expand_dims(pattern_ascending, axis=0), axis=-1)
    pattern_row_index = tf.expand_dims(tf.expand_dims(pattern_row_index, axis=0), axis=-1)

    # Split the predictions into their components and decode them
    pred_x, pred_y, pred_w, pred_h, pred_conf = tf.split(y_pred, (1, 1, 1, 1, 1), axis=-1)
    pred_x = tf.squeeze(pred_x, axis=-1)
    pred_y = tf.squeeze(pred_y, axis=-1)
    pred_w = tf.squeeze(pred_w, axis=-1)
    pred_h = tf.squeeze(pred_h, axis=-1)
    pred_conf = tf.squeeze(pred_conf, axis=-1)
    # Centre = cell index + scaled sigmoid, re-centred so the offset range is symmetric around the cell
    converted_pred_x = pattern_ascending + MODEL_GRID_SENSIBILITY_COEF * tf.sigmoid(pred_x) - (MODEL_GRID_SENSIBILITY_COEF - 1) / 2
    converted_pred_y = pattern_row_index + MODEL_GRID_SENSIBILITY_COEF * tf.sigmoid(pred_y) - (MODEL_GRID_SENSIBILITY_COEF - 1) / 2
    # Size = bounded multiple of the anchor size
    converted_pred_w = (MODEL_SIGMOID_MULTIPLIER * tf.sigmoid(pred_w) + MODEL_SIGMOID_ADDER) * anchor_sizes[:, 0]
    converted_pred_h = (MODEL_SIGMOID_MULTIPLIER * tf.sigmoid(pred_h) + MODEL_SIGMOID_ADDER) * anchor_sizes[:, 1]
    converted_pred_conf = tf.sigmoid(pred_conf)

    # Split the ground truth into its components.
    # x/y/w/h are summed over the anchor axis (keepdims) so that every anchor of a cell
    # is compared with the same ground-truth box.
    true_x, true_y, true_w, true_h, true_conf = tf.split(y_true, (1, 1, 1, 1, 1), axis=-1)
    true_x = tf.math.reduce_sum(tf.squeeze(true_x, axis=-1), axis=-1, keepdims=True)
    true_y = tf.math.reduce_sum(tf.squeeze(true_y, axis=-1), axis=-1, keepdims=True)
    converted_true_x = true_x + pattern_ascending
    converted_true_y = true_y + pattern_row_index
    true_w = tf.squeeze(true_w, axis=-1)
    true_h = tf.squeeze(true_h, axis=-1)
    converted_true_w = tf.math.reduce_sum(true_w * anchor_sizes[:, 0], axis=-1, keepdims=True)  # true width
    converted_true_h = tf.math.reduce_sum(true_h * anchor_sizes[:, 1], axis=-1, keepdims=True)  # true height
    true_conf = tf.squeeze(true_conf, axis=-1)

    # Object bookkeeping.
    # `obj` is a per-anchor 0/1 mask; `obj_area` / `noobj_area` are per-cell COUNTS of
    # anchors with / without an object (not 0/1 masks), broadcast over the anchor axis.
    obj = tf.cast(true_conf == 1, tf.float32)
    obj_area = tf.reduce_sum(obj, axis=-1, keepdims=True)
    noobj_area = tf.reduce_sum(1 - obj, axis=-1, keepdims=True)
    nb_obj = tf.reduce_sum(obj)
    # Number of anchor slots without object in the batch actually received (the original
    # derived it from MODEL_BATCH_SIZE, which is wrong for a batch of another size).
    nb_noobj = tf.reduce_sum(noobj_area)

    # ---- IoU / GIoU between every predicted box and the ground-truth box of its cell ----
    # Box corners
    true_x_min, true_y_min = converted_true_x - converted_true_w / 2, converted_true_y - converted_true_h / 2
    true_x_max, true_y_max = converted_true_x + converted_true_w / 2, converted_true_y + converted_true_h / 2

    pred_x_min, pred_y_min = converted_pred_x - converted_pred_w / 2, converted_pred_y - converted_pred_h / 2
    pred_x_max, pred_y_max = converted_pred_x + converted_pred_w / 2, converted_pred_y + converted_pred_h / 2

    # Intersection rectangle
    inter_x_min = tf.maximum(true_x_min, pred_x_min)
    inter_y_min = tf.maximum(true_y_min, pred_y_min)
    inter_x_max = tf.minimum(true_x_max, pred_x_max)
    inter_y_max = tf.minimum(true_y_max, pred_y_max)

    # Smallest box enclosing both boxes
    englob_x_min = tf.minimum(true_x_min, pred_x_min)
    englob_y_min = tf.minimum(true_y_min, pred_y_min)
    englob_x_max = tf.maximum(true_x_max, pred_x_max)
    englob_y_max = tf.maximum(true_y_max, pred_y_max)

    # Areas (the intersection is clamped to 0 when the boxes do not overlap)
    inter_area = tf.maximum(inter_x_max - inter_x_min, 0) * tf.maximum(inter_y_max - inter_y_min, 0)
    true_area = (true_x_max - true_x_min) * (true_y_max - true_y_min)
    pred_area = (pred_x_max - pred_x_min) * (pred_y_max - pred_y_min)
    union_area = true_area + pred_area - inter_area
    max_box_area = (englob_x_max - englob_x_min) * (englob_y_max - englob_y_min)

    # IoU (epsilon guards against a division by zero)
    iou = inter_area / (union_area + 1e-6)
    # GIoU = IoU - (C - U) / C, with C the enclosing-box area and U the union area.
    # The previous expression (C - T - P + U) simplifies to (C - I): for boxes that do
    # not overlap it is constant, so it gave them no gradient at all.
    giou = iou - (max_box_area - union_area) / (max_box_area + 1e-6)
    giouloss = (1 - giou) * obj_area

    # ---- Final loss ----
    # Box-regression loss: every anchor of a cell holding an object is regressed towards
    # the ground truth, hence the normalisation by nb_obj * number of anchors.
    loss_coord = tf.reduce_sum(giouloss)
    loss_coord = MODEL_LAMBDA_COORD * loss_coord / (nb_obj * len(MODEL_ANCHOR_BOXES) + 1e-6)

    # IoU-prediction loss (the goal is for the model to learn to predict its own IoU).
    # NOTE(review): Keras losses are called as loss(y_true, y_pred); here, and in the
    # no-object term below, the predicted confidence is passed first and the target
    # second, so the roles look swapped. Left unchanged on purpose: correcting it
    # rescales both terms and requires re-tuning MODEL_LAMBDA_IOU / MODEL_LAMBDA_NOOBJ.
    loss_pred_iou = MODEL_LAMBDA_IOU * tf.reduce_sum(binary_crossentropy(converted_pred_conf * obj_area, iou)) / (nb_obj + 1e-6)

    # Confidence loss for the slots without object
    loss_pred_noobj = MODEL_LAMBDA_NOOBJ * tf.reduce_sum(binary_crossentropy_ls(converted_pred_conf * noobj_area, true_conf * noobj_area)) / (nb_noobj + 1e-6)

    # Total loss
    total_loss = loss_coord + loss_pred_iou + loss_pred_noobj

    # Monitoring metric: best IoU over the anchors of each cell, averaged over the objects
    best_iou_mean = tf.reduce_sum(tf.reduce_max(iou, axis=-1)) / (nb_obj + 1e-6)

    return total_loss, (loss_coord, loss_pred_iou, loss_pred_noobj, best_iou_mean)


# Global Adam optimizer used by `train_step`; the learning rate comes from MODEL_LEARNING_RATE.
optimizer=tf.keras.optimizers.Adam(learning_rate=MODEL_LEARNING_RATE)

# Custom training step
@tf.function
def train_step(inputs, labels):
    """Run one optimisation step on a single batch.

    Performs the forward pass of the global `model` in training mode, evaluates
    `yolo_loss` under a gradient tape and applies the resulting gradients with
    the global `optimizer`.

    Args:
        inputs: batch of images fed to the model.
        labels: matching ground-truth tensor, passed to `yolo_loss` as `y_true`.

    Returns:
        The `(loss, loss_data)` pair returned by `yolo_loss`.
    """
    with tf.GradientTape() as tape:
        outputs = model(inputs, training=True)
        loss, loss_data = yolo_loss(labels, outputs)
    grads = tape.gradient(loss, model.trainable_variables)
    grads_and_vars = zip(grads, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars)
    return loss, loss_data

# Main training loop.
# Every epoch: one pass over the training generator (with weight updates), one pass
# over the validation generator (inference mode), then best-epoch tracking, a
# qualitative preview when the validation loss improved, and early stopping.

# Names of the values produced by yolo_loss, in order: the total loss followed by
# the four entries of its `loss_data` tuple.
_METRIC_KEYS = ('loss', 'loss_coord', 'loss_pred_iou', 'loss_pred_noobj', 'best_iou_mean')


def _log_batch(pbar, batch_metrics, prefix, loss, loss_data):
    """Store the loss terms of one batch and show the epoch's running means on the progress bar.

    Args:
        pbar: tqdm progress bar of the current pass; advanced by one step.
        batch_metrics: dict of lists keyed by ``prefix + metric name``, appended in place.
        prefix: '' for the training pass, 'val_' for the validation pass.
        loss: total loss tensor of the batch.
        loss_data: (loss_coord, loss_pred_iou, loss_pred_noobj, best_iou_mean) tensors.
    """
    for key, value in zip(_METRIC_KEYS, (loss, *loss_data)):
        batch_metrics[prefix + key].append(value.numpy())
    running = [np.mean(batch_metrics[prefix + key]) for key in _METRIC_KEYS]
    pbar.set_postfix({'Loss' :f"{running[0]:.6f}",
                      'Loss coord' :f"{running[1]:.6f}",
                      'Loss pred iou' :f"{running[2]:.6f}",
                      'Loss noobj' :f"{running[3]:.6f}",
                      'Best IOU Mean' :f"{running[4]:.3f}",
    })
    pbar.update()


# Create the output folder up front so a missing directory cannot make the final
# model.save() fail after the whole training has run.
os.makedirs('models/yolo', exist_ok=True)

best_loss_epoch = 0
best_loss = float('inf')
# Histories hold one entry per epoch; each entry is the list of per-batch values.
history = {
    'loss' : [],
    'loss_coord' : [],
    'loss_pred_iou' : [],
    'loss_pred_noobj' : [],
    'best_iou_mean' : [],
}
val_history = {
    'val_loss' : [],
    'val_loss_coord' : [],
    'val_loss_pred_iou' : [],
    'val_loss_pred_noobj' : [],
    'val_best_iou_mean' : [],
}
for epoch in range(MODEL_EPOCHS):
    found_better_loss = False

    # Train loop
    epoch_losses = {key: [] for key in _METRIC_KEYS}
    with tqdm(total=len(train_generator), desc=f'Training {epoch+1}/{MODEL_EPOCHS}', unit='batch') as pbar:
        for step in range(len(train_generator)):
            inputs, labels = train_generator[step]
            # Batch training
            loss, loss_data = train_step(inputs, labels)
            _log_batch(pbar, epoch_losses, '', loss, loss_data)
    train_generator.on_epoch_end()
    for key in _METRIC_KEYS:
        history[key].append(epoch_losses[key])

    # Val loop
    val_epoch_losses = {'val_' + key: [] for key in _METRIC_KEYS}
    with tqdm(total=len(val_generator), desc=f'Validation {epoch+1}/{MODEL_EPOCHS}', unit='batch') as pbar:
        for step in range(len(val_generator)):
            inputs, labels = val_generator[step]
            # Batch inference. training=False so that BatchNormalization uses its moving
            # statistics and is not updated with validation data.
            predictions = model(inputs, training=False)
            loss, loss_data = yolo_loss(labels, predictions)
            _log_batch(pbar, val_epoch_losses, 'val_', loss, loss_data)
    val_epoch_loss = np.mean(val_epoch_losses['val_loss'])
    for key in _METRIC_KEYS:
        val_history['val_' + key].append(val_epoch_losses['val_' + key])

    # Compare epoch with the best epoch yet
    if val_epoch_loss < best_loss:
        best_loss_epoch = epoch
        best_loss = val_epoch_loss
        found_better_loss = True
        print(f'Found a better validation epoch with a mean loss of {best_loss:.6f}')

    # Plot: preview the predictions on one random validation image after an improvement
    if found_better_loss:
        random_id = np.random.randint(0, len(val_generator))
        selected_images, selected_labels = val_generator[random_id]
        random_idx = np.random.randint(0, selected_images.shape[0])
        selected_image = np.expand_dims(selected_images[random_idx], axis=0)
        selected_label = selected_labels[random_idx]
        # Inference (training=False: a single image must not drive the batch statistics)
        predictions = model(selected_image, training=False)[0]
        #show_tensor_nd("predictions_conf",predictions[...,4])
        draw_predict(selected_image[0], y_pred=predictions, nms=True) # Use NMS

    # Early stopping
    # NOTE(review): the weights saved here are those of the last epoch, not those of
    # best_loss_epoch - no snapshot of the best weights is kept.
    if epoch - best_loss_epoch >= MODEL_PATIENCE:
        print(f"Training stopped. No improvement was seen in the last {MODEL_PATIENCE} epochs.")
        model.save('models/yolo/yolo_' + datetime.now().strftime('%m-%d') + '.h5')
        break
    print('-'*50)


In [None]:
# Save the current state of `model` under a date-stamped HDF5 file name.
# The target folder is created first so the save cannot fail on a missing directory.
os.makedirs('models/yolo', exist_ok=True)
model.save('models/yolo/yolo_' + datetime.now().strftime('%m-%d') + '.h5')

## Evaluation

### Show training metrics

In [None]:
# Plot the metrics collected by the training loop: `history` holds the training values
# and `val_history` the validation values (one list of per-batch values per epoch).
plot_training_curves(history, val_history)

### Unit test

In [None]:
# Qualitative check: run the model on one randomly chosen sample and draw the result.
# Extract data from generator
generator = select_generator()  # helper defined elsewhere - presumably returns one of the dataset generators; verify
generator_len = len(generator)
# Pick a random batch, then a random sample inside that batch
random_id = np.random.randint(0, generator_len)
images, labels = generator[random_id]
idx = np.random.choice(images.shape[0])
image = images[idx]
label = labels[idx]
# Restore the leading batch axis expected by the model
image = np.expand_dims(image, axis=0)
label = np.expand_dims(label, axis=0)  # NOTE: `label` is prepared but not used in this cell
# Inference mode (training=False)
y_pred = model(image,training=False)

# Draw the predictions of the single sample, with NMS enabled
draw_predict(image[0], y_pred[0], nms=True)
