In [1]:
import cv2
from ultralytics import YOLOWorld
import supervision as sv
import os

In [2]:
# Путь к изображениям
PATH_TO_IMAGES = "sample_substraction"
PATH_TO_DEBUG = "debug"
BATCH_SIZE = 5
# Список картинок
list_of_images = os.listdir(PATH_TO_IMAGES)
list_of_images_with_relative_path = [os.path.join(PATH_TO_IMAGES, image) for image in list_of_images]

In [3]:
# Инициализируем модель
model = YOLOWorld("yolov8x-worldv2.pt")

# Задаем промпт для модели
classes = ["license_plate", ""]
model.set_classes(classes)

# Создаем датасет
dataset = sv.DetectionDataset(classes=classes, 
                              images=list_of_images_with_relative_path, 
                              annotations={list_of_images_with_relative_path[i]: None 
                                           for i in range(len(list_of_images_with_relative_path))})

for i in range(0, len(list_of_images_with_relative_path), BATCH_SIZE):
    batch = list_of_images_with_relative_path[i:BATCH_SIZE+i+1]
    lst_of_cv2_instances = [cv2.imread(image) for image in batch]

    # Запускаем детекцию с порогом уверенности
    results = model.predict(lst_of_cv2_instances, conf=0.1, imgsz=(640, 640))

    for index in range(len(results)):
        # Получение детекций для Supervision
        detections = sv.Detections.from_ultralytics(results[index])

        # фильтрация детекций по ширине bounding-box-а
        w = detections.xyxy[:, 2] - detections.xyxy[:, 0]
        h = detections.xyxy[:, 3] - detections.xyxy[:, 1]

        # TODO подобрано эмпирически, надо разобраться
        detections = detections[(w < 500)]

        #  Non-Maximum Suppression (NMS) постобработка для удаления дублирующихся прямоугольников
        filtered_detections = detections.with_nms(threshold=0.5)

        # Инициализация экземпляров классов для боксов и подписей
        annotator = sv.BoxAnnotator()
        label_annotator = sv.LabelAnnotator()

        # вывод доп информации (уверенность)
        labels = [
            f"{class_name} {confidence:.2f}"
            for class_name, confidence
            in zip(filtered_detections['class_name'], filtered_detections.confidence)
        ]

        # Непосредственно, рисуем боксы и подписи
        annotated_image = annotator.annotate(scene = lst_of_cv2_instances[index], 
                                            detections = filtered_detections)
        annotated_image = label_annotator.annotate(scene = annotated_image, 
                                                detections = filtered_detections, 
                                                labels=labels)

        # Сохраняем размеченную картинку
        cv2.imwrite(f"{PATH_TO_DEBUG}/annotated_{batch[index]}", annotated_image)

        # Вносим в датасет инфу по детекциям для конкретной картинки
        dataset.annotations[batch[index]] = filtered_detections



0: 640x640 2 license_plates, 49.5ms
1: 640x640 2 license_plates, 49.5ms
2: 640x640 2 license_plates, 49.5ms
3: 640x640 2 license_plates, 49.5ms
4: 640x640 3 license_plates, 49.5ms
5: 640x640 2 license_plates, 49.5ms
Speed: 4.0ms preprocess, 49.5ms inference, 33.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 license_plates, 46.7ms
1: 640x640 2 license_plates, 46.7ms
2: 640x640 5 license_plates, 46.7ms
3: 640x640 3 license_plates, 46.7ms
4: 640x640 3 license_plates, 46.7ms
5: 640x640 2 license_plates, 46.7ms
Speed: 3.7ms preprocess, 46.7ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 license_plates, 47.0ms
1: 640x640 2 license_plates, 47.0ms
2: 640x640 2 license_plates, 47.0ms
3: 640x640 5 license_plates, 47.0ms
4: 640x640 2 license_plates, 47.0ms
5: 640x640 3 license_plates, 47.0ms
Speed: 3.4ms preprocess, 47.0ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 license_plates, 46.8ms
1: 640x640 2 license

In [4]:
# Экспортируем в YOLO-формат (data_yaml генерируется невалидный для CVAT, нужно создавать самому)
dataset.as_yolo(
    images_directory_path="subbatch_license_plate_dataset/images/train",
    annotations_directory_path="subbatch_license_plate_dataset/labels/train",
)