### Проведем инференс модели RF-DETR

In [1]:
import os
import torch
from tqdm import tqdm
import numpy as np

from rfdetr import RFDETRBase
# from rfdetr.util.coco_classes import COCO_CLASSES
from PIL import Image



In [2]:
# Step up one directory (presumably from the notebook's folder to the project
# root) so that the `inference_functions` package and the relative data paths
# resolve from the working directory.
# Guarded on the package directory being absent: an unconditional chdir climbs
# one more level on every re-run of this cell and breaks all relative paths.
if not os.path.isdir('inference_functions'):
    os.chdir('..')

from inference_functions.yolo_inf_functions import target_boxes
from inference_functions.yolo_inf_functions import check_class, check_class_rtdetr
from inference_functions.yolo_inf_functions import clip_box, map50_calculate
from inference_functions.yolo_inf_functions import read_reference_video_rfdetr

In [3]:
# Thresholds: minimum detection confidence passed to the model, and the IoU
# value used when matching predictions to ground truth for the metric.
IOU_THRESHOLD = 0.5
CONF_THRESH = 0.5

# Dataset locations, relative to the current working directory.
ANNOTATIONS_CSV = "data/test/test/_annotations.csv"
TEST_IMAGES_DIR = "data/test/test"

# Echo the paths so the cell output records which dataset was evaluated.
for _label, _value in (
    ("TEST_IMAGES_DIR :", TEST_IMAGES_DIR),
    ("ANNOTATIONS_CSV :", ANNOTATIONS_CSV),
):
    print(_label, _value)

TEST_IMAGES_DIR : data/test/test
ANNOTATIONS_CSV : data/test/test/_annotations.csv


In [4]:
# Load the ground-truth detection boxes from the dataset's annotation CSV.
# The result is used later as a mapping keyed by image file name (its keys
# drive the inference loop). The layout of each per-image value is defined by
# target_boxes and is not visible in this notebook.
target_boxes_dict = target_boxes(ANNOTATIONS_CSV)

Уникальных изображений в CSV: 738, общее число bbox: 2783


### Загрузка модели

In [5]:
# Instantiate RF-DETR Base with default constructor arguments; the library
# logs that it loads pretrained weights at this point.
model = RFDETRBase()
# Inference optimization is left disabled. Without it the library warns during
# prediction that latency may be higher than expected.
# model.optimize_for_inference(compile=False)

# Class index that the model uses for "person", as reported by the helper.
# NOTE(review): the helper is named for RT-DETR but is called with an RF-DETR
# model here — confirm it handles this model's class mapping correctly.
person_class_idx = check_class_rtdetr(model)

Loading pretrain weights
person_class_idx = 1


### Сбор предсказаний модели

In [6]:
# Accumulator for per-detection prediction records. Re-created on every run of
# this cell so that repeated runs do not append duplicates.
preds = list()

def find_image_path(fname, images_dir=None):
    """
    Resolve the on-disk path of a test image.

    Args:
        fname: Image file name, relative to the images directory.
        images_dir: Directory to look in. When None (the default), the
            module-level TEST_IMAGES_DIR is used.

    Returns:
        The joined path if a regular file exists there, otherwise None, so the
        caller can skip the image instead of failing when it tries to open it.
    """
    base_dir = TEST_IMAGES_DIR if images_dir is None else images_dir
    candidate = os.path.join(base_dir, fname)
    # Report a missing file as None; callers already test for that value.
    return candidate if os.path.isfile(candidate) else None

image_list = list(target_boxes_dict.keys())

# Prediction-collection loop: run the detector on every annotated image and
# append only "person" detections to `preds` as
# {'image_id', 'score', 'bbox'} records.
for fname in tqdm(image_list, desc="RF-DETR inference on test images"):
    img_path = find_image_path(fname)
    # Verify the file on disk here as well: a missing image would otherwise
    # abort the whole (long) run inside Image.open instead of being skipped.
    if img_path is None or not os.path.isfile(img_path):
        print(f"Image not found, skipping: {fname}")
        continue

    # The context manager closes the underlying file deterministically.
    # convert() returns an independent copy, so pil_img stays usable afterwards.
    with Image.open(img_path) as src_img:
        pil_img = src_img.convert("RGB")

    detections = model.predict(pil_img, threshold=float(CONF_THRESH))
    # Accept either a single detections object or a sequence of them.
    det = detections[0] if isinstance(detections, (list, tuple)) else detections

    # No boxes returned for this image: nothing to record.
    if not hasattr(det, "xyxy") or len(det.xyxy) == 0:
        continue

    img_w, img_h = pil_img.size

    xyxys = np.array(det.xyxy)
    scores = np.array(det.confidence)
    classes = np.array(det.class_id)

    for box, score, cls in zip(xyxys, scores, classes):
        cls_int = int(cls)
        # Keep person detections only.
        if cls_int != person_class_idx:
            continue

        # Truncate to integer pixel coordinates, then clip using the image size.
        xyxy = [int(box[0]), int(box[1]), int(box[2]), int(box[3])]
        xyxy = clip_box(xyxy, img_w, img_h)
        preds.append({
            'image_id': fname,
            'score': float(score),
            'bbox': xyxy
        })

print(f"Всего предсказаний (person) собрано: {len(preds)}")

RF-DETR inference on test images:   0%|          | 0/738 [00:00<?, ?it/s]Model is not optimized for inference. Latency may be higher than expected. You can optimize the model for inference by calling model.optimize_for_inference().
RF-DETR inference on test images: 100%|██████████| 738/738 [36:32<00:00,  2.97s/it]  

Всего предсказаний (person) собрано: 2057





In [7]:
# Score the collected person predictions against the ground-truth boxes using
# IOU_THRESHOLD as the matching threshold. The mAP50 value appears in the cell
# output.
map50_calculate(preds, target_boxes_dict, IOU_THRESHOLD)

mAP50 для датасета = 0.5384


In [8]:
# Input reference clip and the destination for the annotated copy, both
# relative to the current working directory.
video_path = "reference video/crowd.mp4"
# NOTE(review): the output file name says "rt-detr" although this notebook runs
# RF-DETR — confirm it is intentional and does not overwrite the result of an
# RT-DETR run saved under the same name.
save_video_path = "reference video/detected_persons/4_rt-detr_detected_crowd.mp4"

# Run person detection on the reference video with the same model, class index
# and confidence threshold as the image evaluation, saving to save_video_path.
read_reference_video_rfdetr(model, video_path, save_video_path, person_class_idx, conf_thresh=CONF_THRESH)

Video inference: 100%|██████████| 705/705 [33:13<00:00,  2.83s/frame]  

Видео с детекцие сохранено в reference video/detected_persons/4_rt-detr_detected_crowd.mp4. Кадров обработано: 705, отрисовано bbox: 7024



