In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import time
import os
import psutil
import pynvml
import cv2
import numpy as np
import random
import matplotlib.pyplot as plt

In [18]:
yolo_model_path = "/content/drive/MyDrive/yolo11l_finetune.pt"
rtdetr_model_path = "/content/drive/MyDrive/RT_DETR_finetune.pt"
faster_rcnn_model_path = "/content/drive/MyDrive/fasterrcnn_finetune.pth"

In [4]:
def init_nvml():
    try:
        pynvml.nvmlInit()
        return True
    except pynvml.NVMLError:
        print("No NVIDIA GPU found, GPU memory usage will not be measured.")
        return False

def get_gpu_memory(device_index=0):
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
        mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return mem_info.used / 1024 / 1024  # MB
    except:
        return 0

def get_cpu_memory():
    return psutil.virtual_memory().percent  # %

def get_model_size(model_path):
    return os.path.getsize(model_path) / 1024 / 1024  # MB

## Faster R-CNN

In [5]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

!pip install cython pyyaml==6.0.2
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
!pip install roboflow albumentations opencv-python-headless

Looking in indexes: https://download.pytorch.org/whl/cu126
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading https://download.pytorch.org/whl/cu126/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.met

In [6]:
!pip install 'git+https://github.com/facebookresearch/detectron2.git'

Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-_qvoi0cf
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-_qvoi0cf
  Resolved https://github.com/facebookresearch/detectron2.git to commit b15f64ec4429e23a148972175a0207c5a9ab84cf
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pycocotools>=2.0.2 (from detectron2==0.6)
  Downloading pycocotools-2.0.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Collecting yacs>=0.1.8 (from detectron2==0.6)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ..

In [7]:
import torch, torchvision
import detectron2
print("PyTorch Version:", torch.__version__)
print("CUDA Available:", torch.cuda.is_available())
print("CUDA Version:", torch.version.cuda)
print("Detectron2 Version:", detectron2.__version__)

PyTorch Version: 2.6.0+cu124
CUDA Available: True
CUDA Version: 12.4
Detectron2 Version: 0.6


In [8]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.data.catalog import DatasetCatalog

In [21]:
!curl -L "https://app.roboflow.com/ds/ZbgjgVHyou?key=7CwcC0ta7z" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip

[1;30;43mKết quả truyền trực tuyến bị cắt bớt đến 5000 dòng cuối.[0m
 extracting: train/garrafa-022-095_jpg.rf.57265f3edfa51fc4f04efc24c692d710.jpg  
 extracting: train/garrafa-022-095_jpg.rf.e34d7fb6c0555cb2585697b104fab44e.jpg  
 extracting: train/garrafa-022-095_jpg.rf.eb4f33634c547abe2d65d9c952b827d2.jpg  
 extracting: train/garrafa-023-010_jpg.rf.7d5ff8278f86d30a40e0ce60a837488e.jpg  
 extracting: train/garrafa-023-010_jpg.rf.92c3707a80d688ac07eba0e682ffc0f6.jpg  
 extracting: train/garrafa-023-010_jpg.rf.ffdf54a920f1b33e9b326222e57c8a97.jpg  
 extracting: train/garrafa-023-011_jpg.rf.328df87c5659ce04b8c34590d7d73b1b.jpg  
 extracting: train/garrafa-023-011_jpg.rf.6f36659b31019cdc5de0a56f65cdc624.jpg  
 extracting: train/garrafa-023-011_jpg.rf.e40c21a5f84c9a38c03aaa31c75d5b65.jpg  
 extracting: train/garrafa-023-025_jpg.rf.3c8822d2164a0a4e580447657d79ae4e.jpg  
 extracting: train/garrafa-023-025_jpg.rf.5a28b158be4f214cc02bc46c6728b8c6.jpg  
 extracting: train/garrafa-023-025_jpg

In [None]:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("my_dataset_train", {}, "/content/train/_annotations.coco.json", "/content/train")
register_coco_instances("my_dataset_val", {}, "/content/valid/_annotations.coco.json", "/content/valid")
register_coco_instances("my_dataset_test", {}, "/content/test/_annotations.coco.json", "/content/test")


In [None]:
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import inference_on_dataset
from detectron2.data import build_detection_test_loader

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = faster_rcnn_model_path
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 9
cfg.MODEL.DEVICE = "cuda"

predictor = DefaultPredictor(cfg)


[06/26 15:34:45 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from /content/drive/MyDrive/fasterrcnn_finetune.pth ...


In [None]:
from detectron2.evaluation import COCOEvaluator
from detectron2.data import DatasetCatalog

for split in ["my_dataset_train", "my_dataset_val", "my_dataset_test"]:
    print(f"\nEvaluating on {split}...")

    cfg.DATASETS.TEST = (split, )

    evaluator = COCOEvaluator(split, cfg, False, output_dir="./output/")
    val_loader = build_detection_test_loader(cfg, split)

    results = inference_on_dataset(predictor.model, val_loader, evaluator)
    print(results)



🔍 Evaluating on my_dataset_train...
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[06/26 15:35:16 d2.data.datasets.coco]: Loaded 19824 images in COCO format from /content/train/_annotations.coco.json
[06/26 15:35:18 d2.data.build]: Distribution of instances among all 9 categories:
|   category   | #instances   |  category  | #instances   |  category  | #instances   |
|:------------:|:-------------|:----------:|:-------------|:----------:|:-------------|
| bounding-box | 0            |  Battery   | 6212         |   Glass    | 6420         |
|   Medical    | 12851        |   Metal    | 9052         |  Organic   | 9479         |
|    Paper     | 10767        |  Plastic   | 13975        | SmartPhone | 808          |
|              |              |            |              |            |              |
|    total     | 69564        |            |              |            |              |
[06/26 15:35:18 d2.data.dataset_mapper]: [DatasetMapper] A

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[06/26 15:35:21 d2.evaluation.evaluator]: Inference done 11/19824. Dataloading: 0.0011 s/iter. Inference: 0.0723 s/iter. Eval: 0.0002 s/iter. Total: 0.0737 s/iter. ETA=0:24:19
[06/26 15:35:26 d2.evaluation.evaluator]: Inference done 79/19824. Dataloading: 0.0014 s/iter. Inference: 0.0728 s/iter. Eval: 0.0002 s/iter. Total: 0.0745 s/iter. ETA=0:24:30
[06/26 15:35:31 d2.evaluation.evaluator]: Inference done 142/19824. Dataloading: 0.0029 s/iter. Inference: 0.0739 s/iter. Eval: 0.0003 s/iter. Total: 0.0772 s/iter. ETA=0:25:18
[06/26 15:35:36 d2.evaluation.evaluator]: Inference done 207/19824. Dataloading: 0.0028 s/iter. Inference: 0.0742 s/iter. Eval: 0.0003 s/iter. Total: 0.0774 s/iter. ETA=0:25:19
[06/26 15:35:41 d2.evaluation.evaluator]: Inference done 274/19824. Dataloading: 0.0025 s/iter. Inference: 0.0741 s/iter. Eval: 0.0003 s/iter. Total: 0.0769 s/iter. ETA=0:25:03
[06/26 15:35:46 d2.evaluation.evaluator]: Inference done 336/19824. Dataloading: 0.0029 s/iter. Inference: 0.0746 s/i

In [12]:
def evaluate_fasterrcnn_model(model_path, image_paths, conf_threshold=0.5):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.WEIGHTS = model_path
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = conf_threshold
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 9
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    predictor = DefaultPredictor(cfg)

    inference_times = []
    gpu_memories = []
    cpu_memories = []

    has_gpu = init_nvml()

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            continue

        if has_gpu:
            gpu_memories.append(get_gpu_memory())
        cpu_memories.append(get_cpu_memory())

        start_time = time.time()
        predictor(img)
        inference_time = (time.time() - start_time) * 1000  # ms
        inference_times.append(inference_time)

    if has_gpu:
        pynvml.nvmlShutdown()

    avg_inference_time = np.mean(inference_times)
    fps = 1000 / avg_inference_time if avg_inference_time > 0 else 0
    metrics = {
        "avg_inference_time_ms": avg_inference_time,
        "fps": fps,
        "avg_gpu_memory_mb": np.mean(gpu_memories) if gpu_memories else 0,
        "avg_cpu_memory_percent": np.mean(cpu_memories) if cpu_memories else 0,
        "model_size_mb": get_model_size(model_path)
    }

    return metrics

In [24]:
image_paths = [
    "/content/test/000029_jpg.rf.499e295da55d77191edaab374f0c0b34.jpg",
    "/content/test/102_png.rf.2bafc97dd5f432cbcec10f3d97a116b4.jpg",
    "/content/test/1849dark3_jpg.rf.2ac91efb3c7dce8d412ea94032d0b1ed.jpg",
    "/content/test/21_png.rf.941b61c3c473843e9cd9d4341bfa0121.jpg",
    "/content/test/2da85a72c6_jpg.rf.b36ab04a9b51b86dac275b98ccf3f86f.jpg",
    "/content/test/4dbf2e1125_jpg.rf.0567d83e1f172d6ded09c1da2a2bfd63.jpg",
    "/content/test/878971465e_jpg.rf.1ba4e9070233ecfe1353f6b0f1c95848.jpg",
    "/content/test/IMG_1388_jpeg_jpg.rf.dec2e6bbe2a19d7e80e40d1638caf9cb.jpg",
    "/content/test/chipsy-cans_251_jpeg.rf.e79eb383de25ed37a192b68f00623776.jpg",
    "/content/test/download-1-_png.rf.de2dc0e0c54c695ddbc54b8c4b9e98b9.jpg"

]

In [25]:
print("Evaluating Faster R-CNN...")
faster_rcnn_metrics = evaluate_fasterrcnn_model(faster_rcnn_model_path, image_paths)
print("Faster R-CNN Metrics:", faster_rcnn_metrics)

Evaluating Faster R-CNN...
[06/27 14:16:32 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from /content/drive/MyDrive/fasterrcnn_finetune.pth ...


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Faster R-CNN Metrics: {'avg_inference_time_ms': np.float64(259.29713249206543), 'fps': np.float64(3.8565794784892207), 'avg_gpu_memory_mb': np.float64(926.675), 'avg_cpu_memory_percent': np.float64(21.060000000000002), 'model_size_mb': 315.0493965148926}


## YOLO11

In [15]:
%pip install ultralytics
import ultralytics
from ultralytics import YOLO

ultralytics.checks()

Ultralytics 8.3.160 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 42.4/112.6 GB disk)


In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="bSbuPJyAkojNGnXNQwV6")
project = rf.workspace("detectionclassificationgarbage").project("detection-garbage-jkww2")
version = project.version(5)
dataset = version.download("yolov11")


Collecting roboflow
  Downloading roboflow-1.1.66-py3-none-any.whl.metadata (9.7 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting opencv-python-headless==4.10.0.84 (from roboflow)
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pillow-heif>=0.18.0 (from roboflow)
  Downloading pillow_heif-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting python-dotenv (from roboflow)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading roboflow-1.1.66-py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.7/86.7 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading idna-3.7-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Detection-Garbage-5 to yolov11:: 100%|██████████| 1066128/1066128 [00:16<00:00, 63017.43it/s]





Extracting Dataset Version Zip to Detection-Garbage-5 in yolov11:: 100%|██████████| 45348/45348 [00:07<00:00, 5841.59it/s]


In [None]:
model = YOLO(yolo_model_path)

In [None]:
results_YOLO_train = model.val(data='/content/Detection-Garbage-5/data.yaml', split='train')

Ultralytics 8.3.159 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1164.3±639.8 MB/s, size: 35.4 KB)


[34m[1mval: [0mScanning /content/Detection-Garbage-5/train/labels... 19824 images, 168 backgrounds, 0 corrupt: 100%|██████████| 19824/19824 [00:08<00:00, 2298.13it/s]


[34m[1mval: [0mNew cache created: /content/Detection-Garbage-5/train/labels.cache


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1239/1239 [11:44<00:00,  1.76it/s]


                   all      19824      69564      0.866      0.822      0.901      0.709
               Battery       2868       6212      0.945      0.976      0.991       0.91
                 Glass       1659       6420      0.785      0.777      0.858      0.582
               Medical       3561      12851      0.906      0.875      0.944      0.777
                 Metal       2762       9052      0.851      0.779      0.891      0.611
               Organic       1230       9479      0.804      0.694      0.817      0.522
                 Paper       3342      10767      0.847      0.701      0.842      0.642
               Plastic       5439      13975      0.811      0.777      0.872      0.677
            SmartPhone        687        808      0.978      0.994      0.995      0.948
Speed: 0.3ms preprocess, 31.5ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1mruns/detect/val4[0m


In [None]:
print(results_YOLO_train)

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4, 5, 6, 7])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7d468f719f90>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,

In [None]:
results_YOLO_val = model.val(data='/content/Detection-Garbage-5/data.yaml', split='val')

Ultralytics 8.3.159 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


100%|██████████| 755k/755k [00:00<00:00, 25.6MB/s]

[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2119.7±570.5 MB/s, size: 63.4 KB)



[34m[1mval: [0mScanning /content/Detection-Garbage-5/valid/labels... 1935 images, 15 backgrounds, 0 corrupt: 100%|██████████| 1935/1935 [00:00<00:00, 2413.89it/s]


[34m[1mval: [0mNew cache created: /content/Detection-Garbage-5/valid/labels.cache


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 121/121 [01:09<00:00,  1.74it/s]


                   all       1935       6235      0.729      0.697      0.751      0.546
               Battery         50         74      0.786      0.986      0.947      0.868
                 Glass        162        487      0.689      0.591       0.65      0.373
               Medical        158        656      0.675      0.543      0.624       0.44
                 Metal        329       1080      0.774      0.557      0.716      0.426
               Organic        118       1094      0.671      0.575      0.645      0.369
                 Paper        722       1464      0.766      0.657      0.753      0.463
               Plastic        508       1303      0.579      0.692       0.69      0.523
            SmartPhone         61         77       0.89      0.974      0.983      0.911
Speed: 0.4ms preprocess, 30.8ms inference, 0.0ms loss, 1.2ms postprocess per image
Results saved to [1mruns/detect/val2[0m


In [None]:
print(results_YOLO_val)

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4, 5, 6, 7])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7d472c98abd0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,

In [None]:
results_YOLO_test = model.val(data='/content/Detection-Garbage-5/data.yaml', split='test')

Ultralytics 8.3.159 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1026.7±594.0 MB/s, size: 36.5 KB)


[34m[1mval: [0mScanning /content/Detection-Garbage-5/test/labels... 909 images, 4 backgrounds, 0 corrupt: 100%|██████████| 909/909 [00:00<00:00, 2455.14it/s]

[34m[1mval: [0mNew cache created: /content/Detection-Garbage-5/test/labels.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 57/57 [00:35<00:00,  1.62it/s]


                   all        909       3006      0.725      0.704      0.768       0.58
               Battery         78        217      0.837      0.848      0.931      0.794
                 Glass        184        408      0.824      0.723       0.82      0.575
               Medical        189        487      0.626      0.665      0.691      0.529
                 Metal         95        399      0.701      0.637      0.709      0.455
               Organic         63        468      0.679      0.605      0.684      0.405
                 Paper        211        548      0.676      0.626      0.715      0.518
               Plastic        152        448      0.582      0.558      0.631      0.463
            SmartPhone         28         31      0.874      0.968      0.964      0.905
Speed: 0.6ms preprocess, 32.8ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to [1mruns/detect/val3[0m


In [None]:
print(results_YOLO_test)

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4, 5, 6, 7])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7d4690858a90>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,

In [16]:
def evaluate_YOLO_model(model_path, image_paths, conf_threshold=0.5):
    model = YOLO(model_path)
    inference_times = []
    gpu_memories = []
    cpu_memories = []

    has_gpu = init_nvml()

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            continue

        if has_gpu:
            gpu_memories.append(get_gpu_memory())
        cpu_memories.append(get_cpu_memory())

        start_time = time.time()
        model.predict(img, conf=conf_threshold, verbose=False)
        inference_time = (time.time() - start_time) * 1000  # ms
        inference_times.append(inference_time)

    if has_gpu:
        pynvml.nvmlShutdown()

    avg_inference_time = np.mean(inference_times)
    fps = 1000 / avg_inference_time if avg_inference_time > 0 else 0
    metrics = {
        "avg_inference_time_ms": avg_inference_time,
        "fps": fps,
        "avg_gpu_memory_mb": np.mean(gpu_memories) if gpu_memories else 0,
        "avg_cpu_memory_percent": np.mean(cpu_memories),
        "model_size_mb": get_model_size(model_path)
    }

    return metrics

In [26]:
print("Evaluating YOLO11...")
yolo_metrics = evaluate_YOLO_model(yolo_model_path, image_paths)
print("YOLO11 Metrics:", yolo_metrics)

Evaluating YOLO11...
YOLO11 Metrics: {'avg_inference_time_ms': np.float64(94.59056854248047), 'fps': np.float64(10.571878522443827), 'avg_gpu_memory_mb': np.float64(989.675), 'avg_cpu_memory_percent': np.float64(22.5), 'model_size_mb': 145.89209938049316}


## RT-DETR

In [27]:
from ultralytics import RTDETR

In [None]:
model = RTDETR("/content/drive/MyDrive/RT_DETR_finetune.pt")

In [None]:
results_RTDETR_train = model.val(data='/content/Detection-Garbage-5/data.yaml', split='train')

Ultralytics 8.3.160 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
rt-detr-l summary: 302 layers, 32,000,180 parameters, 0 gradients, 103.5 GFLOPs
Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


100%|██████████| 755k/755k [00:00<00:00, 18.0MB/s]

[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1289.8±521.6 MB/s, size: 54.4 KB)



[34m[1mval: [0mScanning /content/Detection-Garbage-5/train/labels... 19824 images, 168 backgrounds, 0 corrupt: 100%|██████████| 19824/19824 [00:09<00:00, 2144.00it/s]


[34m[1mval: [0mNew cache created: /content/Detection-Garbage-5/train/labels.cache


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1239/1239 [12:34<00:00,  1.64it/s]


                   all      19824      69564      0.812      0.783      0.845      0.642
               Battery       2868       6212      0.886      0.962      0.972      0.873
                 Glass       1659       6420      0.741      0.716      0.787      0.504
               Medical       3561      12851      0.869      0.805      0.882      0.671
                 Metal       2762       9052      0.807      0.725      0.826      0.539
               Organic       1230       9479      0.759      0.621      0.729       0.44
                 Paper       3342      10767      0.739      0.711      0.766      0.566
               Plastic       5439      13975      0.748       0.75       0.81      0.605
            SmartPhone        687        808      0.949      0.978      0.989      0.935
Speed: 0.2ms preprocess, 34.4ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/val[0m


In [None]:
print(results_RTDETR_train)

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4, 5, 6, 7])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x78244c9c9c50>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,

In [None]:
results_RTDETR_val = model.val(data='/content/Detection-Garbage-5/data.yaml', split='val')

Ultralytics 8.3.160 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
rt-detr-l summary: 302 layers, 32,000,180 parameters, 0 gradients, 103.5 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 794.0±467.5 MB/s, size: 56.4 KB)


[34m[1mval: [0mScanning /content/Detection-Garbage-5/valid/labels... 1935 images, 15 backgrounds, 0 corrupt: 100%|██████████| 1935/1935 [00:00<00:00, 2348.63it/s]

[34m[1mval: [0mNew cache created: /content/Detection-Garbage-5/valid/labels.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 121/121 [01:15<00:00,  1.61it/s]


                   all       1935       6235       0.71      0.665      0.703      0.512
               Battery         50         74      0.757      0.986      0.947      0.869
                 Glass        162        487      0.599      0.577      0.581      0.332
               Medical        158        656      0.736      0.456      0.553      0.392
                 Metal        329       1080      0.782      0.572      0.681      0.398
               Organic        118       1094      0.672       0.48      0.569      0.324
                 Paper        722       1464      0.703      0.633      0.669      0.396
               Plastic        508       1303      0.608      0.644      0.653      0.482
            SmartPhone         61         77      0.826      0.974      0.973      0.904
Speed: 0.2ms preprocess, 35.0ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/val2[0m


In [None]:
print(results_RTDETR_val)

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4, 5, 6, 7])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7823c5f2ec50>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,

In [None]:
results_RTDETR_test = model.val(data='/content/Detection-Garbage-5/data.yaml', split='test')

Ultralytics 8.3.160 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
rt-detr-l summary: 302 layers, 32,000,180 parameters, 0 gradients, 103.5 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 776.8±236.4 MB/s, size: 62.4 KB)


[34m[1mval: [0mScanning /content/Detection-Garbage-5/test/labels... 909 images, 4 backgrounds, 0 corrupt: 100%|██████████| 909/909 [00:00<00:00, 2457.55it/s]

[34m[1mval: [0mNew cache created: /content/Detection-Garbage-5/test/labels.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 57/57 [00:36<00:00,  1.57it/s]


                   all        909       3006      0.732      0.662      0.715      0.536
               Battery         78        217      0.758       0.82      0.878      0.724
                 Glass        184        408      0.807      0.719      0.791      0.544
               Medical        189        487      0.685      0.614      0.656      0.488
                 Metal         95        399      0.705      0.607      0.672      0.426
               Organic         63        468      0.665      0.449      0.556      0.327
                 Paper        211        548      0.661      0.611      0.653       0.47
               Plastic        152        448      0.587      0.538      0.552      0.405
            SmartPhone         28         31      0.985      0.935      0.962      0.903
Speed: 0.2ms preprocess, 35.0ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to [1mruns/detect/val3[0m


In [None]:
print(results_RTDETR_test)

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4, 5, 6, 7])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7823c57a62d0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,

In [28]:
def evaluate_RTDETR_model(model_path, image_paths, conf_threshold=0.5):
    model = RTDETR(model_path)
    inference_times = []
    gpu_memories = []
    cpu_memories = []

    has_gpu = init_nvml()

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            continue

        if has_gpu:
            gpu_memories.append(get_gpu_memory())
        cpu_memories.append(get_cpu_memory())

        start_time = time.time()
        model.predict(img, conf=conf_threshold, verbose=False)
        inference_time = (time.time() - start_time) * 1000  # ms
        inference_times.append(inference_time)

    if has_gpu:
        pynvml.nvmlShutdown()

    avg_inference_time = np.mean(inference_times)
    fps = 1000 / avg_inference_time if avg_inference_time > 0 else 0
    metrics = {
        "avg_inference_time_ms": avg_inference_time,
        "fps": fps,
        "avg_gpu_memory_mb": np.mean(gpu_memories) if gpu_memories else 0,
        "avg_cpu_memory_percent": np.mean(cpu_memories),
        "model_size_mb": get_model_size(model_path)
    }

    return metrics

In [29]:
print("Evaluating RT-DETR...")
rtdetr_metrics = evaluate_RTDETR_model(rtdetr_model_path, image_paths)
print("RT-DETR Metrics:", rtdetr_metrics)

Evaluating RT-DETR...
RT-DETR Metrics: {'avg_inference_time_ms': np.float64(152.4355411529541), 'fps': np.float64(6.560149899665447), 'avg_gpu_memory_mb': np.float64(1036.875), 'avg_cpu_memory_percent': np.float64(22.290000000000003), 'model_size_mb': 63.0955810546875}
