In [23]:
import torch
import torchvision
from torchvision.transforms import Compose, ToTensor, Normalize, Resize
from torchvision.transforms import functional as F
from transformers import DetrForObjectDetection, DetrImageProcessor
from PIL import Image
from prettytable import PrettyTable
from ultralytics import YOLO
import time

In [24]:
# Initialize table to record inference times
table = PrettyTable()
table.field_names = ["Model", "Inference Time (s)"]

In [25]:
# Load image and preprocess
image_path = "/home/ubuntu_qa/codevs/Computer vision/Kien～先生/Cloud/Image02.jpg"
image = Image.open(image_path).convert("RGB")

In [26]:
# ---------- Faster R-CNN ----------
model_faster_rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model_faster_rcnn.eval()

transformed_image = F.to_tensor(image).unsqueeze(0)  
start_time = time.time()
with torch.no_grad():
    outputs_faster_rcnn = model_faster_rcnn(transformed_image)
end_time = time.time()
table.add_row(["Faster R-CNN", f"{end_time - start_time:.4f}"])

In [27]:
# ---------- DETR ----------
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model_detr = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
model_detr.eval()

start_time = time.time()
inputs_detr = processor(images=image, return_tensors="pt")
outputs_detr = model_detr(**inputs_detr)
end_time = time.time()
table.add_row(["DETR", f"{end_time - start_time:.4f}"])

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [28]:
# ---------- YOLOv5 ----------
model_yolov5 = YOLO('./yolov5s.pt')  
model_yolov5.eval()

# Preprocessing for YOLOv5
transform_yolo = Compose([
    Resize((640, 640)), 
    ToTensor(),         
    Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]) 
])
transformed_image_yolo = transform_yolo(image).unsqueeze(0)  

start_time = time.time()
with torch.no_grad():
    outputs_yolo = model_yolov5(transformed_image_yolo)
end_time = time.time()
table.add_row(["YOLOv5", f"{end_time - start_time:.4f}"])

PRO TIP 💡 Replace 'model=./yolov5s.pt' with new 'model=./yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.


0: 640x640 1 cat, 1 chair, 76.1ms
Speed: 4.3ms preprocess, 76.1ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 640)


In [29]:
print(table)

+--------------+--------------------+
|    Model     | Inference Time (s) |
+--------------+--------------------+
| Faster R-CNN |       1.0380       |
|     DETR     |       0.8038       |
|    YOLOv5    |       0.4483       |
+--------------+--------------------+
