In [43]:
from ultralytics import YOLO
import numpy as np
import torch

def get_default_device():
    """Return the torch device to run on.

    Returns:
        torch.device: ``cuda`` when ``torch.cuda.is_available()`` reports a
        usable GPU, otherwise ``cpu``.
    """
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)

In [44]:
# Load models
# --- Configuration: everything a reader might want to tune ---------------
model_name = "yolov10"         # detector family; the bbox-retrieval cell branches on this value
weights_path = "yolov10s.pt"   # checkpoint file handed to YOLO()
image_path = "crop001692.png"  # image used for the sanity-check prediction below
cls_id_attacked = 0            # class id targeted by the attack (0 maps to 'person' in the model's names)
cls_conf_threshold = 0.5       # NOTE(review): defined here but not applied by the cells shown — confirm intended use
batch_size = 1                 # number of results expected back from one detector call
device = get_default_device()  # torch.device: CUDA when available, otherwise CPU

# --- Detector --------------------------------------------------------------
detector = YOLO(weights_path)

# Use the model
results = detector(image_path)  # Predict on an image


image 1/1 c:\Users\danil\Documents\Github\NaturalisticAdversarialPatches\crop001692.png: 640x224 5 persons, 3 cars, 1 handbag, 50.2ms
Speed: 2.0ms preprocess, 50.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 224)


In [45]:
# Print the confidence of every detection in the first result, numbered from 1.
detections = results[0].boxes
for rank, detection in enumerate(detections, start=1):
    print(f"Object {rank} probability: {detection.conf}")

Object 1 probability: tensor([0.8992], device='cuda:0')
Object 2 probability: tensor([0.8829], device='cuda:0')
Object 3 probability: tensor([0.8680], device='cuda:0')
Object 4 probability: tensor([0.7835], device='cuda:0')
Object 5 probability: tensor([0.6953], device='cuda:0')
Object 6 probability: tensor([0.6449], device='cuda:0')
Object 7 probability: tensor([0.5094], device='cuda:0')
Object 8 probability: tensor([0.3783], device='cuda:0')
Object 9 probability: tensor([0.2787], device='cuda:0')


In [46]:
# Obtain all bboxes
# The detector call returns a list of Results objects (one entry per image);
# each entry exposes its detections through `.boxes`.
if model_name == "yolov10":
    bboxes = detector("crop001692.png")
else:
    # Fail loudly: without this branch `bboxes` would be undefined on a fresh
    # kernel, or silently reuse a stale value from an earlier run.
    raise ValueError(f"Unsupported model_name: {model_name!r}")

print(f"Boxes retrieved: {bboxes}")


image 1/1 c:\Users\danil\Documents\Github\NaturalisticAdversarialPatches\crop001692.png: 640x224 5 persons, 3 cars, 1 handbag, 22.0ms
Speed: 1.0ms preprocess, 22.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 224)
Boxes retrieved: [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glas

In [47]:
# Objective: collect every detection of the attacked class in two layouts.
#   labels         -> rows of (label, x_center, y_center, w, h, confidence)
#                     box values divided by the original image width/height
#                     ex: (0 0.5 0.6 0.07 0.22 0.76)
#   labels_rescale -> rows of (label, confidence, left, top, right, bottom)
#                     xyxy corners exactly as reported by the detector
#                     ex: (0 0.76 183.1 113.5 240.3 184.7)
# NOTE(review): cls_conf_threshold is not applied here, so low-confidence
# detections of the attacked class are kept — confirm this is intended.

# Guard against an unexpected number of results; previously a mismatch left
# `bbox` undefined (NameError) or pointing at a stale value from an earlier run.
if len(bboxes) != batch_size:
    raise ValueError(
        f"Expected {batch_size} result(s) from the detector, got {len(bboxes)}"
    )
bbox = bboxes[0]  # only the first result is processed

# orig_shape is indexed as (height, width); it does not change between boxes,
# so read it once instead of on every iteration.
orig_height, orig_width = bbox.boxes.orig_shape[0], bbox.boxes.orig_shape[1]
attacked_class = int(cls_id_attacked)

label_rows = []
label_rescale_rows = []

for b in bbox.boxes:
    detected_class = int(b.cls.cpu().item())
    if detected_class != attacked_class:
        continue

    conf = b.conf.cpu().item()

    # For labels: xywh format, normalised by the original image size.
    # A single .cpu().tolist() moves the whole box in one transfer.
    x_center, y_center, w, h = b.xywh[0].cpu().tolist()
    x_center, w = x_center / orig_width, w / orig_width
    y_center, h = y_center / orig_height, h / orig_height
    label_rows.append(
        np.array([detected_class, x_center, y_center, w, h, conf], dtype=np.float32)
    )

    # For labels_rescale: xyxy format, left unscaled.
    left, top, right, bottom = b.xyxy[0].cpu().tolist()
    label_rescale_rows.append(
        np.array([detected_class, conf, left, top, right, bottom], dtype=np.float32)
    )

    print(f"{detected_class} {x_center} {y_center} {w} {h}")

# reshape(-1, 6) keeps a stable (n, 6) shape even when nothing was detected.
labels = np.array(label_rows, dtype=np.float32).reshape(-1, 6)
labels_rescale = np.array(label_rescale_rows, dtype=np.float32).reshape(-1, 6)

print(f"Labels: {labels}")
print(f"Labels rescale: {labels_rescale}")

0 0.3275692415553213 0.44910807952526827 0.4222521750342767 0.4074094201461898
0 0.7879468399957316 0.5467519738004821 0.3868217215632761 0.42021480126613253
0 0.7299749614387159 0.8801006131271751 0.5384752892500517 0.23979877374564965
0 0.9152103449335162 0.19541205386274652 0.16742064621274835 0.2850735480713457
0 0.6521441225973975 0.12847087001579266 0.28304922343879346 0.24947608540616842
Labels: [[          0     0.32757     0.44911     0.42225     0.40741     0.89924]
 [          0     0.78795     0.54675     0.38682     0.42021     0.88286]
 [          0     0.72997      0.8801     0.53848      0.2398     0.69532]
 [          0     0.91521     0.19541     0.16742     0.28507     0.64492]
 [          0     0.65214     0.12847     0.28305     0.24948     0.37826]]
Labels rescale: [[          0     0.89924      35.166      211.54      162.69      562.72]
 [          0     0.88286      179.55      290.19      296.37      652.41]
 [          0     0.69532      139.14      655.29   

In [48]:
# Calculate loss_det: the mean confidence over all detections whose class is
# the attacked class, or 0.0 when there is no such detection.
target_class = cls_id_attacked

results = detector("crop001692.png")
boxes = results[0].boxes

# Select the confidences of the target class with one boolean mask instead of
# looping over boxes and copying each value to the CPU individually.
target_confs = boxes.conf[boxes.cls == target_class]

# Both branches now produce a 0-dim tensor on `device`; previously the
# non-empty branch returned a CPU tensor while the empty branch used `device`.
if target_confs.numel() > 0:
    loss_det = target_confs.mean().to(device)
else:
    loss_det = torch.tensor(0.0, device=device)

print(f"Loss Det: {loss_det}")


image 1/1 c:\Users\danil\Documents\Github\NaturalisticAdversarialPatches\crop001692.png: 640x224 5 persons, 3 cars, 1 handbag, 17.0ms
Speed: 1.0ms preprocess, 17.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 224)
Loss Det: 0.7001213431358337
