In [3]:
from ultralytics import YOLO
import numpy as np
import torch

def get_default_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

In [4]:
# Compute device: GPU when available, otherwise CPU
device = get_default_device()

# Experiment settings
model_name = "yolov9"
cls_id_attacked = 0
cls_conf_threshold = 0.5
batch_size = 1

# Load the detector from its weights file
detector = YOLO("yolov9t.pt")

# Run a first prediction on the sample image
results = detector("crop001692.png")

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov9t.pt to 'yolov9t.pt'...


100%|██████████| 4.70M/4.70M [00:00<00:00, 42.0MB/s]



image 1/1 c:\Users\danil\Documents\Github\NaturalisticAdversarialPatches\crop001692.png: 640x224 7 persons, 2 cars, 100.5ms
Speed: 3.0ms preprocess, 100.5ms inference, 112.6ms postprocess per image at shape (1, 3, 640, 224)


In [5]:
# Print the confidence tensor of every detection in the first result,
# numbering the detections from 1.
detections = results[0].boxes
for position, detection in enumerate(detections, start=1):
    print(f"Object {position} probability: {detection.conf}")

Object 1 probability: tensor([0.8569], device='cuda:0')
Object 2 probability: tensor([0.8447], device='cuda:0')
Object 3 probability: tensor([0.8405], device='cuda:0')
Object 4 probability: tensor([0.7501], device='cuda:0')
Object 5 probability: tensor([0.7456], device='cuda:0')
Object 6 probability: tensor([0.6432], device='cuda:0')
Object 7 probability: tensor([0.5573], device='cuda:0')
Object 8 probability: tensor([0.4001], device='cuda:0')
Object 9 probability: tensor([0.3985], device='cuda:0')


In [6]:
# Obtain all bboxes for the sample image with the configured detector
if model_name == "yolov9":
    bboxes = detector("crop001692.png")
else:
    # Without this branch `bboxes` would stay undefined (NameError below) or,
    # worse, keep a stale value from an earlier run of this cell.
    raise ValueError(f"Unsupported model_name: {model_name!r}")

print(f"Boxes retrieved: {bboxes}")


image 1/1 c:\Users\danil\Documents\Github\NaturalisticAdversarialPatches\crop001692.png: 640x224 7 persons, 2 cars, 24.0ms
Speed: 1.0ms preprocess, 24.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 224)
Boxes retrieved: [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cu

In [7]:
# Objective: build `labels` and `labels_rescale` for the attacked class.
# Only detections whose class equals `cls_id_attacked` are kept.
#   labels row:         (label, x_center, y_center, w, h, confidence)
#                       box divided by the original image width / height
#                       ex: (0 0.5 0.6 0.07 0.22 0.76)
#   labels_rescale row: (label, confidence, left, top, right, bottom)
#                       box taken from `xyxy` without normalisation
#                       ex: (0 0.76 183.1 113.5 240.3 184.7)
labels = []
labels_rescale = []

# The loop below needs `bbox`; fail with a clear message instead of a
# NameError (or a silently reused stale `bbox`) when the count is unexpected.
if len(bboxes) != batch_size:
    raise ValueError(
        f"Expected {batch_size} detection result(s), got {len(bboxes)}"
    )
bbox = bboxes[0]

# orig_shape is indexed as (height, width); it does not change per box,
# so read it once outside the loop.
orig_height, orig_width = bbox.boxes.orig_shape[0], bbox.boxes.orig_shape[1]

for b in bbox.boxes:
    detected_class = int(b.cls.item())
    if detected_class != int(cls_id_attacked):
        continue

    conf = b.conf.item()

    # For labels: xywh format, normalised by the original image size.
    # A single tolist() converts all four coordinates to Python floats.
    x_abs, y_abs, w_abs, h_abs = b.xywh[0].tolist()
    x_center, y_center = x_abs / orig_width, y_abs / orig_height
    w, h = w_abs / orig_width, h_abs / orig_height
    labels.append(
        np.array([detected_class, x_center, y_center, w, h, conf], dtype=np.float32)
    )

    # For labels_rescale: xyxy format
    left, top, right, bottom = b.xyxy[0].tolist()
    labels_rescale.append(
        np.array([detected_class, conf, left, top, right, bottom], dtype=np.float32)
    )

    print(f"{detected_class} {x_center} {y_center} {w} {h}")

# reshape(-1, 6) keeps a 2-D (0, 6) array when nothing matched, so the
# column layout is the same with or without detections.
labels = np.array(labels, dtype=np.float32).reshape(-1, 6)
labels_rescale = np.array(labels_rescale, dtype=np.float32).reshape(-1, 6)

print(f"Labels: {labels}")
print(f"Labels rescale: {labels_rescale}")

0 0.331988429391621 0.44909448468768126 0.42728040076249485 0.4036171331206497
0 0.7956454043356788 0.5453458639972448 0.35470454740208507 0.4196703351014175
0 0.6512925103800187 0.1292433506374846 0.3033967681278456 0.2502487253423633
0 0.7328669061723924 0.8797611670261746 0.5326259183567881 0.24022354161379786
0 0.9143650484400869 0.1917237815060361 0.16819813709385348 0.29691840435125433
0 0.4114551165246016 0.09527373701126558 0.21717733421073054 0.19054747402253117
0 0.12664231559298686 0.11378315264000417 0.2522693659296099 0.15173936388055576
Labels: [[          0     0.33199     0.44909     0.42728     0.40362     0.85693]
 [          0     0.79565     0.54535      0.3547     0.41967      0.8447]
 [          0     0.65129     0.12924      0.3034     0.25025     0.75014]
 [          0     0.73287     0.87976     0.53263     0.24022     0.74556]
 [          0     0.91437     0.19172      0.1682     0.29692     0.55727]
 [          0     0.41146    0.095274     0.21718     0.1905

In [8]:
# Calculate loss_det: the mean confidence over all detections whose class is
# the attacked class, or 0.0 when there is no such detection.
# NOTE(review): the detector is called in prediction mode here; this value may
# not carry an autograd graph — confirm before back-propagating through it.
target_class = cls_id_attacked

results = detector("crop001692.png")
boxes = results[0].boxes

# Boolean mask over the per-box class ids selects the target-class confidences
target_confs = boxes.conf[boxes.cls == target_class]

if target_confs.numel() > 0:
    loss_det = target_confs.mean()
else:
    loss_det = torch.tensor(0.0)

# Both branches end on `device`; previously the value was on the CPU when
# detections existed and on `device` only in the empty case.
loss_det = loss_det.to(device)

print(f"Loss Det: {loss_det}")


image 1/1 c:\Users\danil\Documents\Github\NaturalisticAdversarialPatches\crop001692.png: 640x224 7 persons, 2 cars, 29.0ms
Speed: 2.0ms preprocess, 29.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 224)
Loss Det: 0.6504629850387573
