In [7]:
from ultralytics import YOLO
import numpy as np
import torch

def get_default_device():
    """Return the torch device to compute on.

    Returns:
        torch.device: ``cuda`` when ``torch.cuda.is_available()`` reports a usable
        GPU, ``cpu`` otherwise.
    """
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

In [8]:
# Load models
# Experiment configuration: these module-level names are read by the later cells.
model_name = "yolov8"  # selects the detector branch used when retrieving bboxes
cls_id_attacked = 0  # class index to attack; index 0 maps to 'person' in the detector's `names`
cls_conf_threshold = 0.5  # NOTE(review): not referenced by any cell visible here - confirm where it is applied
batch_size = 1  # later compared against the number of Results returned by one detector call
device = get_default_device()  # CUDA if available, else CPU. NOTE(review): never passed to the detector in this cell

detector = YOLO("yolov8n.pt")  # Weights are downloaded on first use, then loaded from the local copy

# Use the model
# Calling the detector on a URL fetches the image (reused from disk once cached) and
# returns a list with one Results object per input image.
results = detector("https://ultralytics.com/images/bus.jpg")  # Predict on an image


Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 c:\Users\danil\Desktop\Naturalistic-Adversarial-Patch\bus.jpg: 640x480 4 persons, 1 bus, 1 stop sign, 106.0ms
Speed: 4.0ms preprocess, 106.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)


In [9]:
# Print the confidence tensor attached to each detection of the first (only) image.
detections = results[0].boxes
for i, obj_prob in enumerate(det.conf for det in detections):
    print(f"Object {i+1} probability: {obj_prob}")

Object 1 probability: tensor([0.8705], device='cuda:0')
Object 2 probability: tensor([0.8690], device='cuda:0')
Object 3 probability: tensor([0.8536], device='cuda:0')
Object 4 probability: tensor([0.8193], device='cuda:0')
Object 5 probability: tensor([0.3461], device='cuda:0')
Object 6 probability: tensor([0.3013], device='cuda:0')


In [10]:
# Obtain all bboxes
# Run the detector selected by `model_name`. Only the YOLOv8 path is implemented, so
# any other value is rejected explicitly: otherwise `bboxes` would be undefined on a
# fresh kernel (NameError below) or silently stale when the cell is re-run.
if model_name == "yolov8":
    bboxes = detector("https://ultralytics.com/images/bus.jpg")
else:
    raise NotImplementedError(
        f"Unsupported model_name {model_name!r}; only 'yolov8' is handled in this cell"
    )

print(f"Boxes retrieved: {bboxes}")


Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 c:\Users\danil\Desktop\Naturalistic-Adversarial-Patch\bus.jpg: 640x480 4 persons, 1 bus, 1 stop sign, 12.0ms
Speed: 3.0ms preprocess, 12.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)
Boxes retrieved: [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 

In [11]:
# Objective: build two float32 arrays, one row per detection of the attacked class.
#   labels:         (label, x_center, y_center, w, h, confidence)
#                   box coordinates divided by the original image width / height
#                   ex: (0 0.5 0.6 0.07 0.22 0.76)
#   labels_rescale: (label, confidence, left, top, right, bottom)
#                   box corners in original-image pixels
#                   ex: (0 0.76 183.1 113.5 240.3 184.7)

# Fail loudly on an unexpected batch size instead of continuing with an undefined
# (fresh kernel) or stale (re-run) `bbox`.
if len(bboxes) != batch_size:
    raise ValueError(
        f"Expected {batch_size} detection result(s), got {len(bboxes)}"
    )
bbox = bboxes[0]

# orig_shape is indexed as (height, width); it is the same for every box, so read it once.
orig_height, orig_width = bbox.boxes.orig_shape[0], bbox.boxes.orig_shape[1]

# Rows are collected in separate lists so that re-running the cell never appends to
# an ndarray left over from a previous execution.
label_rows = []
label_rescale_rows = []

for b in bbox.boxes:
    detected_class = int(b.cls.item())
    if detected_class != int(cls_id_attacked):
        continue  # keep only the class under attack

    conf = b.conf.item()
    # NOTE(review): `cls_conf_threshold` is not applied here, so low-confidence
    # detections are kept - confirm that this is intended.

    # Each per-box view holds a single row; tolist() yields plain Python floats.
    x_center, y_center, w, h = b.xywh[0].tolist()
    left, top, right, bottom = b.xyxy[0].tolist()

    label_rows.append(
        [
            detected_class,
            x_center / orig_width,
            y_center / orig_height,
            w / orig_width,
            h / orig_height,
            conf,
        ]
    )
    label_rescale_rows.append([detected_class, conf, left, top, right, bottom])

# reshape(-1, 6) keeps a stable (N, 6) shape even when nothing was detected (N == 0).
labels = np.array(label_rows, dtype=np.float32).reshape(-1, 6)
labels_rescale = np.array(label_rescale_rows, dtype=np.float32).reshape(-1, 6)

print(f"Labels: {labels}")
print(f"Labels rescale: {labels_rescale}")

Labels: [[          0     0.18101     0.60267     0.24168     0.46596     0.86898]
 [          0     0.91366     0.58147     0.17233     0.45871      0.8536]
 [          0     0.34945     0.58481     0.15225     0.41815     0.81931]
 [          0    0.041423      0.6597    0.082846     0.29901     0.30129]]
Labels rescale: [[          0     0.86898      48.739      399.26       244.5       902.5]
 [          0      0.8536      670.27      380.28      809.86      875.69]
 [          0     0.81931      221.39      405.79      344.72      857.39]
 [          0     0.30129           0      551.01      67.105      873.94]]


In [12]:
# Calculate loss_det: the mean confidence over all detections of the attacked class,
# or 0 when that class is not detected at all.
target_class = cls_id_attacked

results = detector("https://ultralytics.com/images/bus.jpg")
result_boxes = results[0].boxes

# `conf` and `cls` hold one entry per detection, so a boolean mask selects the
# confidences of the target class without a per-box Python loop.
target_conf = result_boxes.conf[result_boxes.cls == target_class]

# Both branches place the loss on `device`; previously the non-empty branch produced
# a CPU tensor while the empty branch produced a tensor on `device`.
if target_conf.numel() > 0:
    loss_det = target_conf.mean().to(device)
else:
    loss_det = torch.tensor(0.0, device=device)

# NOTE(review): this value is read from the detector's post-processed predictions;
# confirm whether gradients can flow through it before using it as a training loss.
print(f"Loss Det: {loss_det}")


Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 c:\Users\danil\Desktop\Naturalistic-Adversarial-Patch\bus.jpg: 640x480 4 persons, 1 bus, 1 stop sign, 22.0ms
Speed: 3.0ms preprocess, 22.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 480)
Loss Det: 0.7107957005500793
