In [26]:
import cv2
import supervision as sv
from ultralytics import YOLO

model = YOLO('yolov8s.pt')
bbox_annotator = sv.BoxAnnotator()

In [27]:
# list of classes
print(class_list := model.names)

{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw

In [29]:
# real-time detection
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if ret == True:
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)
        detections = detections[detections.confidence > 0.5]
        labels = [
            f"{result.names[class_id]}: {confidence:.2f}"
            for class_id, confidence in zip(detections.class_id, detections.confidence)
        ]
        frame = bbox_annotator.annotate(scene=frame, detections=detections, labels=labels)
        
        cv2.imshow('Real-time Detection', frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break
    
cap.release()
cv2.destroyAllWindows()



0: 480x640 1 person, 1 teddy bear, 396.1ms
Speed: 2.0ms preprocess, 396.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 395.1ms
Speed: 3.0ms preprocess, 395.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 387.1ms
Speed: 2.0ms preprocess, 387.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 teddy bear, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 407.1ms
Speed: 1.0ms preprocess, 407.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 393.1ms
Speed: 3.0ms preprocess, 393.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 teddy bear, 394.1ms
Speed: 2.0ms preprocess, 394.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 cake, 406.1ms
Speed: 2.0ms preprocess, 406.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 392.1ms
Speed: 3.0ms preprocess, 392.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 391.1ms
Speed: 2.0ms preprocess, 391.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 393.1ms
Speed: 1.0ms preprocess, 393.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 390.1ms
Speed: 3.0ms preprocess, 390.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 396.1ms
Speed: 2.0ms preprocess, 396.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 403.1ms
Speed: 2.0ms preprocess, 403.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 389.1ms
Speed: 3.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 apple, 1 chair, 389.1ms
Speed: 1.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 391.1ms
Speed: 2.0ms preprocess, 391.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 402.1ms
Speed: 2.0ms preprocess, 402.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 387.1ms
Speed: 2.0ms preprocess, 387.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 chair, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 chair, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 1 laptop, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 1 laptop, 398.1ms
Speed: 2.0ms preprocess, 398.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 chair, 1 teddy bear, 393.1ms
Speed: 2.0ms preprocess, 393.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 1 teddy bear, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 2 teddy bears, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 chair, 386.1ms
Speed: 3.0ms preprocess, 386.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 1 laptop, 389.1ms
Speed: 1.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 1 laptop, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 396.1ms
Speed: 2.0ms preprocess, 396.1ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 396.1ms
Speed: 2.0ms preprocess, 396.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 391.1ms
Speed: 2.0ms preprocess, 391.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 teddy bear, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 400.1ms
Speed: 2.0ms preprocess, 400.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 391.1ms
Speed: 2.0ms preprocess, 391.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 dog, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 chair, 391.1ms
Speed: 3.0ms preprocess, 391.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 1 remote, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 remote, 401.1ms
Speed: 1.0ms preprocess, 401.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 cat, 1 chair, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 387.1ms
Speed: 2.0ms preprocess, 387.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 394.2ms
Speed: 2.0ms preprocess, 394.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 394.1ms
Speed: 2.0ms preprocess, 394.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 1 teddy bear, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 remote, 401.1ms
Speed: 2.0ms preprocess, 401.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 387.1ms
Speed: 2.0ms preprocess, 387.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 389.4ms
Speed: 3.0ms preprocess, 389.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 remote, 391.1ms
Speed: 1.0ms preprocess, 391.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 393.1ms
Speed: 2.0ms preprocess, 393.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 chair, 1 remote, 403.1ms
Speed: 2.0ms preprocess, 403.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 394.1ms
Speed: 2.0ms preprocess, 394.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 remote, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 remote, 395.1ms
Speed: 3.0ms preprocess, 395.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 remote, 1 teddy bear, 407.1ms
Speed: 2.0ms preprocess, 407.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 1 remote, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 remote, 1 teddy bear, 391.1ms
Speed: 2.0ms preprocess, 391.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 387.1ms
Speed: 2.0ms preprocess, 387.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 393.1ms
Speed: 2.0ms preprocess, 393.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 1 teddy bear, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 dog, 1 chair, 404.1ms
Speed: 1.0ms preprocess, 404.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 396.1ms
Speed: 3.0ms preprocess, 396.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 chair, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 chair, 1 teddy bear, 391.1ms
Speed: 2.0ms preprocess, 391.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 401.1ms
Speed: 1.0ms preprocess, 401.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 394.1ms
Speed: 2.0ms preprocess, 394.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 397.1ms
Speed: 2.0ms preprocess, 397.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 393.1ms
Speed: 2.0ms preprocess, 393.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 392.1ms
Speed: 1.0ms preprocess, 392.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 405.1ms
Speed: 3.0ms preprocess, 405.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 395.1ms
Speed: 2.0ms preprocess, 395.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 394.1ms
Speed: 1.0ms preprocess, 394.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 391.1ms
Speed: 3.0ms preprocess, 391.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 teddy bear, 391.1ms
Speed: 2.0ms preprocess, 391.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 391.1ms
Speed: 1.0ms preprocess, 391.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 406.8ms
Speed: 2.0ms preprocess, 406.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 404.1ms
Speed: 2.0ms preprocess, 404.1ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 394.1ms
Speed: 2.0ms preprocess, 394.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 dog, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 393.1ms
Speed: 2.0ms preprocess, 393.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 393.1ms
Speed: 2.0ms preprocess, 393.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 389.1ms
Speed: 2.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 403.1ms
Speed: 3.0ms preprocess, 403.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 393.1ms
Speed: 2.0ms preprocess, 393.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 1 teddy bear, 390.1ms
Speed: 2.0ms preprocess, 390.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 390.1ms
Speed: 3.0ms preprocess, 390.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 390.5ms
Speed: 1.0ms preprocess, 390.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 dog, 393.1ms
Speed: 2.0ms preprocess, 393.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 388.1ms
Speed: 2.0ms preprocess, 388.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 teddy bear, 403.1ms
Speed: 2.0ms preprocess, 403.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 390.1ms
Speed: 1.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 1 cake, 392.1ms
Speed: 3.0ms preprocess, 392.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 390.1ms
Speed: 3.0ms preprocess, 390.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 392.1ms
Speed: 2.0ms preprocess, 392.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 2 persons, 1 dog, 391.1ms
Speed: 2.0ms preprocess, 391.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)





0: 480x640 1 person, 389.1ms
Speed: 3.0ms preprocess, 389.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)


