In [79]:
import pandas as pd
import cv2
import ultralytics
import supervision as sv
from ultralytics import YOLO
import numpy as np

# Load the YOLOv8 pose weights once; `model` is reused by the cells below.
model = YOLO('models/yolov8x-pose-p6.pt')

# cv2.imread does not raise on a missing or unreadable file, it returns None,
# so fail here with a clear message instead of passing None on to predict().
image_path = r'frames\left-object-3\frame_2716.jpg'
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')

# predict() is indexed with [0] to keep the single result for this one image.
# save=True additionally writes the rendered prediction under the runs/ folder.
result = model.predict(image, save=True)[0]


0: 768x1280 (no detections), 2583.9ms
Speed: 9.3ms preprocess, 2583.9ms inference, 1.0ms postprocess per image at shape (1, 3, 768, 1280)
Results saved to [1mruns\pose\predict2[0m


In [75]:

def callback(image_slice: np.ndarray) -> sv.Detections:
    """Run the YOLO model on one image slice and wrap the output for supervision.

    Args:
        image_slice: The image region handed to this function by the slicer.

    Returns:
        Detections built from the first result returned for this slice.
    """
    # save=True makes ultralytics write its rendered output for every slice.
    predictions = model(image_slice, imgsz=1280, save=True)
    return sv.Detections.from_ultralytics(predictions[0])


# Slicer that calls `callback` on 640x640 slices of whatever image it is given.
slicer = sv.InferenceSlicer(
    callback=callback,
    slice_wh=(640, 640),
)


In [76]:
# Build the box annotator, then run sliced inference over the whole image.
bounding_box_annotator = sv.BoundingBoxAnnotator()
detections = slicer(image)

# Annotate a copy so the `image` array itself is not drawn on.
annotated_frame = bounding_box_annotator.annotate(scene=image.copy(), detections=detections)


0: 1280x1280 (no detections), 3112.0ms
Speed: 15.5ms preprocess, 3112.0ms inference, 0.5ms postprocess per image at shape (1, 3, 1280, 1280)
Results saved to [1mruns\detect\predict6[0m

0: 1280x1280 1 handbag, 2944.2ms
Speed: 14.9ms preprocess, 2944.2ms inference, 1.0ms postprocess per image at shape (1, 3, 1280, 1280)
Results saved to [1mruns\detect\predict6[0m

0: 1280x1280 4 persons, 2953.5ms
Speed: 17.5ms preprocess, 2953.5ms inference, 2.0ms postprocess per image at shape (1, 3, 1280, 1280)
Results saved to [1mruns\detect\predict6[0m

0: 1280x768 (no detections), 1925.6ms
Speed: 11.5ms preprocess, 1925.6ms inference, 1.0ms postprocess per image at shape (1, 3, 1280, 768)
Results saved to [1mruns\detect\predict6[0m

0: 1152x1280 (no detections), 2752.9ms
Speed: 13.5ms preprocess, 2752.9ms inference, 1.5ms postprocess per image at shape (1, 3, 1152, 1280)
Results saved to [1mruns\detect\predict6[0m

0: 1152x1280 (no detections), 2827.3ms
Speed: 18.1ms preprocess, 2827.3ms

In [77]:

# Save the annotated frame into the 'output' directory through an ImageSink.
with sv.ImageSink(target_dir_path='output') as sink:
    sink.save_image(
        image=annotated_frame,
        image_name='unattended_obj7s.jpg',
    )

In [59]:
import supervision as sv
from ultralytics import YOLO

# Created once, outside the per-frame callback, so the same tracker object
# is the one updated on every frame.
tracker = sv.ByteTrack()

# Annotators built at module level (label_annotator is not referenced by the
# code visible in this cell).
bounding_box_annotator, label_annotator = sv.BoundingBoxAnnotator(), sv.LabelAnnotator()

def callback(frame: np.ndarray, index: int) -> np.ndarray:
    """Detect, track and draw bounding boxes on a single video frame.

    Args:
        frame: The video frame passed in by ``sv.process_video``.
        index: Second argument passed by ``sv.process_video``; not used here.

    Returns:
        A copy of ``frame`` with the tracked detections drawn as boxes.
    """
    detections = slicer(frame)
    detections = tracker.update_with_detections(detections)

    # Draw on a copy of the current frame (not the global still `image`) so
    # each output frame shows its own content. The module-level annotator is
    # reused instead of constructing a new one for every frame.
    return bounding_box_annotator.annotate(
        scene=frame.copy(),
        detections=detections,
    )

# Run `callback` on the frames of the source clip and write the result video.
# NOTE(review): the target previously had no file extension ('new'). OpenCV's
# video writer chooses the container from the file name, so an extension-less
# target is expected to fail to open; '.mp4' is used here — confirm the
# desired output name/format.
sv.process_video(
    source_path=r'dataset\unattended-object\LeftObject_3.avi',
    target_path='new.mp4',
    callback=callback
)


0: 640x640 (no detections), 235.2ms
Speed: 5.0ms preprocess, 235.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 surfboard, 233.3ms
Speed: 4.5ms preprocess, 233.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 181.9ms
Speed: 4.0ms preprocess, 181.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 188.2ms
Speed: 4.0ms preprocess, 188.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 train, 1 traffic light, 190.6ms
Speed: 4.0ms preprocess, 190.6ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 traffic light, 189.1ms
Speed: 4.0ms preprocess, 189.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 traffic light, 188.5ms
Speed: 3.0ms preprocess, 188.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x256 1 person, 1 train, 95.9ms
Speed: 2.0ms

KeyboardInterrupt: 