In [1]:
from ultralytics import YOLO
import os

# Build the path to the pretrained weights file.
models_path = "pretrained_models"
filename = "yolo11n.pt"
input_path = os.path.join(models_path, filename)

# Load the model and collect the class names it exposes.
model = YOLO(input_path)
labels = [*model.names.values()]
n = len(labels)

# Report which weights were loaded and the labels they provide.
print(
    f"Model: {input_path}",
    f"Labels({n=}): {labels}",
    sep="\n",
)


Model: pretrained_models\yolo11n.pt
Labels(n=80): ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [2]:
import cv2

# Run the tracker on a single still image and inspect the first detected box.
data_path = "data"
filename = "bus.jpg"
input_path = os.path.join(data_path, filename)

# cv2.imread reports failure by returning None instead of raising, so check
# explicitly before handing the frame to the model.
frame = cv2.imread(input_path)
if frame is None:
    raise OSError(f"Could not read image (missing or unsupported file): {input_path}")

try:
    # show=True displays the annotated frame; the finally block below waits
    # for a key press before closing that window.
    results = model.track(frame, show=True)

    # Indexing an empty set of boxes would raise, so only take the first
    # detection when at least one exists.
    boxes = results[0].boxes
    if boxes is not None and len(boxes) > 0:
        obj_1 = boxes[0]
        print(obj_1)
    else:
        obj_1 = None
        print("No objects detected.")
finally:
    cv2.waitKey()
    cv2.destroyAllWindows()


0: 640x480 4 persons, 1 bus, 175.8ms
Speed: 6.0ms preprocess, 175.8ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)
ultralytics.engine.results.Boxes object with attributes:

cls: tensor([5.])
conf: tensor([0.9402])
data: tensor([[  3.8328, 229.3642, 796.1946, 728.4123,   1.0000,   0.9402,   5.0000]])
id: tensor([1.])
is_track: True
orig_shape: (1080, 810)
shape: torch.Size([1, 7])
xywh: tensor([[400.0137, 478.8882, 792.3618, 499.0481]])
xywhn: tensor([[0.4938, 0.4434, 0.9782, 0.4621]])
xyxy: tensor([[  3.8328, 229.3642, 796.1946, 728.4123]])
xyxyn: tensor([[0.0047, 0.2124, 0.9830, 0.6745]])


In [36]:
import cv2

# Run the tracker on a still image and print the label and box of every detection.
data_path = "data"
filename = "bus.jpg"
input_path = os.path.join(data_path, filename)

# cv2.imread returns None (it does not raise) when the file cannot be read.
frame = cv2.imread(input_path)
if frame is None:
    raise OSError(f"Could not read image (missing or unsupported file): {input_path}")

try:
    results = model.track(frame, show=True)
    print()
    for obj in results[0].boxes:
        # .item() extracts the value of a single-element tensor; cast it to int
        # so model.names is looked up with an integer key. The name class_id
        # avoids rebinding the built-in id() for the rest of the kernel session.
        class_id = int(obj.cls.item())
        # .tolist() gives plain Python floats that unpack directly and, unlike
        # .numpy(), does not require the tensor to be on the CPU.
        x, y, w, h = obj.xywh[0].tolist()
        print(f"Label(id={class_id}): {model.names[class_id]}", end="\n-> ")
        print(f"Location: {x=:.2f}, {y=:.2f}, {w=:.2f}, {h=:.2f}", end="\n\n")
finally:
    # Wait for a key press before closing the preview window.
    cv2.waitKey()
    cv2.destroyAllWindows()




0: 640x480 4 persons, 1 bus, 184.4ms
Speed: 9.0ms preprocess, 184.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)

Label(id=5): bus
-> Location: x=400.01, y=478.89, w=792.36, h=499.05

Label(id=0): person
-> Location: x=740.41, y=636.77, w=138.79, h=483.88

Label(id=0): person
-> Location: x=143.35, y=651.88, w=191.90, h=504.63

Label(id=0): person
-> Location: x=283.76, y=634.56, w=121.41, h=451.75

Label(id=0): person
-> Location: x=34.45, y=714.21, w=68.86, h=316.29



In [15]:
from ultralytics import YOLO, solutions
import cv2
import os

def frame_generator(video_source):
    """Yield frames from an opened capture until it ends or a key is pressed.

    Args:
        video_source: Capture object exposing ``isOpened()``, ``read()`` and
            ``release()`` (e.g. ``cv2.VideoCapture``).

    Yields:
        Each frame returned by ``video_source.read()``.

    The capture is released when the generator finishes for any reason:
    end of stream, a key press, the consumer closing the generator, or an
    exception propagating through it.
    """
    try:
        # cv2.waitKey(1) returns -1 while no key has been pressed.
        while video_source.isOpened() and cv2.waitKey(1) == -1:
            read_successfully, main_frame = video_source.read()

            # A failed read means no frame was grabbed (e.g. end of the file);
            # stop here, otherwise the loop would keep spinning without ever
            # yielding again.
            if not read_successfully:
                break

            yield main_frame
    finally:
        video_source.release()

# Estimate vehicle speeds on a highway clip with the Ultralytics SpeedEstimator.
models_path = "pretrained_models"
filename = "yolo11n.pt"
yolo_path = os.path.join(models_path, filename)

# Load the weights from yolo_path. input_path is not defined in this cell; if
# it exists in the kernel it holds whatever an earlier cell last assigned.
model = YOLO(yolo_path)

# Get the video source
path = 'data/cars-highway.mp4'
video_source = cv2.VideoCapture(path)
if not video_source.isOpened():
    raise OSError(f"Could not open video source: {path}")

# Generate frames from the video source
video_generator = frame_generator(video_source)

speed_region = [(262, 300), (10, 300), (175, 200), (290, 200)]

speed = solutions.SpeedEstimator(
    show=True,  # Display the output
    model=yolo_path,  # Path to the YOLO11 model file.
    region=speed_region,  # Pass region points
    classes=[2, 7],  # If you want to estimate speed of specific classes.
    # line_width=2,  # Adjust the line width for bounding boxes and text display
)

try:
    for frame in video_generator:
        # NOTE(review): the estimator is given its own model path above, so this
        # separate track/plot pass looks redundant and yolo_frame is not used in
        # this cell; confirm nothing else needs it before removing.
        results = model.track(frame, show=False)
        yolo_frame = results[0].plot()
        out = speed.estimate_speed(frame)

finally:
    # Release the capture here as well: if the loop is interrupted (for example
    # by KeyboardInterrupt) the generator may not have reached its own cleanup.
    video_source.release()
    cv2.destroyAllWindows()

Ultralytics Solutions:  {'region': [(262, 300), (10, 300), (175, 200), (290, 200)], 'show_in': True, 'show_out': True, 'colormap': None, 'up_angle': 145.0, 'down_angle': 90, 'kpts': [6, 8, 10], 'analytics_type': 'line', 'json_file': None, 'records': 5, 'show': True, 'model': 'pretrained_models\\yolo11n.pt', 'classes': [2, 7]}

0: 384x640 11 cars, 1 truck, 85.7ms
Speed: 2.0ms preprocess, 85.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 78.8ms
Speed: 2.0ms preprocess, 78.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 109.4ms
Speed: 1.0ms preprocess, 109.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 105.1ms
Speed: 4.0ms preprocess, 105.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 1 truck, 93.2ms
Speed: 1.0ms preprocess, 93.2ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640

KeyboardInterrupt: 