In [56]:
# Initialize Global Variables
CLASS_LABELS = {0: "person", 2: "car", 39: "bottle", 67: "cell phone"}

In [57]:
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

In [58]:
# predict on image
model_result = model("cars.jpg")

Ultralytics YOLOv8.0.32  Python-3.9.13 torch-2.0.1+cpu CPU
YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs

image 1/1 C:\Users\xdshr\Downloads\Senior Research Project 2023-24\yolotesting\cars.jpg: 384x640 17 cars, 97.0ms
Speed: 1.0ms pre-process, 97.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)


In [59]:
class BoundBox:
    def __init__(self, xmin, ymin, height, width, confidence=None, class_id=None, class_label=None):
        self.xmin = xmin
        self.ymin = ymin
        self.height = height
        self.width = width
        self.confidence = confidence
        self.class_id = class_id
        self.class_label = class_label
        
    def __init__(self, dic: dict):
        self.xmin = dic["x"]
        self.ymin = dic["y"]
        self.height = dic["h"]
        self.width = dic["w"]
        self.confidence = dic["confidence"]
        self.class_id = dic["class"]
        self.class_label = CLASS_LABELS[self.class_id] if self.class_id in CLASS_LABELS else None

    def get_label(self):
        return self.class_label

    def get_confidence(self):
        return self.confidence

    def get_coordinates(self):
        return (self.xmin, self.ymin, self.xmin + self.width, self.ymin + self.height)

    def __repr__(self):
        return "({:.2f}, {:.2f}, {:.2f}, {:.2f}: {})".format(self.xmin, self.ymin, self.xmin + self.width, self.ymin + self.height, self.class_label)

In [60]:
def parseBoxesToList(model_result):
    boxes = model_result[0].boxes
    parse = list()
    for i in range(len(boxes)):
        xy = boxes[i].xyxy[0].tolist()
        parse.append({'x': xy[0], 'y': xy[1], 'w': xy[2], 'h': xy[3],
                    'confidence': float(boxes.conf[i]), 'class': int(boxes.cls[i])})
    return parse

Low:  {'x': 45.0, 'y': 172.0, 'w': 953.0, 'h': 481.0, 'confidence': 0.9344398975372314, 'class': 2}

High: {'x': 31.0, 'y': 33.0, 'w': 951.0, 'h': 347.0, 'confidence': 0.9449694156646729, 'class': 2}

Right: {'x': 258.0, 'y': 89.0, 'w': 1182.0, 'h': 400.0, 'confidence': 0.951224148273468, 'class': 2}

This means that coordinates start at the top left corner.

In [61]:
boxx = parseBoxesToList(model_result)
def toBBList(boxx):
    return [BoundBox(i) for i in boxx]


[print(i) for i in toBBList(boxx)]
print()

(690.00, 142.00, 1467.00, 321.00: car)
(176.00, 25.00, 434.00, 87.00: car)
(457.00, 142.00, 1000.00, 321.00: car)
(621.00, 77.00, 1319.00, 192.00: car)
(194.00, 88.00, 458.00, 211.00: car)
(471.00, 86.00, 1024.00, 208.00: car)
(50.00, 152.00, 191.00, 342.00: car)
(619.00, 22.00, 1318.00, 83.00: car)
(266.00, 141.00, 610.00, 317.00: car)
(31.00, 26.00, 146.00, 88.00: car)
(757.00, 84.00, 1590.00, 203.00: car)
(831.00, 24.00, 1739.00, 84.00: car)
(0.00, 87.00, 85.00, 211.00: car)
(309.00, 26.00, 694.00, 93.00: car)
(415.00, 28.00, 908.00, 98.00: car)
(926.00, 82.00, 1925.00, 200.00: car)
(944.00, 137.00, 1943.00, 310.00: car)



In [62]:
import cv2
import time

# Access the camera
cap = cv2.VideoCapture(0)

# Initialize variables for calculating FPS
fps_start_time = time.time()
fps_frames = 0

while True:
    # Get each frame as an image
    ret, frame = cap.read()
    
    # Calculate FPS
    fps_frames += 1
    fps = 0
    if time.time() - fps_start_time >= 0:
        fps = fps_frames / (time.time() - fps_start_time)
        fps_start_time = time.time()
        fps_frames = 0

    # Overlay FPS on the image
    cv2.putText(frame, f'FPS: {int(fps)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    
    result = toBBList((modelresult := parseBoxesToList(model(frame, verbose=False))))
    # print(modelresult)
    for box in result:
        # if box.get_label() is None:
        #     continue
        if box.get_confidence() < 0.5:
            continue
        
        xmin, ymin, xmax, ymax = (w := [int(_) for _ in box.get_coordinates()])
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
        cv2.putText(frame, str(box.get_label()), ((xmin + xmax) // 2, (ymin + ymax) // 2), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 2)
        cv2.putText(frame, str(box.get_confidence()), (xmin + 50, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    # Show the image
    cv2.imshow('frame', frame)
    
    # Exit if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the camera and close the window
cap.release()
cv2.destroyAllWindows()


[{'x': 241.0, 'y': 113.0, 'w': 640.0, 'h': 479.0, 'confidence': 0.8801260590553284, 'class': 0}, {'x': 492.0, 'y': 299.0, 'w': 571.0, 'h': 480.0, 'confidence': 0.8570155501365662, 'class': 39}]
[{'x': 243.0, 'y': 116.0, 'w': 640.0, 'h': 479.0, 'confidence': 0.8998746275901794, 'class': 0}, {'x': 492.0, 'y': 299.0, 'w': 571.0, 'h': 480.0, 'confidence': 0.8555653691291809, 'class': 39}]
[{'x': 243.0, 'y': 115.0, 'w': 640.0, 'h': 480.0, 'confidence': 0.9005107283592224, 'class': 0}, {'x': 491.0, 'y': 299.0, 'w': 571.0, 'h': 480.0, 'confidence': 0.840170681476593, 'class': 39}]
[{'x': 242.0, 'y': 113.0, 'w': 640.0, 'h': 479.0, 'confidence': 0.887316882610321, 'class': 0}, {'x': 491.0, 'y': 299.0, 'w': 572.0, 'h': 480.0, 'confidence': 0.7613599896430969, 'class': 39}]
[{'x': 242.0, 'y': 114.0, 'w': 640.0, 'h': 479.0, 'confidence': 0.8916054964065552, 'class': 0}, {'x': 492.0, 'y': 299.0, 'w': 572.0, 'h': 480.0, 'confidence': 0.8114400506019592, 'class': 39}]
[{'x': 242.0, 'y': 114.0, 'w': 6