In [8]:
from ultralytics import YOLO

# Pretrained YOLO11n checkpoint
model = YOLO(model="yolo11n.pt")

# Inference on a single image gives back a list holding one Results object
results = model(source="../data/bus.jpg")

# Other things that can be inspected on each Results object:
#   res.boxes  -> Boxes object for bbox outputs
#   res.masks  -> Masks object for segmentation mask outputs
#   res.probs  -> class probabilities for classification outputs
#   res.names  -> class names
#   res[0]     -> first item of the result
#   len(res)   -> number of items in the result
for res in results:
    # Dump the detections (name, class, confidence, box) as a JSON string
    print(res.tojson())


image 1/1 /home/ahmedcr7/Programming/Python/AI/computer vision/yolov11/../data/bus.jpg: 640x480 4 persons, 1 bus, 358.4ms
Speed: 13.2ms preprocess, 358.4ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 480)
[
  {
    "name": "bus",
    "class": 5,
    "confidence": 0.93852,
    "box": {
      "x1": 8.13938,
      "y1": 229.21269,
      "x2": 794.70795,
      "y2": 727.07574
    }
  },
  {
    "name": "person",
    "class": 0,
    "confidence": 0.89054,
    "box": {
      "x1": 670.39191,
      "y1": 394.84122,
      "x2": 809.8111,
      "y2": 879.21283
    }
  },
  {
    "name": "person",
    "class": 0,
    "confidence": 0.87899,
    "box": {
      "x1": 47.24603,
      "y1": 400.22824,
      "x2": 239.33875,
      "y2": 903.42273
    }
  },
  {
    "name": "person",
    "class": 0,
    "confidence": 0.85317,
    "box": {
      "x1": 223.40105,
      "y1": 408.5668,
      "x2": 344.40811,
      "y2": 860.7735
    }
  },
  {
    "name": "person",
    "class": 0,
    "co

In [5]:
import numpy as np
import cv2
from time import time
from ultralytics import YOLO


class ObjectDetection():
    """Run a YOLO detector on a live OpenCV capture and display annotated frames.

    Calling the instance opens the capture source, runs the model on every
    frame, overlays the detections and an FPS counter, and shows the result
    in a window named ``'frame'`` until the stream ends or ``q`` is pressed.
    """

    def __init__(self, capture=0, model_path='yolo11n.pt'):
        """Remember the capture source and load the model.

        Args:
            capture: Value handed to ``cv2.VideoCapture`` (default ``0``).
            model_path: Weights handed to ``YOLO``. Defaults to
                ``'yolo11n.pt'``, the checkpoint this class previously
                hard-coded.
        """
        self.capture_idx = capture
        self.model_path = model_path
        self.model = self.load_model()

    def load_model(self):
        """Build and return the ``YOLO`` model for ``self.model_path``."""
        # getattr keeps the historical default for instances (e.g. subclasses
        # with their own __init__) that never set ``model_path``.
        return YOLO(getattr(self, 'model_path', 'yolo11n.pt'))

    def predict(self, frame):
        """Run the model on ``frame`` and return the model's output unchanged."""
        return self.model(frame)

    def plot_boxes(self, results, frame):
        """Print per-result confidences / class ids and return the annotated image.

        Args:
            results: Iterable of result objects exposing ``.boxes`` and
                ``.plot()``.
            frame: Image to fall back on; returned untouched when ``results``
                is empty.

        Returns:
            The image produced by ``plot()`` of the last result, or ``frame``
            when there were no results.
        """
        confidence, class_idx = [], []

        for res in results:
            # Move to CPU before converting: numpy() cannot convert boxes
            # whose tensors live on a GPU.
            boxes = res.boxes.cpu().numpy()

            confidence.append(boxes.conf)
            class_idx.append(boxes.cls)
            # Draw the result currently being iterated (was always results[0]).
            frame = res.plot()

        print(f"confidence -> {confidence}")
        print(f"class_idx -> {class_idx}")

        return frame

    @staticmethod
    def _fps(elapsed):
        """Whole frames per second for one frame that took ``elapsed`` seconds.

        Returns 0 when ``elapsed`` is not positive, so a very fast frame or a
        wall-clock adjustment cannot cause a division by zero.
        """
        return int(1 / elapsed) if elapsed > 0 else 0

    def __call__(self):
        """Capture, detect and display frames until the stream ends or ``q`` is pressed.

        The capture device and any OpenCV windows are released even if an
        exception (including a keyboard/kernel interrupt) escapes the loop.
        """
        cap = cv2.VideoCapture(self.capture_idx)
        try:
            print(cap.isOpened())

            if cap.isOpened():
                # Requested frame size; the capture backend decides whether it is honoured.
                cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1080)
                cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

                while True:
                    start_time = time()

                    ret, frame = cap.read()

                    if not ret:
                        break

                    results = self.predict(frame)
                    frame = self.plot_boxes(results, frame)

                    fps = self._fps(time() - start_time)

                    cv2.putText(frame, f'FPS: {fps}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
                    cv2.imshow('frame', frame)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
        finally:
            cap.release()
            cv2.destroyAllWindows()

# Live demo on capture source 0; the loop ends when the stream stops or 'q' is pressed.
detector = ObjectDetection(capture=0)
detector()

True

0: 480x640 1 person, 147.2ms
Speed: 2.9ms preprocess, 147.2ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)
confidence -> [array([    0.93325], dtype=float32)]
class_idx -> [array([          0], dtype=float32)]

0: 480x640 1 person, 212.8ms
Speed: 41.4ms preprocess, 212.8ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)
confidence -> [array([    0.92819], dtype=float32)]
class_idx -> [array([          0], dtype=float32)]

0: 480x640 1 person, 531.4ms
Speed: 4.2ms preprocess, 531.4ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)
confidence -> [array([    0.92888], dtype=float32)]
class_idx -> [array([          0], dtype=float32)]

0: 480x640 1 person, 366.3ms
Speed: 11.3ms preprocess, 366.3ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)
confidence -> [array([    0.93194], dtype=float32)]
class_idx -> [array([          0], dtype=float32)]

0: 480x640 1 person, 248.8ms
Speed: 18.0ms preprocess, 248.8ms infer

In [2]:
from ultralytics import YOLO
# YOLO11m checkpoint; the recorded run shows the weights being downloaded to 'yolo11m.pt'.
model = YOLO(model="yolo11m.pt")

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt to 'yolo11m.pt'...


100%|██████████| 38.8M/38.8M [20:24<00:00, 33.2kB/s]  
