In [None]:
!pip install ultralytics

In [None]:
!pip install opencv-python

In [None]:
from ultralytics import YOLO

# Load a model
# Start from the pretrained checkpoint (recommended for training). To build a new model
# from scratch instead, construct it from the architecture file: YOLO("yolov8n.yaml").
# (Building both and overwriting the first only wasted work.)
model = YOLO("yolov8n.pt")

# Use the model
model.train(data="coco128.yaml", epochs=3)  # train the model
metrics = model.val()  # evaluate model performance on the validation set
results = model("https://ultralytics.com/images/bus.jpg")  # predict on an image
success = model.export(format="onnx")  # export the model to ONNX format

In [2]:
from ultralytics import YOLO

# Load the pretrained checkpoint and run inference on the sample bus image given by URL.
model = YOLO("yolov8n.pt")
results= model("https://ultralytics.com/images/bus.jpg")


Found https:\ultralytics.com\images\bus.jpg locally at bus.jpg
image 1/1 C:\Users\User\Documents\machine_learning\bus.jpg: 640x480 4 persons, 1 bus, 1 stop sign, 25.0ms
Speed: 6.0ms preprocess, 25.0ms inference, 75.0ms postprocess per image at shape (1, 3, 640, 640)


In [5]:
from PIL import Image
# Open the local bus.jpg with PIL and pass the image object (not a path) to predict.
# NOTE(review): relies on `model` defined in an earlier cell; with save=True the log below
# reports results saved under runs\detect\predict.
image= Image.open("bus.jpg")
results= model.predict(source=image, save=True)


0: 640x480 4 persons, 1 bus, 1 stop sign, 56.0ms
Speed: 190.3ms preprocess, 56.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)
Results saved to runs\detect\predict


In [1]:
# Run prediction through the yolo command-line tool with source=0.
# NOTE(review): source=0 is presumably the default webcam — confirm. The recorded output (^C)
# shows this run was interrupted.
!yolo mode=predict model=yolov8n.pt source=0

^C


In [None]:
import torch
import numpy as np
import cv2
from time import time
from ultralytics import YOLO

import supervision as sv


class ObjectDetection:
    """Run YOLOv8 detection on a capture stream and show the annotated frames.

    Calling an instance opens the capture source, runs the model on each
    frame, draws labelled boxes and an FPS counter, and displays the result in
    an OpenCV window until the Esc key (code 27) is pressed.
    """

    def __init__(self, capture_index):
        """Load the model and prepare the box annotator.

        Args:
            capture_index: Value passed to ``cv2.VideoCapture`` when the
                instance is called.
        """
        self.capture_index = capture_index

        # Only reported here; nothing in this class moves the model to this device explicitly.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print("Using Device: ", self.device)

        self.model = self.load_model()

        # Class-id -> class-name mapping taken from the loaded model.
        self.CLASS_NAMES_DICT = self.model.model.names

        self.box_annotator = sv.BoxAnnotator(sv.ColorPalette.default(), thickness=3, text_thickness=3, text_scale=1.5)

    def load_model(self):
        """Create the model from ``yolov8n.pt``, call ``fuse()`` on it, and return it."""
        model = YOLO("yolov8n.pt")
        model.fuse()
        return model

    def predict(self, frame):
        """Run the model on ``frame`` and return whatever the model call returns."""
        return self.model(frame)

    def plot_bboxes(self, results, frame):
        """Draw every detection of the first result onto ``frame``.

        Args:
            results: Indexable collection of model results; only ``results[0]``
                is used.
            frame: Image handed to the annotator as the scene to draw on.

        Returns:
            The frame returned by the box annotator.

        Side effects:
            Stores the generated label strings in ``self.labels``.
        """
        boxes = results[0].boxes
        detections = sv.Detections(
            xyxy=boxes.xyxy.cpu().numpy(),
            confidence=boxes.conf.cpu().numpy(),
            class_id=boxes.cls.cpu().numpy().astype(int),
        )

        # Read the confidence/class arrays directly instead of tuple-unpacking each
        # detection, so the labels do not depend on how many fields iteration yields.
        self.labels = [
            f"{self.CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
            for confidence, class_id in zip(detections.confidence, detections.class_id)
        ]

        return self.box_annotator.annotate(scene=frame, detections=detections, labels=self.labels)

    def __call__(self):
        """Capture, detect, annotate, and display frames until Esc is pressed.

        Raises:
            AssertionError: If the capture source cannot be opened or a frame
                cannot be read.
        """
        cap = cv2.VideoCapture(self.capture_index)
        try:
            assert cap.isOpened(), f"Could not open capture source {self.capture_index!r}"
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

            while True:
                start_time = time()

                ret, frame = cap.read()
                assert ret, "Failed to read a frame from the capture source"

                results = self.predict(frame)
                frame = self.plot_bboxes(results, frame)

                # Guard against a zero elapsed time so the FPS overlay can never divide by zero.
                elapsed = time() - start_time
                fps = 1 / elapsed if elapsed > 0 else 0.0

                cv2.putText(frame, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)

                cv2.imshow('YOLOv8 Detection', frame)

                # 27 is the Esc key code.
                if cv2.waitKey(5) & 0xFF == 27:
                    break
        finally:
            # Always free the capture and close the window, even when an error ends the loop.
            cap.release()
            cv2.destroyAllWindows()
        
        
    
# Build the detector for capture index 0 and start its capture/detect/display loop.
# The call keeps running until the Esc key is pressed in the display window.
detector = ObjectDetection(capture_index=0)
detector()

In [None]:
import torch
# Bare expression: the cell displays the imported torch module object as its output.
torch

In [14]:
# import ultralytics 
import supervision as sv
import cv2

from ultralytics import YOLO
# Read the image with OpenCV, load the pretrained model, and predict on the same file path.
# NOTE(review): `image` is not used by the prediction below, which receives the path instead.
image = cv2.imread('image1_50_left.jpg')
model = YOLO('yolov8n.pt')
results = model.predict('image1_50_left.jpg')
# Prints the full Results repr, including the raw pixel array (see the long output below).
print(results[0])


image 1/1 /Users/chanifrusydi/machine_learning/image1_50_left.jpg: 640x480 2 cars, 1 fire hydrant, 65.3ms
Speed: 3.5ms preprocess, 65.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 480)


ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
keys: ['boxes']
masks: None
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'bus', 5: 'train', 6: 'truck', 7: 'traffic light', 8: 'fire hydrant', 9: 'stop sign', 10: 'cat', 11: 'dog'}
orig_img: array([[[114, 137, 122],
        [ 96, 119, 104],
        [ 74,  95,  80],
        ...,
        [103, 152, 120],
        [ 93, 148, 115],
        [ 79, 140, 106]],

       [[ 90, 111,  96],
        [ 78,  99,  84],
        [ 86, 107,  92],
        ...,
        [104, 154, 122],
        [ 91, 148, 115],
        [ 76, 140, 105]],

       [[ 89, 108,  91],
        [ 90, 109,  92],
        [ 82, 100,  83],
        ...,
        [117, 170, 137],
        [101, 161, 127],
        [ 82, 152, 116]],

       ...,

       [[137, 142, 145],
        [139, 144, 147],
        [134, 140, 145],
        ...,
        [147, 162, 171],
        [164, 179, 188],
        [165, 180, 1

In [2]:
# Number of entries in `results` (1 for the single image predicted above).
len(results)

1

In [3]:
# Take the first entry of `results` and print its length.
# NOTE(review): the length appears to be the number of detected boxes (it matches
# len(result.boxes) in a later cell) — confirm.
result = results[0]
print(len(result))

3


In [8]:
# Select the box at index 2; the printed length shows the selection holds a single box.
box = result.boxes[2]
print(len(box))

1


In [9]:
# Print the first class id, xyxy coordinates, and confidence of the selected box (all tensors).
print(box.cls[0], box.xyxy[0], box.conf[0])

tensor(8.) tensor([183.4177, 659.8679, 208.6181, 715.3016]) tensor(0.5735)


In [10]:
# Convert the three tensors to plain Python values via tolist() (float, list of floats, float).
class_id, coordinates, confidence = box.cls[0].tolist(), box.xyxy[0].tolist(), box.conf[0].tolist()

In [11]:
# Display the converted values as a tuple.
class_id, coordinates, confidence

(8.0,
 [183.4176788330078, 659.8678588867188, 208.61813354492188, 715.3016357421875],
 0.5734856128692627)

In [12]:
# Round the coordinates to integers and convert the class id into its class name.
coordinates = [round(point) for point in coordinates]
# box.cls[0].item() is a float (e.g. 8.0); it still matches the integer keys of result.names.
class_name = result.names[box.cls[0].item()]
coordinates, class_name

([183, 660, 209, 715], 'fire hydrant')

In [13]:
# Print the class-id -> class-name mapping carried by the result.
print(result.names)

{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'bus', 5: 'train', 6: 'truck', 7: 'traffic light', 8: 'fire hydrant', 9: 'stop sign', 10: 'cat', 11: 'dog'}


In [None]:
# Unpack class id, box coordinates, and confidence of the current `box` as plain Python values.
# NOTE(review): the original statement had no right-hand side; this completes it with the same
# tolist() unpacking used earlier in the notebook — confirm that is what was intended.
class_id, coordinate, confidence = box.cls[0].tolist(), box.xyxy[0].tolist(), box.conf[0].tolist()

In [9]:
# Number of boxes in this result.
len(result.boxes)

3

In [11]:
# Select the first box and display its Boxes repr (all attributes are listed in the output).
box = result.boxes[0]
box



ultralytics.engine.results.Boxes object with attributes:

boxes: tensor([[182.7726, 662.9521, 208.8178, 714.5112,   0.9284,  10.0000]])
cls: tensor([10.])
conf: tensor([0.9284])
data: tensor([[182.7726, 662.9521, 208.8178, 714.5112,   0.9284,  10.0000]])
id: None
is_track: False
orig_shape: (991, 743)
shape: torch.Size([1, 6])
xywh: tensor([[195.7952, 688.7317,  26.0452,  51.5591]])
xywhn: tensor([[0.2635, 0.6950, 0.0351, 0.0520]])
xyxy: tensor([[182.7726, 662.9521, 208.8178, 714.5112]])
xyxyn: tensor([[0.2460, 0.6690, 0.2810, 0.7210]])

In [None]:
def non_max_suppression(
        prediction,
        conf_thres=0.6,
        iou_thres=0.45,
        classes=None,
        agnostic=False,
        multi_label=False,
        labels=(),
        max_det=300,
        nc=12,
        max_time_img=0.1,
):
    """Run non-maximum suppression using this notebook's default thresholds.

    NOTE(review): the draft cell ended after the parameter list (and had two
    punctuation errors in it), so no body existed. This body forwards every
    argument unchanged to ``ultralytics.utils.ops.non_max_suppression`` —
    confirm that this is the intended implementation.

    Args:
        prediction: Raw model predictions, forwarded as-is.
        conf_thres: Confidence threshold, forwarded as-is.
        iou_thres: IoU threshold, forwarded as-is.
        classes: Optional class filter, forwarded as-is.
        agnostic: Forwarded as-is.
        multi_label: Forwarded as-is.
        labels: Forwarded as-is.
        max_det: Forwarded as-is.
        nc: Forwarded as-is.
        max_time_img: Forwarded as-is.

    Returns:
        Whatever the Ultralytics routine returns for these arguments.
    """
    # Local import keeps this cell self-contained.
    from ultralytics.utils import ops

    return ops.non_max_suppression(
        prediction,
        conf_thres=conf_thres,
        iou_thres=iou_thres,
        classes=classes,
        agnostic=agnostic,
        multi_label=multi_label,
        labels=labels,
        max_det=max_det,
        nc=nc,
        max_time_img=max_time_img,
    )
        

In [27]:
def postprocess(self, preds, img, original_images):
    preds = non_max_suppression(preds, conf_thres=0.6, iou_thres= 0.45, agnostic=False, max_det =3000, classes=None)
    results=[]
    for i, pred in enumerate(preds):
        original_images = original_images[i] if isinstance(original_images, list) else original_images
        if not isinstance(original_images, torch.Tensor):
            original_images = torch.from_numpy(original_images).to(self.device)
        path = self.