# In [4]:
import torch
import numpy as np
import cv2
from time import time  
from ultralytics import YOLO
import random

# pip install filterpy
# pip install lap

import os
from byte_tracker_pytorch.byte_tracker_model import BYTETracker as ByteTracker


class ObjectDetection:
    """Run YOLOv8 on a live video capture and track selected classes with ByteTrack.

    Calling the instance opens the capture device, runs detection on every
    frame, feeds the tracked-class detections to a ByteTracker, draws the
    results and shows them in an OpenCV window until "q" is pressed.
    """

    # Class ids whose detections are forwarded to the tracker.
    # 0 is the person class; 67 is presumably "cell phone" in the COCO
    # label map used by the pretrained weights -- TODO confirm.
    TRACKED_CLASS_IDS = (0, 67)

    def __init__(self, capture_index):
        """Store the capture index and load the detection model.

        Args:
            capture_index: value passed unchanged to ``cv2.VideoCapture``.
        """
        self.capture_index = capture_index

        # Only reported and stored here; it is not passed to the model call.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print("Using Device: ", self.device)

        self.model = self.load_model()

        # Mapping from class id to class name, taken from the loaded model.
        self.CLASS_NAMES_DICT = self.model.model.names

    def load_model(self):
        """Load the pretrained YOLOv8m weights and fuse the model layers."""
        model = YOLO("yolov8m.pt")  # pretrained YOLOv8m model
        model.fuse()
        return model

    def predict(self, frame):
        """Run the model on one frame and return its raw results."""
        return self.model(frame, verbose=False)

    def get_results(self, results):
        """Convert raw model results into a detection array for the tracker.

        Only detections whose class id is in ``TRACKED_CLASS_IDS`` are kept.

        Args:
            results: model output; only ``results[0].boxes`` is read.

        Returns:
            A float64 array of shape (N, 6) with rows
            ``[x1, y1, x2, y2, confidence, class_id]``; shape (0, 6) when
            nothing matches.
        """
        boxes = results[0].boxes

        # One device-to-host transfer per field instead of one per detection.
        class_ids = boxes.cls.cpu().numpy().astype(int)
        keep = np.isin(class_ids, self.TRACKED_CLASS_IDS)
        if not keep.any():
            return np.empty((0, 6))

        corners = boxes.xyxy.cpu().numpy()[keep]
        scores = boxes.conf.cpu().numpy()[keep]

        return np.column_stack((corners, scores, class_ids[keep])).astype(np.float64)

    def draw_bounding_boxes_with_id(self, img, bboxes, ids):
        """Draw each tracked box in red with its track id above it.

        Args:
            img: image to draw on (modified in place).
            bboxes: iterable of boxes indexed as ``[x1, y1, x2, y2]``.
            ids: iterable of track ids, parallel to ``bboxes``.

        Returns:
            The same image object, with the drawings applied.
        """
        for bbox, id_ in zip(bboxes, ids):
            top_left = (int(bbox[0]), int(bbox[1]))
            bottom_right = (int(bbox[2]), int(bbox[3]))

            cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 2)
            cv2.putText(
                img,
                "ID: " + str(id_),
                (int(bbox[0]), int(bbox[1] - 10)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (0, 255, 0),
                3,
            )

        return img

    def draw_bounding_boxes_without_id(self, frame, results):
        """Draw a class-labelled box for every detection except class 0.

        The box color is derived deterministically from the class id, so all
        objects of one class share a color.

        Args:
            frame: image to draw on (modified in place).
            results: model output; only ``results[0].boxes`` is read.

        Returns:
            The same frame object, with the drawings applied.
        """
        boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
        classes = results[0].boxes.cls.cpu().numpy().astype(int)

        for box, clss in zip(boxes, classes):
            class_id = int(clss)
            if class_id == 0:
                # Class 0 is not labelled here; it is only drawn via the
                # tracker output in draw_bounding_boxes_with_id.
                continue

            # A private generator gives the same per-class color as seeding
            # the module-level one, without disturbing global random state.
            rng = random.Random(class_id)
            color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))

            # Plain ints: OpenCV point arguments should not be numpy scalars.
            x1, y1, x2, y2 = (int(v) for v in box)

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(
                frame,
                f"{self.CLASS_NAMES_DICT[class_id]}",
                (x1, y1),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.75,
                (0, 0, 0),
                2,
            )
        return frame

    def __call__(self):
        """Run the capture / detect / track / display loop.

        Keys handled in the display window: "q" quits, "s" saves the current
        annotated frame to ``images/img<N>.png``.

        Raises:
            RuntimeError: if the capture device cannot be opened.
        """
        cap = cv2.VideoCapture(self.capture_index)
        if not cap.isOpened():
            cap.release()
            raise RuntimeError(f"Cannot open video capture {self.capture_index!r}")
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

        screenshot_index = 1  # counter used in saved screenshot file names

        # ByteTrack parameters
        tracker_fps = 30
        first_track_thresh = 0.7
        second_track_thresh = 0.25
        match_thresh = 0.4
        track_buffer = 60

        # NOTE(review): the meaning of the trailing positional ``1`` is defined
        # by the byte_tracker_pytorch package -- confirm against its signature.
        tracker = ByteTracker(
            tracker_fps, first_track_thresh, second_track_thresh, match_thresh, track_buffer, 1
        )

        try:
            while True:
                start_time = time()

                ret, frame = cap.read()
                if not ret:
                    # End of stream or device failure: stop cleanly so the
                    # capture is still released below.
                    print("Failed to read frame; stopping.")
                    break

                results = self.predict(frame)
                detections = self.get_results(results)

                # --- Multi object tracking ---
                track_list = tracker.update(torch.tensor(detections), xyxy=True)

                id_list = [t.track_id for t in track_list]
                box_list = [t.tlbr for t in track_list]

                frame = self.draw_bounding_boxes_with_id(frame, box_list, id_list)
                frame = self.draw_bounding_boxes_without_id(frame, results)

                # Guard against a zero interval, which would otherwise
                # produce an infinite rate that cannot be converted to int.
                elapsed = time() - start_time
                measured_fps = int(1.0 / elapsed) if elapsed > 0 else 0

                cv2.putText(frame, f'FPS: {measured_fps}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)

                cv2.imshow('YOLOv8 Detection', frame)

                # Mask to the low byte so the key code compares reliably
                # against ord() on every platform.
                key = cv2.waitKey(1) & 0xFF
                if key == ord("q"):
                    break
                if key == ord("s"):
                    # imwrite reports failure through its return value (for
                    # example when the target directory does not exist).
                    os.makedirs('images', exist_ok=True)
                    path = 'images/img' + str(screenshot_index) + '.png'
                    if cv2.imwrite(path, frame):
                        print("image saved!")
                        screenshot_index += 1
                    else:
                        print("failed to save image: " + path)
        finally:
            # Always free the device and close windows, even on an exception.
            cap.release()
            cv2.destroyAllWindows()
        
        
    
if __name__ == "__main__":
    # Only start the camera loop when run as a script or notebook cell,
    # not as a side effect of importing this module.
    detector = ObjectDetection(capture_index=1)
    detector()

# Using Device:  cuda
# YOLOv8m summary (fused): 218 layers, 25886080 parameters, 0 gradients, 78.9 GFLOPs
