Трекинг всех классов:

In [47]:
import cv2
from ultralytics import YOLO
import random


def process_video_with_tracking(model, input_video_path, show_video=True, save_video=False, output_video_path="output_video.mp4"):
    """Track every detected object in a video and draw ID-labelled boxes.

    Each frame read from ``input_video_path`` is passed to ``model.track`` with the
    ``botsort.yaml`` tracker configuration. Every box that carries a track id is
    drawn on the frame in a colour derived from that id.

    Args:
        model: Object exposing ``track(frame, ...)``; the example usage in this
            file passes an ultralytics ``YOLO`` instance.
        input_video_path: Source handed to ``cv2.VideoCapture``.
        show_video: Show the annotated frames (scaled to 75 %) in a window named
            ``"frame"``; pressing ``q`` in that window stops processing early.
        save_video: Write the annotated, full-size frames to ``output_video_path``.
        output_video_path: Destination of the ``mp4v``-encoded output video.

    Raises:
        Exception: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(input_video_path)
    out = None

    # try/finally guarantees the capture and the writer are released even when
    # inference or drawing raises half-way through the video.
    try:
        if not cap.isOpened():
            raise Exception("Error: Could not open video file.")

        if save_video:
            # Keep the frame rate as a float (truncating 29.97 to 29 would slow the
            # output down) and fall back to 30 when the container reports 0.
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            frame_size = (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            )
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            results = model.track(frame, iou=0.4, conf=0.5, persist=True, imgsz=608, verbose=False, tracker="botsort.yaml")
            detections = results[0].boxes

            # ``id`` is None while the tracker has no tracks for this frame.
            if detections.id is not None:
                boxes = detections.xyxy.cpu().numpy().astype(int)
                track_ids = detections.id.cpu().numpy().astype(int)

                for box, track_id in zip(boxes, track_ids):
                    # A private generator seeded with the track id yields the same
                    # colour on every frame without reseeding the global RNG.
                    rng = random.Random(int(track_id))
                    color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))

                    x1, y1, x2, y2 = (int(v) for v in box)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                    cv2.putText(
                        frame,
                        f"Id {track_id}",
                        (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 255, 255),
                        2,
                    )

            if out is not None:
                # The saved frame is the full-size one; resizing below only
                # affects the preview.
                out.write(frame)

            if show_video:
                preview = cv2.resize(frame, (0, 0), fx=0.75, fy=0.75)
                cv2.imshow("frame", preview)
                # Only poll the keyboard when a preview window exists; without a
                # window there is nothing that could receive the key press.
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break
    finally:
        cap.release()
        if out is not None:
            out.release()
        if show_video:
            cv2.destroyAllWindows()

# Example usage: load the trained weights, fuse the model, then run the tracker
# on test.mp4 with a live preview and without writing an output file.
model = YOLO("runs/detect/train/weights/best.pt")
model.fuse()
process_video_with_tracking(
    model,
    input_video_path="test.mp4",
    show_video=True,
    save_video=False,
    output_video_path="output_video.mp4",
)



Model summary (fused): 218 layers, 25840918 parameters, 0 gradients, 78.7 GFLOPs


Трекинг только класса «птица»:

In [49]:
import cv2
from ultralytics import YOLO
import random


def draw_bounding_boxes_without_id(frame, results, names=None):
    """Draw a name-labelled box for every detection whose class id is not 0.

    Class 0 is skipped here because the caller in this file draws those objects
    separately, labelled with their track id.

    Args:
        frame: Image to draw on; it is modified in place.
        results: Sequence whose first element exposes ``boxes.xyxy`` and
            ``boxes.cls`` tensors (as returned by the model's ``predict``).
        names: Optional mapping from class id to label. When omitted, the
            module-level ``model.model.names`` is used.

    Returns:
        The same ``frame`` object that was passed in.
    """
    detections = results[0].boxes
    boxes = detections.xyxy.cpu().numpy().astype(int)
    classes = detections.cls.cpu().numpy().astype(int)

    for box, class_id in zip(boxes, classes):
        if class_id == 0:
            continue

        if names is None:
            # Resolved lazily so the global model is only needed when a label
            # actually has to be drawn.
            names = model.model.names

        # The colour depends on the class (not on a track id): a private
        # generator seeded per class keeps it stable across frames and leaves the
        # global random state untouched. The +8 offset only picks the palette.
        rng = random.Random(int(class_id) + 8)
        color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))

        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(
            frame,
            f"{names[int(class_id)]}",
            (x1, y1),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (50, 255, 50),
            2,
        )
    return frame


def process_video_with_tracking(input_video_path, show_video=True, save_video=False, output_video_path="output_video.mp4"):
    """Track class-0 objects in a video and label every other detection by name.

    For each frame the module-level ``model`` is run through ``track`` restricted
    to ``classes=0`` (``bytetrack.yaml`` tracker configuration) and the
    module-level ``model_detect`` is run through ``predict``. Tracked boxes are
    drawn with their track id; the remaining classes are drawn by
    ``draw_bounding_boxes_without_id``.

    Args:
        input_video_path: Source handed to ``cv2.VideoCapture``.
        show_video: Show the annotated frames (scaled to 75 %) in a window named
            ``"frame"``; pressing ``q`` in that window stops processing early.
        save_video: Write the annotated, full-size frames to ``output_video_path``.
        output_video_path: Destination of the ``mp4v``-encoded output video.

    Returns:
        Tuple ``(results_detect, results)`` holding the detection and tracking
        results of the last processed frame, or ``(None, None)`` when no frame
        could be read.

    Raises:
        Exception: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(input_video_path)
    out = None
    # Pre-bound so the final ``return`` cannot hit an unbound local when the video
    # yields no frame at all.
    results = None
    results_detect = None

    # try/finally guarantees the capture and the writer are released even when
    # inference or drawing raises half-way through the video.
    try:
        if not cap.isOpened():
            raise Exception("Error: Could not open video file.")

        if save_video:
            # Keep the frame rate as a float (truncating 29.97 to 29 would slow the
            # output down) and fall back to 30 when the container reports 0.
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
            frame_size = (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            )
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            results = model.track(frame, iou=0.4, conf=0.5, persist=True, imgsz=608, verbose=False, tracker="bytetrack.yaml", classes=0)
            results_detect = model_detect.predict(frame, iou=0.4, conf=0.5, imgsz=608, verbose=False)

            tracked = results[0].boxes
            # ``id`` is None while the tracker has no tracks for this frame.
            if tracked.id is not None:
                boxes = tracked.xyxy.cpu().numpy().astype(int)
                track_ids = tracked.id.cpu().numpy().astype(int)

                for box, track_id in zip(boxes, track_ids):
                    # A private generator seeded with the track id yields the same
                    # colour on every frame without reseeding the global RNG.
                    rng = random.Random(int(track_id))
                    color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))

                    x1, y1, x2, y2 = (int(v) for v in box)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                    cv2.putText(
                        frame,
                        f"Id {track_id}",
                        (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.70,
                        (0, 255, 255),
                        2,
                    )

            if results_detect[0].boxes is not None:
                # Draws in place; the returned frame is the same object.
                draw_bounding_boxes_without_id(frame, results_detect)

            if out is not None:
                # The saved frame is the full-size one; resizing below only
                # affects the preview.
                out.write(frame)

            if show_video:
                preview = cv2.resize(frame, (0, 0), fx=0.75, fy=0.75)
                cv2.imshow("frame", preview)
                # Only poll the keyboard when a preview window exists; without a
                # window there is nothing that could receive the key press.
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break
    finally:
        cap.release()
        if out is not None:
            out.release()
        if show_video:
            cv2.destroyAllWindows()

    return results_detect, results

# Example usage: the same weights are loaded twice -- `model` is used for
# tracking and `model_detect` for plain detection inside
# process_video_with_tracking, which reads both as module-level globals.
model = YOLO("runs/detect/train/weights/best.pt")
model_detect = YOLO("runs/detect/train/weights/best.pt")
model.fuse()
model_detect.fuse()
results_detect, results = process_video_with_tracking(
    input_video_path="test.mp4",
    show_video=True,
    save_video=False,
    output_video_path="output_video.mp4",
)



Model summary (fused): 218 layers, 25840918 parameters, 0 gradients, 78.7 GFLOPs
Model summary (fused): 218 layers, 25840918 parameters, 0 gradients, 78.7 GFLOPs


---

## Собственный SORT:

По веб-камере:

In [5]:
import torch
import numpy as np
import cv2
from time import time  
from ultralytics import YOLO
import random

# pip install filterpy
# pip install lap

import os
from sort import Sort


class ObjectDetection:
    """Live webcam demo: YOLO detection with SORT tracking of one class.

    Detections of ``TRACKED_CLASS_ID`` are fed to a ``Sort`` tracker and drawn
    with their track id; detections of every other class are drawn with their
    class name only. Calling the instance runs the capture loop until ``q`` is
    pressed; ``s`` saves a screenshot of the annotated frame.
    """

    # Class id whose detections are handed to the tracker.
    TRACKED_CLASS_ID = 0

    def __init__(self, capture_index):
        """Load the model and create the tracker.

        Args:
            capture_index: Device index (or source) handed to ``cv2.VideoCapture``.
        """
        self.capture_index = capture_index

        # Only reported here; the model is not explicitly moved to this device.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print("Using Device: ", self.device)

        self.model = self.load_model()

        self.CLASS_NAMES_DICT = self.model.model.names

        self.tracker = Sort(max_age=50, min_hits=15, iou_threshold=0.15)

    def load_model(self):
        """Load the trained YOLO weights and fuse the model's layers."""
        model = YOLO('runs/detect/train/weights/best.pt')
        model.fuse()
        return model

    def predict(self, frame):
        """Run the detector on one frame and return its results."""
        return self.model(frame, verbose=False, iou=0.4, conf=0.6)

    def get_results(self, results):
        """Collect the tracked-class detections in the layout passed to the tracker.

        Args:
            results: Detector output; ``results[0].boxes`` must expose ``xyxy``,
                ``cls`` and ``conf`` tensors.

        Returns:
            Array of shape ``(k, 5)`` with rows ``[x1, y1, x2, y2, confidence]``;
            ``(0, 5)`` when there is no detection of the tracked class.
        """
        boxes = results[0].boxes
        # One device-to-host transfer per tensor instead of one per detection.
        class_ids = boxes.cls.cpu().numpy().astype(int)
        keep = class_ids == self.TRACKED_CLASS_ID
        if not keep.any():
            return np.empty((0, 5))

        xyxy = boxes.xyxy.cpu().numpy()[keep]
        confidence = boxes.conf.cpu().numpy()[keep]
        return np.column_stack((xyxy, confidence))

    def draw_bounding_boxes_with_id(self, img, bboxes, ids):
        """Draw each tracked box in red with an ``ID: n`` label above it.

        ``img`` is modified in place and returned.
        """
        for bbox, id_ in zip(bboxes, ids):
            x1, y1, x2, y2 = (int(v) for v in bbox[:4])
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.putText(img, "ID: " + str(id_), (x1, int(bbox[1] - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 3)
        return img

    def draw_bounding_boxes_without_id(self, frame, results):
        """Draw a name-labelled box for every detection whose class id is not 0.

        ``frame`` is modified in place and returned.
        """
        boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
        classes = results[0].boxes.cls.cpu().numpy().astype(int)

        for box, class_id in zip(boxes, classes):
            if class_id == 0:
                continue

            # The colour depends on the class: a private generator seeded per
            # class keeps it stable across frames and leaves the global random
            # state untouched.
            rng = random.Random(int(class_id))
            color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))

            x1, y1, x2, y2 = (int(v) for v in box)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(
                frame,
                f"{self.CLASS_NAMES_DICT[int(class_id)]}",
                (x1, y1),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.75,
                (0, 0, 0),
                2,
            )
        return frame

    @staticmethod
    def _frames_per_second(elapsed):
        """Return the whole-number frame rate for one frame that took ``elapsed`` seconds.

        Returns 0 for a non-positive duration instead of dividing by zero.
        """
        return int(1.0 / elapsed) if elapsed > 0 else 0

    def __call__(self):
        """Run the capture / detect / track / display loop until ``q`` is pressed.

        Raises:
            RuntimeError: If the capture device cannot be opened or stops
                delivering frames.
        """
        cap = cv2.VideoCapture(self.capture_index)

        # try/finally releases the camera and closes the window on every exit
        # path, including errors raised inside the loop.
        try:
            # Explicit raises instead of ``assert``: assertions are removed when
            # Python runs with -O.
            if not cap.isOpened():
                raise RuntimeError(f"Could not open capture device {self.capture_index!r}.")

            cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

            num = 1  # running index used in screenshot file names

            while True:
                start_time = time()

                ret, frame = cap.read()
                if not ret:
                    raise RuntimeError("Could not read a frame from the capture device.")

                results = self.predict(frame)
                # get_results returns a (0, 5) array when nothing is detected;
                # the tracker is updated every frame either way.
                res = self.tracker.update(self.get_results(results))

                boxes_track = res[:, :-1]
                boxes_ids = res[:, -1].astype(int)

                frame = self.draw_bounding_boxes_with_id(frame, boxes_track, boxes_ids)
                frame = self.draw_bounding_boxes_without_id(frame, results)

                fps = self._frames_per_second(time() - start_time)
                cv2.putText(frame, f'FPS: {fps}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)

                cv2.imshow('YOLOv8 Detection', frame)

                # Mask to the low byte, as the other cells of this notebook do.
                key = cv2.waitKey(1) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord("s"):
                    os.makedirs('images', exist_ok=True)
                    screenshot_path = 'images/img' + str(num) + '.png'
                    # imwrite reports failure through its return value.
                    if cv2.imwrite(screenshot_path, frame):
                        print("image saved!")
                        num += 1
                    else:
                        print("could not save image: " + screenshot_path)
        finally:
            cap.release()
            cv2.destroyAllWindows()
        
        
    
# Run the demo on capture index 0; the call returns once "q" is pressed in the
# display window.
detector = ObjectDetection(capture_index=0)
detector()

Using Device:  cuda


Model summary (fused): 218 layers, 25840918 parameters, 0 gradients, 78.7 GFLOPs


Обработка по видео:

In [3]:
import torch
import numpy as np
import cv2
from ultralytics import YOLO
import random

# pip install filterpy
# pip install lap

import os
from sort import Sort

class ObjectDetection:
    """Video-file demo: YOLO detection with SORT tracking of one class.

    Detections of ``TRACKED_CLASS_ID`` are fed to a ``Sort`` tracker and drawn
    with their track id; detections of every other class are drawn with their
    class name only. Calling the instance processes the video until it ends or
    ``q`` is pressed; ``s`` saves a screenshot of the annotated frame.
    """

    # Class id whose detections are handed to the tracker.
    TRACKED_CLASS_ID = 0

    def __init__(self, video_path, save_video=False, output_file="output.mp4"):
        """Load the model and create the tracker.

        Args:
            video_path: Source handed to ``cv2.VideoCapture``.
            save_video: Write the annotated frames to ``output_file``.
            output_file: Destination of the ``mp4v``-encoded output video.
        """
        self.video_path = video_path
        self.save_video = save_video
        self.output_file = output_file
        # Holds the cv2.VideoWriter while __call__ is running with save_video=True.
        self.video_writer = None

        # Only reported here; the model is not explicitly moved to this device.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print("Using Device: ", self.device)

        self.model = self.load_model()

        self.CLASS_NAMES_DICT = self.model.model.names

        self.tracker = Sort(max_age=50, min_hits=15, iou_threshold=0.15)

    def load_model(self):
        """Load the trained YOLO weights and fuse the model's layers."""
        model = YOLO('runs/detect/train/weights/best.pt')
        model.fuse()
        return model

    def predict(self, frame):
        """Run the detector on one frame and return its results."""
        return self.model(frame, verbose=False, iou=0.4, conf=0.6)

    def get_results(self, results):
        """Collect the tracked-class detections in the layout passed to the tracker.

        Args:
            results: Detector output; ``results[0].boxes`` must expose ``xyxy``,
                ``cls`` and ``conf`` tensors.

        Returns:
            Array of shape ``(k, 5)`` with rows ``[x1, y1, x2, y2, confidence]``;
            ``(0, 5)`` when there is no detection of the tracked class.
        """
        boxes = results[0].boxes
        # One device-to-host transfer per tensor instead of one per detection.
        class_ids = boxes.cls.cpu().numpy().astype(int)
        keep = class_ids == self.TRACKED_CLASS_ID
        if not keep.any():
            return np.empty((0, 5))

        xyxy = boxes.xyxy.cpu().numpy()[keep]
        confidence = boxes.conf.cpu().numpy()[keep]
        return np.column_stack((xyxy, confidence))

    def draw_bounding_boxes_with_id(self, img, bboxes, ids):
        """Draw each tracked box in red with an ``ID: n`` label above it.

        ``img`` is modified in place and returned.
        """
        for bbox, id_ in zip(bboxes, ids):
            x1, y1, x2, y2 = (int(v) for v in bbox[:4])
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.putText(img, "ID: " + str(id_), (x1, int(bbox[1] - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 3)
        return img

    def draw_bounding_boxes_without_id(self, frame, results):
        """Draw a name-labelled box for every detection whose class id is not 0.

        ``frame`` is modified in place and returned.
        """
        boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
        classes = results[0].boxes.cls.cpu().numpy().astype(int)

        for box, class_id in zip(boxes, classes):
            if class_id == 0:
                continue

            # The colour depends on the class: a private generator seeded per
            # class keeps it stable across frames and leaves the global random
            # state untouched.
            rng = random.Random(int(class_id))
            color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))

            x1, y1, x2, y2 = (int(v) for v in box)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(
                frame,
                f"{self.CLASS_NAMES_DICT[int(class_id)]}",
                (x1, y1),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.75,
                (0, 0, 0),
                2,
            )
        return frame

    def __call__(self):
        """Process the video: detect, track, display and optionally save each frame.

        Raises:
            RuntimeError: If the video cannot be opened.
        """
        cap = cv2.VideoCapture(self.video_path)

        # try/finally releases the capture, the writer and the window on every
        # exit path, including errors raised inside the loop.
        try:
            # Explicit raise instead of ``assert``: assertions are removed when
            # Python runs with -O.
            if not cap.isOpened():
                raise RuntimeError(f"Could not open video source {self.video_path!r}.")

            if self.save_video:
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # 'mp4v' codec for .mp4 output
                # Write at the source frame rate so the saved video plays at the
                # original speed; 20.0 is kept as the fallback when none is reported.
                fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
                frame_size = (
                    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
                )
                self.video_writer = cv2.VideoWriter(self.output_file, fourcc, fps, frame_size)

            num = 1  # running index used in screenshot file names
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                results = self.predict(frame)
                # get_results returns a (0, 5) array when nothing is detected;
                # the tracker is updated every frame either way.
                res = self.tracker.update(self.get_results(results))

                boxes_track = res[:, :-1]
                boxes_ids = res[:, -1].astype(int)

                frame = self.draw_bounding_boxes_with_id(frame, boxes_track, boxes_ids)
                frame = self.draw_bounding_boxes_without_id(frame, results)

                cv2.imshow('YOLOv8 Detection', frame)

                if self.video_writer is not None:
                    self.video_writer.write(frame)

                # Mask to the low byte, as the other cells of this notebook do.
                key = cv2.waitKey(1) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord("s"):
                    os.makedirs('images', exist_ok=True)
                    screenshot_path = 'images/img' + str(num) + '.png'
                    # imwrite reports failure through its return value.
                    if cv2.imwrite(screenshot_path, frame):
                        print("image saved!")
                        num += 1
                    else:
                        print("could not save image: " + screenshot_path)
        finally:
            if self.video_writer is not None:
                self.video_writer.release()
                self.video_writer = None
            cap.release()
            cv2.destroyAllWindows()

# Example usage on a video file. save_video is False here, so nothing is written
# to output_file; pass save_video=True to save the annotated video.
video_path = "test.mp4"
output_file = "output.mp4"
detector = ObjectDetection(video_path=video_path, save_video=False, output_file=output_file)
detector()


Using Device:  cuda


Model summary (fused): 218 layers, 25840918 parameters, 0 gradients, 78.7 GFLOPs
