## Importing the necessary libraries

In [1]:
import cv2
import os
# Remember the directory the notebook was started in. Later cells `%cd` back
# to it and build the repository, model and video paths relative to it.
HOME = os.getcwd()
import numpy as np
import sys
import glob
import time
import torch
from google.colab import files
from IPython import display

## Copy the project repository:

In [2]:
# Clone the project into the current working directory. Later cells read the
# trained weights (best_model_YOLOv8s.pt) and the test video (test.mp4) from
# this checkout.
!git clone 'https://github.com/SavasAtt/Detect-Track-and-count-objects-in-live-videos-using-YOLOv8-ByteTrack.git'

Cloning into 'Detect-Track-and-count-objects-in-live-videos-using-YOLOv8-ByteTrack'...
remote: Enumerating objects: 28, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 28 (delta 11), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (28/28), 54.38 MiB | 5.23 MiB/s, done.


## Install YOLOv8


In [3]:
# Install the Ultralytics package; the `YOLO` class used to load the trained
# model further down is imported from it.
!pip install ultralytics

# Hide the pip log once the install has finished.
display.clear_output()

# Import immediately so a failed install shows up in this cell.
import ultralytics

## Install ByteTrack

[ByteTrack](https://github.com/ifzhang/ByteTrack) is a great tracker, and we can use it together with YOLOv8.

In [4]:
# Work from the notebook's start directory so ByteTrack is cloned next to the
# project repository.
%cd {HOME}
!git clone https://github.com/ifzhang/ByteTrack.git
# Each `!` line runs in its own shell, so the `cd` has to be repeated.
!cd ByteTrack && pip3 install -q -r requirements.txt
!cd ByteTrack && python3 setup.py -q develop
# Extra packages: `onemetric` provides the `box_iou_batch` helper imported in
# the next cell; `cython_bbox` is presumably needed by ByteTrack's matching
# code - TODO confirm.
!pip install -q cython_bbox
!pip install -q onemetric

# Hide the install logs.
from IPython import display
display.clear_output()


# Make the cloned ByteTrack sources importable in this kernel.
import sys
sys.path.append(f"{HOME}/ByteTrack")


# Sanity check: the `yolox` package should now import and report its version.
import yolox
print("yolox.__version__:", yolox.__version__)

yolox.__version__: 0.1.0


In [5]:
from yolox.tracker.byte_tracker import BYTETracker, STrack
from onemetric.cv.utils.iou import box_iou_batch
from dataclasses import dataclass

## Install Roboflow Supervision

In [6]:
!pip install -i https://test.pypi.org/simple/ supervision



display.clear_output()


import supervision
print("supervision.__version__:", supervision.__version__)

supervision.__version__: 0.2.0


In [7]:
from supervision.draw.color import ColorPalette
from supervision.geometry.utils import Point
from supervision import VideoInfo
from supervision import get_video_frames_generator
from supervision import VideoSink
from supervision.notebook.utils import show_frame_in_notebook
from supervision import Detections, BoxAnnotator
# from supervision.tools.line_counter import LineCounter, LineCounterAnnotator

## Tracking utils and functions:

In [8]:
from typing import List

import numpy as np


# packs Detections into one array of box coordinates plus confidence, the
# layout handed to BYTETracker.update as `output_results`
def detections2boxes(detections: Detections) -> np.ndarray:
    """Join each detection's box with its confidence score, row by row.

    Args:
        detections: Detections whose ``xyxy`` boxes and ``confidence`` scores
            are read (assumes ``xyxy`` is 2-D with one row per detection and
            ``confidence`` is 1-D - TODO confirm).

    Returns:
        A 2-D array: the ``xyxy`` columns followed by one confidence column.
    """
    # Turn the flat score vector into a single column so it lines up with the boxes.
    confidence_column = detections.confidence[:, np.newaxis]
    return np.concatenate((detections.xyxy, confidence_column), axis=1)


# collects the boxes of a List[STrack] into one array for IoU matching
def tracks2boxes(tracks: List[STrack]) -> np.ndarray:
    """Gather the ``tlbr`` box of every track into a float array.

    Args:
        tracks: Tracks whose ``tlbr`` attribute is read, in order.

    Returns:
        A float array with one entry per track (empty when ``tracks`` is empty).
    """
    boxes = []
    for track in tracks:
        boxes.append(track.tlbr)
    return np.array(boxes, dtype=float)


# pairs every detection with the id of the track that overlaps it best
def match_detections_with_tracks(
    detections: Detections,
    tracks: List[STrack]
) -> list:
    """Assign tracker ids to detections by IoU with the current tracks.

    Args:
        detections: Detections whose ``xyxy`` boxes are matched.
        tracks: Tracks returned by the tracker for the same frame.

    Returns:
        A list with exactly ``len(detections)`` entries. Entry ``i`` is the
        ``track_id`` of the track matched to detection ``i``, or ``None`` when
        no track overlaps it.
    """
    # Start from "nothing matched". The result must always have one slot per
    # detection, because the caller builds a boolean mask from it and uses that
    # mask to filter the detections.
    tracker_ids = [None] * len(detections)

    # Nothing to match against. Returning the all-None list (instead of an
    # empty array) keeps the length equal to the number of detections even when
    # detections exist but no track is active yet.
    if not np.any(detections.xyxy) or len(tracks) == 0:
        return tracker_ids

    tracks_boxes = tracks2boxes(tracks=tracks)
    iou = box_iou_batch(tracks_boxes, detections.xyxy)
    # For every track, the index of the detection it overlaps the most.
    track2detection = np.argmax(iou, axis=1)

    for tracker_index, detection_index in enumerate(track2detection):
        # A best IoU of zero means the track overlaps no detection at all.
        # If several tracks choose the same detection, the later track wins.
        if iou[tracker_index, detection_index] != 0:
            tracker_ids[detection_index] = tracks[tracker_index].track_id

    return tracker_ids

## Import YOLO and load trained YOLOv8 model:

In [9]:
from ultralytics import YOLO

# The trained weights ship with the project repository cloned at the top of the
# notebook. Build the path from HOME, as the test-video paths do, instead of
# hard-coding Colab's /content directory.
best_model_path = f"{HOME}/Detect-Track-and-count-objects-in-live-videos-using-YOLOv8-ByteTrack/best_model_YOLOv8s.pt"

model = YOLO(best_model_path)

In [10]:
# dict mapping class_id to class_name, as stored on the loaded model
CLASS_NAMES_DICT = model.model.names
# class_ids to keep when filtering detections. This notebook tracks bolt and
# nut objects - NOTE(review): confirm that ids 0 and 1 are those two classes
# by inspecting CLASS_NAMES_DICT.
CLASS_ID = [0, 1]

In [11]:
import shutil

# make a copy of the test video to work with
src=f"{HOME}/Detect-Track-and-count-objects-in-live-videos-using-YOLOv8-ByteTrack/test.mp4"
dst=f"{HOME}/Detect-Track-and-count-objects-in-live-videos-using-YOLOv8-ByteTrack/test2.mp4"

# alternative input kept for reference: a shorter clip stored on Google Drive
# src=f"{HOME}/drive/MyDrive/Object detect and track/test_short.mp4"
# dst=f"{HOME}/drive/MyDrive/Object detect and track/test_short2.mp4"

# shutil.copy returns the destination path, which is what this cell displays
shutil.copy(src,dst)

'/content/Detect-Track-and-count-objects-in-live-videos-using-YOLOv8-ByteTrack/test2.mp4'

In [12]:
# settings
# (line-counter endpoints, currently disabled)
# LINE_START = Point(0, 50)
# LINE_END = Point(640, 50)
%cd {HOME}
# Despite the name, this is the INPUT video: the processing cell reads its
# metadata and frames from this path.
TARGET_VIDEO_PATH = src
# Path of the working copy; the processing cell passes it to VideoSink.
TARGET_VIDEO_PATH2 = dst
# Display the input video's metadata as this cell's output.
VideoInfo.from_video_path(TARGET_VIDEO_PATH)

/content


VideoInfo(width=640, height=640, fps=30, total_frames=1800)

## Setting parameters for ByteTracker:

In [13]:
@dataclass(frozen=True)
class BYTETrackerArgs:
    """Immutable bundle of settings passed to ``BYTETracker`` on construction.

    NOTE(review): the meaning of each field is defined by ByteTrack, not by
    this notebook; the per-field notes are assumptions to confirm against it.
    """
    # presumably the confidence threshold used when tracking detections - TODO confirm
    track_thresh: float = 0.7
    # presumably how many frames a lost track is kept before removal - TODO confirm
    track_buffer: int = 100
    # presumably the threshold used when associating detections to tracks - TODO confirm
    match_thresh: float = 0.8
    # presumably a box aspect-ratio limit - TODO confirm
    aspect_ratio_thresh: float = 3.0
    # presumably the smallest box area considered - TODO confirm
    min_box_area: float = 10
    # presumably toggles MOT20-specific behaviour inside the tracker - TODO confirm
    mot20: bool = False

## Now we process each frame in the test video and detect, count and track all bolt and nut objects.

The processed video will be downloaded at the end of processing.

In [14]:
from tqdm.notebook import tqdm

# Where the annotated video is written; used for both the sink and the download
# so the two can never drift apart.
OUTPUT_VIDEO_PATH = '/content/output.mp4'

# create BYTETracker instance
byte_tracker = BYTETracker(BYTETrackerArgs())
# read the input video's metadata (frame size, fps, frame count)
video_info = VideoInfo.from_video_path(TARGET_VIDEO_PATH)
# create frame generator over the input video
generator = get_video_frames_generator(TARGET_VIDEO_PATH)
# create instance of BoxAnnotator used to draw boxes and labels
box_annotator = BoxAnnotator(thickness=4, text_thickness=1, text_scale=0.6)
# create the video writer, then point it at the output file
Sink_container = VideoSink(TARGET_VIDEO_PATH2, video_info)
Sink_container.target_path = OUTPUT_VIDEO_PATH

# open target video file
with Sink_container as sink:
    # loop over video frames
    for frame in tqdm(generator, total=video_info.total_frames):
        start = time.perf_counter()

        # model prediction on single frame and conversion to supervision Detections
        results = model(frame)
        boxes = results[0].boxes
        detections = Detections(
            xyxy=boxes.xyxy.cpu().numpy(),
            confidence=boxes.conf.cpu().numpy(),
            class_id=boxes.cls.cpu().numpy().astype(int)
        )

        # filtering out detections with unwanted classes
        wanted_class_mask = np.isin(detections.class_id, CLASS_ID)
        detections.filter(mask=wanted_class_mask, inplace=True)

        # tracking detections
        tracks = byte_tracker.update(
            output_results=detections2boxes(detections=detections),
            img_info=frame.shape,
            img_size=frame.shape
        )
        tracker_ids = match_detections_with_tracks(detections=detections, tracks=tracks)
        # The matcher can return an empty result when it has nothing to match.
        # The mask built below must have one entry per detection, otherwise
        # filtering fails, so treat any length mismatch as "nothing matched".
        if len(tracker_ids) != len(detections):
            tracker_ids = [None] * len(detections)
        detections.tracker_id = np.array(tracker_ids)

        # filtering out detections without trackers
        tracked_mask = np.array(
            [matched_id is not None for matched_id in detections.tracker_id],
            dtype=bool
        )
        detections.filter(mask=tracked_mask, inplace=True)

        # format custom labels
        labels = [
            f"#{tracker_id} {CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
            for _, confidence, class_id, tracker_id
            in detections
        ]

        # draw boxes and labels onto the frame
        frame = box_annotator.annotate(scene=frame, detections=detections, labels=labels)

        # overlay the per-frame processing rate; guard against a zero-length
        # interval so the division cannot raise
        elapsed = time.perf_counter() - start
        fps = 1 / elapsed if elapsed > 0 else 0.0
        cv2.putText(frame, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)

        sink.write_frame(frame)

# clear the progress bar and per-frame model logs before reporting
display.clear_output()
print('Video process is done.')
print('')
print('Now downloading the processed video:')
files.download(OUTPUT_VIDEO_PATH)

Video process is done.

Now downloading the processed video:


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## We can also check the processed video here in Colab:

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Read the processed video inside a context manager so the file handle is
# closed as soon as the bytes are in memory.
with open('/content/output.mp4', 'rb') as video_file:
    mp4 = video_file.read()

# Embed the video in the page as a base64 data URL so it can play inline.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)