In [None]:
# Print the NVIDIA driver / GPU status table; useful to confirm a GPU runtime
# is attached before the detection cells are run.
!nvidia-smi

In [None]:
import os
# Remember the directory the notebook was started in; later cells build paths
# from HOME and `%cd` back to it.
# NOTE: a later cell changes the working directory to {HOME}/ByteTrack, so
# re-running this cell after that point would capture that directory instead.
HOME = os.getcwd()
print(HOME)

In [None]:
# Input video that detection, tracking and line counting are run on.
# Plain string literal: the path contains no placeholders, so no f-prefix is needed.
video_path = "/content/OJ_input.mp4"

In [None]:
# Install the pinned Ultralytics release. `%pip` installs into the environment
# of the running kernel, whereas a bare shell `pip` may resolve to a different
# interpreter.
%pip install ultralytics==8.0.10

# Drop the installer log from the cell output.
from IPython import display
display.clear_output()

# Run the environment check shipped with Ultralytics.
import ultralytics
ultralytics.checks()

In [None]:
# Fetch ByteTrack into HOME. The clone is skipped when the checkout already
# exists so that the cell can be re-run without git failing on the existing
# directory.
%cd {HOME}
![ -d ByteTrack ] || git clone https://github.com/ifzhang/ByteTrack.git
%cd {HOME}/ByteTrack

# workaround related to https://github.com/roboflow/notebooks/issues/80
!sed -i 's/onnx==1.8.1/onnx==1.9.0/g' requirements.txt

# `%pip` installs into the environment of the running kernel.
%pip install -q -r requirements.txt
!python3 setup.py -q develop
%pip install -q cython_bbox
%pip install -q onemetric
# workaround related to https://github.com/roboflow/notebooks/issues/112 and https://github.com/roboflow/notebooks/issues/106
# Installed without -q on purpose; comment out clear_output() below to read the log.
%pip install loguru lap thop

from IPython import display
display.clear_output()


import sys

# Make the checkout importable; guarded so re-running the cell does not add
# the same entry to sys.path again.
if f"{HOME}/ByteTrack" not in sys.path:
    sys.path.append(f"{HOME}/ByteTrack")


import yolox
print("yolox.__version__:", yolox.__version__)

In [None]:
from yolox.tracker.byte_tracker import BYTETracker, STrack
from onemetric.cv.utils.iou import box_iou_batch
from dataclasses import dataclass

@dataclass(frozen=True)
class BYTETrackerArgs:
    """Immutable settings object handed to ``BYTETracker`` when it is built.

    The field names are the ones the ByteTrack tracker reads; their exact
    semantics are defined by that project and cannot be verified from this
    notebook (NOTE(review): confirm against the ByteTrack sources).
    """

    # Thresholds and buffer length for the tracker.
    track_thresh: float = 0.25
    track_buffer: int = 30
    match_thresh: float = 0.8

    # Box-shape related limits.
    aspect_ratio_thresh: float = 3.0
    min_box_area: float = 1.0

    # MOT20-specific switch, disabled here.
    mot20: bool = False

In [None]:
# Install the pinned supervision release. `%pip` installs into the environment
# of the running kernel, whereas a bare shell `pip` may resolve to a different
# interpreter.
%pip install supervision==0.1.0


# Drop the installer log from the cell output.
from IPython import display
display.clear_output()


import supervision
print("supervision.__version__:", supervision.__version__)

In [None]:
# Weights file name passed to YOLO(...) when the detector is created.
MODEL = "yolov8x.pt"

In [None]:
from ultralytics import YOLO

# Build the detector from the weights named in MODEL.
model = YOLO(MODEL)
# NOTE(review): fuse() presumably merges layers to speed up inference —
# confirm against the Ultralytics documentation.
model.fuse()

In [None]:
# Stand-alone CLI prediction over the input video, run from HOME.
# The weights come from MODEL so the CLI run and the in-notebook `model`
# always use the same checkpoint.
%cd {HOME}
!yolo task=detect mode=predict model={MODEL} conf=0.25 source={video_path}

In [None]:
# Class-name lookup taken from the loaded model; it is indexed by class id
# when the per-box labels are built in the processing loop.
CLASS_NAMES_DICT = model.model.names

In [None]:
from supervision.geometry.dataclasses import Point

# Endpoints of the counting line (the LineCounter in the processing cell is
# built from these same coordinates). Both points share the second coordinate
# 1500 and sit 50 units inside 0 and 3840 on the first one.
# NOTE(review): assumes Point(x, y) and a 3840-pixel-wide frame — confirm
# against the input video's resolution.
LINE_START = Point(50,1500)
LINE_END = Point(3840-50,1500)

In [None]:
import os

# Destination file for the annotated output video.
TARGET_PATH=f"{HOME}/content/object_detection.mp4"

# No other cell creates the "content" directory under HOME, and OpenCV-backed
# video writers generally do not raise when the target directory is missing
# (they just produce no file). Create it up front so the sink can open the path.
os.makedirs(os.path.dirname(TARGET_PATH), exist_ok=True)

In [None]:
from supervision.video.dataclasses import VideoInfo

# Read the metadata of the input video; as the last expression of the cell
# the resulting VideoInfo is rendered as the cell output.
VideoInfo.from_video_path(video_path)

In [None]:
from typing import List
from supervision.tools.detections import Detections, BoxAnnotator

import numpy as np


def detections2boxes(detections: Detections) -> np.ndarray:
    """Pack boxes and scores into the single array fed to ``BYTETracker.update``.

    Returns ``detections.xyxy`` with ``detections.confidence`` appended as one
    extra trailing column, i.e. one row per detection.
    """
    boxes = detections.xyxy
    # Turn the 1-D score vector into a column so it can be stacked next to the boxes.
    scores = detections.confidence[:, np.newaxis]
    return np.hstack((boxes, scores))


def tracks2boxes(tracks: List[STrack]) -> np.ndarray:
    """Collect the ``tlbr`` box of every track into one float array.

    The result has one row per track and is the format consumed by
    ``match_detections_with_tracks``.
    """
    boxes = []
    for track in tracks:
        boxes.append(track.tlbr)
    return np.array(boxes, dtype=float)


def match_detections_with_tracks(
    detections: Detections,
    tracks: List[STrack]
) -> List:
    """Assign a tracker id to each detection based on IoU with the tracks.

    For every track the detection with the highest IoU is looked up; when that
    IoU is non-zero the detection receives the track's ``track_id``. If several
    tracks pick the same detection, the last one wins.

    Args:
        detections: detections of the current frame (``xyxy`` boxes are used).
        tracks: tracks to match against.

    Returns:
        A list with exactly ``len(detections)`` entries holding the matched
        ``track_id`` or ``None`` for detections without an overlapping track.
        The list is always aligned with ``detections``, also when there are no
        tracks, so it can be stored as ``detections.tracker_id`` safely.
    """
    tracker_ids = [None] * len(detections)

    # Nothing to match: return the all-None (or empty) list rather than a
    # zero-length array, which would be misaligned with non-empty detections.
    if len(tracker_ids) == 0 or len(tracks) == 0:
        return tracker_ids

    tracks_boxes = tracks2boxes(tracks=tracks)
    iou = box_iou_batch(tracks_boxes, detections.xyxy)
    # index of the best-overlapping detection for each track
    track2detection = np.argmax(iou, axis=1)

    for tracker_index, detection_index in enumerate(track2detection):
        # argmax also returns an index when every IoU is 0; skip those
        if iou[tracker_index, detection_index] != 0:
            tracker_ids[detection_index] = tracks[tracker_index].track_id

    return tracker_ids

In [None]:
from supervision.video.source import get_video_frames_generator

# importing utility for displaying the picked frame in notebook
from supervision.notebook.utils import show_frame_in_notebook
from supervision.tools.detections import Detections, BoxAnnotator
from supervision.draw.color import ColorPalette
from supervision.video.sink import VideoSink
from supervision.tools.line_counter import LineCounter, LineCounterAnnotator
from supervision.geometry.dataclasses import Point

from tqdm.notebook import tqdm

# Per-frame pipeline: detect with YOLO, track with ByteTrack, attach tracker ids
# to the detections, update the line counter, draw boxes and the line, and
# write the annotated frame to TARGET_PATH.
byte_tracker = BYTETracker(BYTETrackerArgs())

# frame generator used to read frames one after another
generator = get_video_frames_generator(video_path)

# counting line built from the shared LINE_START / LINE_END constants instead
# of repeating their coordinates here
line_counter = LineCounter(start=LINE_START, end=LINE_END)

box_annotator = BoxAnnotator(color=ColorPalette(), thickness=4, text_thickness=4, text_scale=2)

# line annotator for displaying the line
line_annotator = LineCounterAnnotator(thickness=4, text_thickness=4, text_scale=2)

video_info = VideoInfo.from_video_path(video_path)

with VideoSink(TARGET_PATH, video_info) as sink:
    for frame in tqdm(generator, total=video_info.total_frames):
        # detection: first (only) result for the single frame passed in
        results = model(frame)[0]

        detections = Detections(
            xyxy=results.boxes.xyxy.cpu().numpy(),
            confidence=results.boxes.conf.cpu().numpy(),
            class_id=results.boxes.cls.cpu().numpy().astype(int)
        )

        # tracking: the frame shape is passed as both image info and image size
        tracks = byte_tracker.update(
            output_results=detections2boxes(detections=detections),
            img_info=frame.shape,
            img_size=frame.shape
        )
        tracker_id = match_detections_with_tracks(detections=detections, tracks=tracks)
        detections.tracker_id = np.array(tracker_id)

        # one label per detection; the comprehension variable is named
        # track_id so it does not shadow the per-frame `tracker_id` above
        labels = [
            f"#{track_id} {CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
            for _, confidence, class_id, track_id in detections
        ]

        line_counter.update(detections=detections)

        frame = box_annotator.annotate(frame=frame, detections=detections, labels=labels)
        # return value is not used; the same `frame` is written out below
        line_annotator.annotate(frame=frame, line_counter=line_counter)

        sink.write_frame(frame)