## Import libs

In [1]:
import torch
# Print the CUDA compiler version available to the notebook's shell.
!nvcc --version
# Keep only the "major.minor" part of the installed torch version.
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# Take whatever follows the last "+" in torch.__version__ (e.g. "cu118").
# NOTE(review): when the version string contains no "+", this evaluates to the
# whole version string rather than a CUDA tag — confirm that is acceptable.
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

import os
# Remember the directory the notebook was started in; later cells `%cd` back
# to it and build file paths from it.
HOME = os.getcwd()
print(HOME)

import pandas as pd
import time
import cv2

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
torch:  2.0 ; cuda:  cu118
/content


## Install YOLOv5 and the ByteTrack tracker

In [2]:
# Clone the YOLOv5 repository next to the notebook and install its requirements.
%cd {HOME}
!git clone https://github.com/ultralytics/yolov5

# requirements.txt is read relative to the cloned repository, so change into it first.
%cd {HOME}/yolov5
!pip install -r requirements.txt

# Hide the verbose clone/pip output once installation has finished.
from IPython import display
display.clear_output()

In [3]:
# Install a pinned supervision release; the import paths used later in this
# notebook are written against this version.
!pip install supervision==0.10.0
# Hide the pip output.
from IPython import display
display.clear_output()

# Confirm which supervision version was actually imported.
import supervision
print("supervision", supervision.__version__)

supervision 0.10.0


In [4]:
# Clone ByteTrack next to the notebook and work inside the repository.
%cd {HOME}
!git clone https://github.com/ifzhang/ByteTrack.git
%cd {HOME}/ByteTrack

# workaround related to https://github.com/roboflow/notebooks/issues/80
# Rewrite the onnx / onnxruntime pins in requirements.txt to the versions below
# before installing.
!sed -i 's/onnx==1.8.1/onnx==1.14.0/g' requirements.txt
!sed -i 's/onnxruntime==1.8.0/onnxruntime==1.12.0/g' requirements.txt


# Install ByteTrack's requirements, then the package itself in develop mode,
# followed by the extra packages imported later in the notebook.
!pip3 install -q -r requirements.txt
!python3 setup.py -q develop
!pip install -q cython_bbox
!pip install -q onemetric
# workaround related to https://github.com/roboflow/notebooks/issues/112 and https://github.com/roboflow/notebooks/issues/106
!pip install -q loguru lap thop

# Hide the installation output.
from IPython import display
display.clear_output()

# Put the cloned repository on the import path so that `import yolox` below
# can be resolved from it.
import sys
sys.path.append(f"{HOME}/ByteTrack")

import yolox
print("yolox.__version__:", yolox.__version__)
# Return to the notebook's start directory for the following cells.
%cd {HOME}

yolox.__version__: 0.1.0
/content


## Config Tracker, Supervision

In [5]:
from yolox.tracker.byte_tracker import BYTETracker, STrack
from onemetric.cv.utils.iou import box_iou_batch
from dataclasses import dataclass


@dataclass(frozen=True)
class BYTETrackerArgs:
    """Immutable settings object passed to ``BYTETracker`` when the tracker is built.

    An instance with the defaults below is handed to ``BYTETracker(...)`` in the
    cell that sets up the pipeline. ``frozen=True`` makes instances read-only.

    NOTE(review): the field names presumably mirror the attributes ByteTrack
    reads from its ``args`` object; see the ByteTrack source for the exact
    meaning of each threshold before tuning it.
    """
    track_thresh: float = 0.25
    track_buffer: int = 30
    match_thresh: float = 0.8
    aspect_ratio_thresh: float = 3.0
    min_box_area: float = 1.0
    mot20: bool = False

In [6]:
import supervision
from supervision.draw.color import ColorPalette
from supervision.geometry.core import Point
from supervision.utils.video import VideoInfo
from supervision.utils.video import get_video_frames_generator
from supervision.utils.video import process_video
from supervision.utils.notebook import plot_image as show_frame_in_notebook
from supervision.detection.annotate import Detections, BoxAnnotator
from supervision.detection.line_counter import LineZone as LineCounter, LineZoneAnnotator as LineCounterAnnotator

## Load & Config Model

In [7]:
# Download the pretrained detector weights from Google Drive into the current directory.
!gdown  1gglIwqxaH2iTvy6lZlXuAcMpd_U0GCUb # load crowdhuman_yolov5m.pt

Downloading...
From: https://drive.google.com/uc?id=1gglIwqxaH2iTvy6lZlXuAcMpd_U0GCUb
To: /content/crowdhuman_yolov5m.pt
100% 169M/169M [00:03<00:00, 47.2MB/s]


In [8]:
# Load the custom YOLOv5 checkpoint through torch.hub.
# The weights path is built from HOME (the directory the download cell runs in)
# instead of a hard-coded "/content/..." path, so the notebook is not tied to
# one machine layout.
# force_reload=True makes torch.hub fetch the repository again rather than
# reuse a cached copy.
model = torch.hub.load('ultralytics/yolov5', 'custom', path=f'{HOME}/crowdhuman_yolov5m.pt', force_reload=True)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
YOLOv5 🚀 2023-7-4 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
Model summary: 308 layers, 21041679 parameters, 0 gradients
Adding AutoShape... 


## Utility functions

In [9]:
def resize_video(in_path, out_path, height:int):
  """Write a copy of a video rescaled to the given height.

  Args:
    in_path: path of the video file to read.
    out_path: path of the resized video file to write.
    height: target height in pixels.

  Returns:
    None. The result is the file written to ``out_path``.
  """
  # Imported lazily so moviepy is only required when this helper is called.
  import moviepy.editor as mp
  clip = mp.VideoFileClip(in_path)
  try:
    # According to the MoviePy documentation, when only the height is given the
    # width is computed so that the width/height ratio is conserved.
    clip_resized = clip.resize(height=height)
    clip_resized.write_videofile(out_path)
  finally:
    # Release the underlying reader even if resizing or writing fails.
    clip.close()


In [10]:
from typing import List

import numpy as np


def detections2boxes(detections: Detections) -> np.ndarray:
    """Pack detections into one array with a row per detection.

    Each row is the detection's ``xyxy`` box followed by its confidence, i.e.
    the confidence is appended as an extra last column. The result is what
    ``process_frame`` passes to the tracker's ``update`` call.
    """
    corners = detections.xyxy
    # Turn the 1-D confidence vector into a single column so it can be
    # joined to the box coordinates along the column axis.
    scores = detections.confidence[:, None]
    return np.concatenate((corners, scores), axis=1)


def tracks2boxes(tracks: List[STrack]) -> np.ndarray:
    """Stack the ``tlbr`` box of every track into one float array.

    The result has one row per track, in the same order as ``tracks``, and is
    the form ``match_detections_with_tracks`` feeds to the IoU computation.
    """
    boxes = []
    for track in tracks:
        boxes.append(track.tlbr)
    return np.array(boxes, dtype=float)


def match_detections_with_tracks(
    detections: Detections,
    tracks: List[STrack]
) -> list:
    """Assign a tracker id to each detection by best IoU overlap with the tracks.

    For every track, the detection with the highest IoU is looked up; if that
    IoU is non-zero the track's ``track_id`` is written into that detection's
    slot. When several tracks pick the same detection, the track processed
    last wins.

    Args:
        detections: detections for the current frame (``xyxy`` boxes are used).
        tracks: tracks returned by the tracker for the current frame.

    Returns:
        A list with exactly one entry per detection: the matched ``track_id``,
        or ``None`` when no track was matched. The length always equals
        ``len(detections)`` so the result can be used directly as a
        per-detection mask/attribute by the caller.
    """
    # One slot per detection; None means "no track matched this detection".
    tracker_ids = [None] * len(detections)

    # Nothing to match: keep the result aligned with the detections instead of
    # returning an empty array, which breaks per-detection indexing whenever
    # detections exist but no tracks do.
    if not np.any(detections.xyxy) or len(tracks) == 0:
        return tracker_ids

    tracks_boxes = tracks2boxes(tracks=tracks)
    # iou[i, j] is the overlap between track i and detection j.
    iou = box_iou_batch(tracks_boxes, detections.xyxy)
    # For each track, the index of its best-overlapping detection.
    track2detection = np.argmax(iou, axis=1)

    for tracker_index, detection_index in enumerate(track2detection):
        # A zero IoU means the "best" detection does not overlap the track at all.
        if iou[tracker_index, detection_index] != 0:
            tracker_ids[detection_index] = tracks[tracker_index].track_id

    return tracker_ids

In [11]:
def make_line(video_path, p1_n, p2_n):
  """Convert two normalised points into pixel-coordinate ``Point`` objects.

  ``p1_n`` and ``p2_n`` are ``(x, y)`` pairs expressed as fractions of the
  video's width and height. The video at ``video_path`` is only inspected for
  its frame size. Coordinates are truncated to integers.

  Returns the pair ``(start_point, end_point)``.
  """
  info = VideoInfo.from_video_path(video_path)
  frame_w = info.width
  frame_h = info.height
  start = Point(int(frame_w * p1_n[0]), int(frame_h * p1_n[1]))
  end = Point(int(frame_w * p2_n[0]), int(frame_h * p2_n[1]))
  return start, end

In [12]:
def process_frame(frame: np.ndarray, index: int) -> np.ndarray:
    """Detect, track, count and annotate one video frame.

    Used as the per-frame callback of ``process_video``. Relies on objects
    created at notebook level: ``model``, ``byte_tracker``, ``line_counter``,
    ``box_annotator``, ``line_annotator``, the flags ``SHOW_LABELS`` and
    ``SHOW_LINECOUNTER``, the line endpoints ``LINE_START`` / ``LINE_END``,
    and ``FPS`` / ``TOTAL_FRAMES``.

    Side effects:
        * updates ``byte_tracker`` and ``line_counter`` state,
        * stores ``(in_count, out_count)`` in ``DS`` under the key ``index / FPS``,
        * prints one progress line per frame.

    Args:
        frame: the frame image.
        index: zero-based frame number supplied by ``process_video``.

    Returns:
        The annotated frame.
    """
    start_time = time.time()

    results = model(frame)
    detections = Detections.from_yolov5(results)

    # Keep only detections of class 1.
    # NOTE(review): presumably the "head" class of the CrowdHuman model — confirm.
    detections = detections[detections.class_id == 1]

    # Feed the filtered detections to the tracker.
    tracks = byte_tracker.update(
        output_results=detections2boxes(detections=detections),
        img_info=frame.shape,
        img_size=frame.shape
    )
    tracker_ids = match_detections_with_tracks(detections=detections, tracks=tracks)
    # Defensive: the mask built below must have exactly one entry per
    # detection, so fall back to "no track" for every detection if the matcher
    # returned a result of a different length.
    if len(tracker_ids) != len(detections):
        tracker_ids = [None] * len(detections)
    detections.tracker_id = np.array(tracker_ids)

    # Drop detections that were not matched to a track. The `!= None`
    # comparison is deliberate: it is evaluated element-wise by numpy and
    # yields a boolean mask, which `is not None` would not.
    detections = detections[detections.tracker_id != None]

    # Update the line counter with the tracked detections and read the totals.
    line_counter.trigger(detections=detections)
    c_in = line_counter.in_count
    c_out = line_counter.out_count

    # Draw the boxes, with or without "#<tracker id>" labels.
    if SHOW_LABELS:
        labels = [
            f"#{track_id}"
            for _, _, _, _, track_id
            in detections
        ]
        frame = box_annotator.annotate(scene=frame, detections=detections, labels=labels)
    else:
        frame = box_annotator.annotate(scene=frame, detections=detections, skip_label=True)

    # Draw the counting line: either the library annotator or a minimal
    # overlay made of the two counters as text plus a thin line.
    if SHOW_LINECOUNTER:
        line_annotator.annotate(frame=frame, line_counter=line_counter)
    else:
        cv2.putText(frame, f"{c_in} {c_out}", (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.line(frame, (LINE_START.x, LINE_START.y), (LINE_END.x, LINE_END.y), (255, 255, 255), 1)

    # Record the running totals, keyed by the frame's timestamp in seconds.
    DS[index/FPS] = (c_in, c_out)

    # Progress line: frame number, tracked detections, processing time, counters.
    print(f'frame {index}/{TOTAL_FRAMES} {len(detections)} {(time.time() - start_time):.4f}s {c_in=} {c_out=}')

    return frame

## Load video

In [13]:
# Download the input video into HOME as in.mp4. The commented-out lines are
# alternative source videos; enable exactly one download.
%cd {HOME}
# !gdown 1h62b_Dh4Fw8ff2gwBLPC4WemT4MNE__x -O in.mp4 # subway_demo.mp4
# !gdown 1qZ6ROKdzHbQiHdizKfYbecr9qquOQ0Cz -O in.mp4 # subway.mp4 https://drive.google.com/file/d//view?usp=drive_link
# !gdown 1sGr3TgnFVsMrF6i0Eb2rkNxQHS7YdBxV -O in.mp4
!gdown 1kxBHYgVfgyRJsCaXUCotNt8WqUOuvWfU -O in.mp4

/content
Downloading...
From: https://drive.google.com/uc?id=1kxBHYgVfgyRJsCaXUCotNt8WqUOuvWfU
To: /content/in.mp4
100% 2.02M/2.02M [00:00<00:00, 159MB/s]


In [14]:
# Input video downloaded by the previous cell.
IN_VIDEO_PATH = f"{HOME}/in.mp4"

# Video properties used by process_frame: FPS turns a frame index into a
# timestamp, TOTAL_FRAMES is shown in the progress output.
VIDEO_INFO = VideoInfo.from_video_path(IN_VIDEO_PATH)
FPS = VIDEO_INFO.fps
TOTAL_FRAMES = VIDEO_INFO.total_frames
# Annotation switches read by process_frame:
# SHOW_LABELS draws "#<tracker id>" next to each box;
# SHOW_LINECOUNTER uses the library line annotator instead of the minimal overlay.
SHOW_LABELS : bool = False
SHOW_LINECOUNTER: bool = False

# statistics storage: process_frame writes (in_count, out_count) per frame timestamp
DS = {}

in_path = IN_VIDEO_PATH
out_path =  f"{HOME}/out.mp4"
stat_path =  f"{HOME}/stat.csv"
# Counting-line endpoints as normalised (x, y) fractions of the frame width and
# height; they can be picked with https://roboflow.github.io/polygonzone/
p1_n, p2_n = [0.66, 0.42],[0.86, 0.44]
print(VIDEO_INFO)

VideoInfo(width=596, height=336, fps=25, total_frames=362)


In [15]:
# Convert the normalised endpoints into pixel coordinates for this video.
LINE_START, LINE_END = make_line(in_path, p1_n, p2_n)
print(LINE_START, LINE_END)

# Text scale and stroke thickness shared by both annotators below.
LBL_SIZE: float = 0.5
LBL_I : int = 1
# create BYTETracker instance
byte_tracker = BYTETracker(BYTETrackerArgs())
# create VideoInfo instance
# NOTE(review): video_info and generator are not read by any cell visible
# here — confirm whether later cells need them before removing.
video_info = VideoInfo.from_video_path(in_path)
# create frame generator
generator = get_video_frames_generator(in_path)
# create LineCounter instance
line_counter = LineCounter(start=LINE_START, end=LINE_END)
# create instance of BoxAnnotator and LineCounterAnnotator
box_annotator = BoxAnnotator(color=ColorPalette.from_hex(['#00ff00']), thickness=LBL_I, text_thickness=LBL_I, text_scale=LBL_SIZE)
line_annotator = LineCounterAnnotator(thickness=LBL_I, text_thickness=LBL_I, text_scale=LBL_SIZE)

# Run process_frame on every frame of the input and write the annotated video.
process_video(
    source_path=in_path,
    target_path=out_path,
    callback=process_frame
)
# Save the per-frame counters collected in DS by process_frame.
# NOTE(review): DS maps timestamp -> (in, out), so DataFrame(DS) presumably
# yields one column per timestamp and two rows; confirm this wide layout is
# what consumers of stat.csv expect.
pd.DataFrame(DS).to_csv(stat_path, index=False)

Point(x=393, y=141) Point(x=512, y=147)
frame 0/362 19 4.1395s c_in=0 c_out=0
frame 1/362 19 0.0464s c_in=0 c_out=0
frame 2/362 19 0.0287s c_in=0 c_out=0
frame 3/362 18 0.0280s c_in=0 c_out=0
frame 4/362 18 0.0279s c_in=0 c_out=0
frame 5/362 20 0.0288s c_in=0 c_out=0
frame 6/362 19 0.0268s c_in=0 c_out=0
frame 7/362 18 0.0255s c_in=0 c_out=0
frame 8/362 18 0.0243s c_in=0 c_out=0
frame 9/362 18 0.0253s c_in=0 c_out=0
frame 10/362 17 0.0232s c_in=0 c_out=0
frame 11/362 16 0.0233s c_in=0 c_out=0
frame 12/362 15 0.0230s c_in=0 c_out=0
frame 13/362 14 0.0228s c_in=0 c_out=0
frame 14/362 12 0.0199s c_in=0 c_out=0
frame 15/362 13 0.0182s c_in=0 c_out=0
frame 16/362 14 0.0187s c_in=0 c_out=0
frame 17/362 10 0.0246s c_in=0 c_out=0
frame 18/362 12 0.0212s c_in=0 c_out=0
frame 19/362 8 0.0207s c_in=0 c_out=0
frame 20/362 10 0.0178s c_in=0 c_out=0
frame 21/362 8 0.0174s c_in=0 c_out=0
frame 22/362 8 0.0193s c_in=0 c_out=0
frame 23/362 12 0.0160s c_in=0 c_out=0
frame 24/362 12 0.0171s c_in=0 c_out=