# Setup

Clone repo, install dependencies and check PyTorch and GPU.

In [1]:
!git clone --recurse-submodules https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet.git  # clone repo
%pip install -qr requirements.txt  # install dependencies

import torch
from IPython.display import Image, clear_output  # to display images

clear_output()
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

Setup complete. Using torch 1.12.1+cu113 (Tesla T4)


# Download data

Get test video from repo and extract the first 2 seconds of it 

In [2]:
%cd /content/Yolov5_StrongSORT_OSNet

# extract 14 seconds worth of video frames of it
!yes | ffmpeg -ss 00:00:00 -i test.avi -t 00:00:14 -c copy out.avi

/content/Yolov5_StrongSORT_OSNet
ffmpeg version 3.4.11-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 7 (Ubuntu 7.5.0-3ubuntu1~18.04)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-lib

## Run inference on video

The ``cv2.imshow()`` and ``cv.imshow()`` functions from the [opencv-python](https://github.com/skvark/opencv-python) package are incompatible with Jupyter notebook; see https://github.com/jupyter/notebook/issues/3935. 

Hence we chose to save it to file in this notebook. Locally you can use the ``--show-vid`` flag in order visualize the tracking in real-time

In [6]:
%pip install pyyaml

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [7]:
!python track.py --yolo-weights /content/Yolov5_StrongSORT_OSNet/yolov5/yolov5m.pt --strong-sort-weights osnet_x0_25_msmt17.pt --iou-thres 0.8 --source test.avi --save-txt --save-vid

  'Cython evaluation (very fast so highly recommended) is '
[34m[1mtrack: [0myolo_weights=['/content/Yolov5_StrongSORT_OSNet/yolov5/yolov5m.pt'], strong_sort_weights=osnet_x0_25_msmt17.pt, config_strongsort=strong_sort/configs/strong_sort.yaml, source=test.avi, imgsz=[640, 640], conf_thres=0.5, iou_thres=0.8, max_det=1000, device=, show_vid=False, save_txt=True, save_conf=False, save_crop=False, save_vid=True, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/track, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, hide_class=False, half=False, dnn=False
[31m[1mrequirements:[0m tb-nightly not found and is required by YOLOv5, attempting auto-update...
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
flake8 5.0.4 requires importlib-metadata<4.3,>=1.1.0; python_version

# Show results

https://stackoverflow.com/questions/60977179/how-to-play-avi-file-in-google-colab

Convert avi to mp4

In [8]:
!ffmpeg -i /content/Yolov5_StrongSORT_OSNet/runs/track/exp2/test.mp4 output.mp4 -y

ffmpeg version 3.4.11-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 7 (Ubuntu 7.5.0-3ubuntu1~18.04)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-li

Get the file content into data_url

In [9]:
from IPython.display import HTML
from base64 import b64encode
mp4 = open('output.mp4','rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

Display it with HTML

In [10]:
HTML("""
<video controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [None]:
from glob import glob

out_path = glob('/content/Yolov5_StrongSORT_OSNet/runs/track/exp2/tracks/*.txt')
out_path

['/content/Yolov5_StrongSORT_OSNet/runs/track/exp2/tracks/test.txt']

In [None]:
########## 객체 id, 좌표 값 저장한 txt 파일 List로 변형하는 함수 ##########

def Text_List(out_path):
  result = [] # 최종적으로 받을 리스트
  for op in out_path:
    with open(op, 'r') as f: # close 없이 txt 파일 읽어와서 f에 적용
      example = f.readlines() # ['첫 번째 줄 \n', '두 번째 줄 \n', '세 번째 줄'] 식으로 저장
      # print(example)
      temp, now_fps, idx = [[]], example[0].split(' ')[0], 0
      # temp => fps(프레임) 별로 받기 위한 2중 리스트
      # now_fps => 가장 첫 번째 프레임 (이전 프레임)
      # idx => temp의 프레임 별 index
      for ex in example:
        if now_fps != ex.split(' ')[0]: # 이전 프레임과 현재 프레임이 다르면 = 다음 프레임으로 넘어가면
          temp.append([]) # temp에 다음 프레임의 빈 리스트를 만들어줌
          idx += 1
          now_fps = ex.split(' ')[0] # 이전 프레임을 현재 프레임으로 업데이트
        temp[idx].append(list(int(x) for x in ex.split(' ')[1:-5])) # txt 파일에서 [id, x, y ,width, height]만 뽑아서 저장
      result.append(temp)
  return result

In [None]:
import numpy as np

just_do = Text_List(out_path)
print(np.shape(just_do), '\n')
for x in just_do:
  print('\n', np.shape(x), '\n')
  for y in x:
    print(np.shape(y))

(1, 353) 


 (353,) 

(16, 5)
(16, 5)
(15, 5)
(18, 5)
(17, 5)
(15, 5)
(15, 5)
(16, 5)
(16, 5)
(16, 5)
(16, 5)
(16, 5)
(15, 5)
(15, 5)
(15, 5)
(14, 5)
(15, 5)
(16, 5)
(15, 5)
(16, 5)
(17, 5)
(17, 5)
(16, 5)
(16, 5)
(16, 5)
(17, 5)
(16, 5)
(15, 5)
(13, 5)
(16, 5)
(18, 5)
(17, 5)
(16, 5)
(15, 5)
(15, 5)
(14, 5)
(15, 5)
(14, 5)
(14, 5)
(14, 5)
(14, 5)
(14, 5)
(14, 5)
(16, 5)
(16, 5)
(17, 5)
(16, 5)
(16, 5)
(17, 5)
(17, 5)
(16, 5)
(17, 5)
(14, 5)
(14, 5)
(16, 5)
(15, 5)
(15, 5)
(15, 5)
(16, 5)
(17, 5)
(17, 5)
(17, 5)
(17, 5)
(17, 5)
(13, 5)
(15, 5)
(15, 5)
(14, 5)
(15, 5)
(15, 5)
(14, 5)
(11, 5)
(12, 5)
(13, 5)
(12, 5)
(13, 5)
(13, 5)
(11, 5)
(11, 5)
(11, 5)
(12, 5)
(12, 5)
(11, 5)
(12, 5)
(11, 5)
(12, 5)
(10, 5)
(10, 5)
(10, 5)
(10, 5)
(11, 5)
(11, 5)
(10, 5)
(10, 5)
(9, 5)
(9, 5)
(11, 5)
(12, 5)
(11, 5)
(11, 5)
(10, 5)
(11, 5)
(10, 5)
(10, 5)
(11, 5)
(13, 5)
(12, 5)
(11, 5)
(11, 5)
(12, 5)
(12, 5)
(11, 5)
(12, 5)
(10, 5)
(15, 5)
(14, 5)
(11, 5)
(12, 5)
(11, 5)
(14, 5)
(13, 5)
(13, 5)
(11,

  result = asarray(a).shape
