## 1. Data Import

In [26]:
# Print the version of the `python` executable found on the runtime's PATH.
!python --version

Python 3.10.12


In [1]:
import pandas as pd
import numpy as np
import torch

In [2]:
# Download the sample clip 'VIRAT_S_000200_03_000657_000899_first300.mp4' from Google Drive
# and save it locally as test.mp4.
# This is the first 300 frames of 'VIRAT_S_000200_03_000657_000899.mp4'
# viewable at: https://drive.google.com/file/d/1C-HPOEIdZnJm_RSbYGXH6gMLhlmobaxn/view?usp=drive_link

!gdown "https://drive.google.com/uc?id=1C-HPOEIdZnJm_RSbYGXH6gMLhlmobaxn" -O test.mp4

Downloading...
From: https://drive.google.com/uc?id=1C-HPOEIdZnJm_RSbYGXH6gMLhlmobaxn
To: /content/test.mp4
100% 10.4M/10.4M [00:00<00:00, 27.2MB/s]


I used `ffmpeg -i VIRAT_S_000200_03_000657_000899.mp4 -c:v copy -frames:v 300 -r 30 -an ~/Desktop/VIRAT_S_000200_03_000657_000899_first300.mp4` to cut the first 300 frames of this video into a separate clip.



In [3]:
# display details of clip: container format and per-stream metadata, printed as JSON
!ffprobe -v quiet -print_format json -show_format -show_streams test.mp4

{
    "streams": [
        {
            "index": 0,
            "codec_name": "mpeg4",
            "codec_long_name": "MPEG-4 part 2",
            "profile": "Simple Profile",
            "codec_type": "video",
            "codec_time_base": "1/30",
            "codec_tag_string": "mp4v",
            "codec_tag": "0x7634706d",
            "width": 1280,
            "height": 720,
            "coded_width": 1280,
            "coded_height": 720,
            "has_b_frames": 0,
            "sample_aspect_ratio": "1:1",
            "display_aspect_ratio": "16:9",
            "pix_fmt": "yuv420p",
            "level": 3,
            "color_range": "tv",
            "color_space": "bt709",
            "color_transfer": "bt709",
            "color_primaries": "bt709",
            "chroma_location": "left",
            "refs": 1,
            "quarter_sample": "false",
            "divx_packed": "false",
            "r_frame_rate": "30/1",
            "avg_frame_rate": "30/1",
            "tim

In [4]:
# List the working directory to confirm test.mp4 was downloaded.
!ls -lh

total 10M
drwxr-xr-x 1 root root 4.0K Jun 23 13:41 sample_data
-rw-r--r-- 1 root root  10M Jun 26 21:39 test.mp4


## 2. Ground Truth

- Data from https://gitlab.kitware.com/viratdata/viratannotations
- obtain the ground truth bounding boxes data
- store in the pandas DataFrame `df_gt`

In [5]:
# VIRAT_S_000200_03_000657_000899.types.yml
# Saved as test.types.yml. Read further down for each track's ('id1') class label
# and confidence, stored under the 'cset3' key.
!gdown "https://drive.google.com/uc?id=12h_35hXzoSciduBzDWmpKB2-rgUuraiN" -O test.types.yml

Downloading...
From: https://drive.google.com/uc?id=12h_35hXzoSciduBzDWmpKB2-rgUuraiN
To: /content/test.types.yml
  0% 0.00/2.71k [00:00<?, ?B/s]100% 2.71k/2.71k [00:00<00:00, 14.6MB/s]


In [6]:
# VIRAT_S_000200_03_000657_000899.regions.yml
# Saved as test.regions.yml.
# NOTE(review): this file is not parsed in the visible part of the notebook — confirm it is needed.
!gdown "https://drive.google.com/uc?id=1Ieau47ZxLLpE6mw04XwjfANeQPB9m7hY" -O test.regions.yml

Downloading...
From: https://drive.google.com/uc?id=1Ieau47ZxLLpE6mw04XwjfANeQPB9m7hY
To: /content/test.regions.yml
100% 12.6M/12.6M [00:00<00:00, 49.1MB/s]


In [7]:
# VIRAT_S_000200_03_000657_000899.geom.yml
# Saved as test.geom.yml. Read further down for each annotated box: detection id ('id0'),
# track id ('id1'), frame number ('ts0') and the space-separated box corners ('g0').
!gdown "https://drive.google.com/uc?id=1UH9s2MPSZFdJJ7TD827DrmqGPtR_CWdQ" -O test.geom.yml

Downloading...
From: https://drive.google.com/uc?id=1UH9s2MPSZFdJJ7TD827DrmqGPtR_CWdQ
To: /content/test.geom.yml
100% 8.54M/8.54M [00:00<00:00, 31.9MB/s]


In [8]:
# VIRAT_S_000200_03_000657_000899.activities.yml
# Saved as test.activities.yml.
# NOTE(review): this file is not parsed in the visible part of the notebook — confirm it is needed.
!gdown "https://drive.google.com/uc?id=1tTAnWLE5f9FhbWCe4vElR7gD9aK0jYin" -O test.activities.yml

Downloading...
From: https://drive.google.com/uc?id=1tTAnWLE5f9FhbWCe4vElR7gD9aK0jYin
To: /content/test.activities.yml
  0% 0.00/14.3k [00:00<?, ?B/s]100% 14.3k/14.3k [00:00<00:00, 62.9MB/s]


In [9]:
# List the working directory to confirm the four annotation files sit alongside test.mp4.
!ls -lh

total 31M
drwxr-xr-x 1 root root 4.0K Jun 23 13:41 sample_data
-rw-r--r-- 1 root root  14K Jun 26 21:42 test.activities.yml
-rw-r--r-- 1 root root 8.2M Jun 26 21:42 test.geom.yml
-rw-r--r-- 1 root root  10M Jun 26 21:39 test.mp4
-rw-r--r-- 1 root root  12M Jun 26 21:42 test.regions.yml
-rw-r--r-- 1 root root 2.7K Jun 26 21:41 test.types.yml


In [10]:
# Parse the ground-truth annotation files into Python objects.
# Both files are loaded in full (every frame of the original video, not only
# the first frame); filtering and joining happen in the next cell.
#   geom  -> per-box records (ids, frame number, bounding box)
#   types -> per-track records (class label and confidence)
import yaml

with open('test.geom.yml', 'r') as geom_file:
    geom = yaml.safe_load(geom_file)

with open('test.types.yml', 'r') as types_file:
    types = yaml.safe_load(types_file)

### Create Ground Truth DataFrame

As a `pd.DataFrame` <br>
Format: <br>
{idx: geom_id0, <br>
track_id: geom_id1, <br>
label: types_cset3-key_on_id1, <br>
conf: types_cset3-value_on_id1, <br>
frame: geom_ts0, <br>
xmin: geom_g0.split(' ')[0], ymin: geom_g0.split(' ')[1], xmax: geom_g0.split(' ')[2], ymax: geom_g0.split(' ')[3]}

In [31]:
# Build the ground-truth table: one row per annotated box in `geom`, joined with
# the class label and confidence of its track from `types` (matched on 'id1').

# Integer class ids used for comparison with the YOLOv8 predictions later on.
# Any label not listed here (e.g. 'Door') is encoded as -1.
LABEL_TO_INT = {'Person': 0, 'Bike': 1, 'Vehicle': 2}

# Index the types records once by track id, so each detection costs a dict
# lookup instead of a scan over every types entry. If a track id appears more
# than once, the first entry wins.
track_labels = {}
for entry in types:
  info = entry.get('types') if isinstance(entry, dict) else None
  if not isinstance(info, dict) or 'id1' not in info:
    continue  # not a 'types' record
  cset = info.get('cset3')
  if not isinstance(cset, dict) or not cset:
    continue  # no label/confidence pair to read
  # First (label, confidence) pair of the class set, as in the original lookup.
  track_labels.setdefault(info['id1'], next(iter(cset.items())))

detections = []

# Parallel column lists used to populate the DataFrame. They are only ever
# appended to together, after a record has been fully parsed, so they cannot
# drift out of alignment.
idx, track_id, labels, labels_ints, confs, frame = [], [], [], [], [], []
xmin, ymin, xmax, ymax = [], [], [], []
skipped = 0  # geometry records that could not be parsed

for record in geom:
  box = record.get('geom') if isinstance(record, dict) else None
  if not isinstance(box, dict):
    continue  # entries without a 'geom' mapping carry no box
  try:
    if not box['id0'] > -1:  # keep every detection with a valid id
      continue
    det_id, trk_id, frame_no = box['id0'], box['id1'], box['ts0']
    # 'g0' holds the box corners as "xmin ymin xmax ymax".
    x0, y0, x1, y1 = (int(v) for v in box['g0'].split()[:4])
    # Tracks missing from types.yml keep their row, with an empty label.
    label, conf = track_labels.get(trk_id, (None, float('nan')))
  except (KeyError, TypeError, ValueError, AttributeError):
    skipped += 1
    continue

  detections.append(record)
  idx.append(det_id)
  track_id.append(trk_id)
  frame.append(frame_no)
  xmin.append(x0)
  ymin.append(y0)
  xmax.append(x1)
  ymax.append(y1)
  labels.append(label)
  confs.append(conf)
  labels_ints.append(LABEL_TO_INT.get(label, -1))

print('idx: ', len(idx),
      'track_id: ', len(track_id),
      'label: ', len(labels),
      'conf: ', len(confs),
      'frame: ', len(frame))
if skipped:
  print('skipped malformed geom records: ', skipped)


gt = {'idx_gt': idx, 'track_id_gt': track_id, 'label_gt': labels,
      'label_as_int_gt': labels_ints, 'conf_gt': confs, 'frame_gt': frame,
      'xmin_gt': xmin, 'ymin_gt': ymin, 'xmax_gt': xmax, 'ymax_gt': ymax}
df_gt = pd.DataFrame(gt)
df_gt

idx:  79997 track_id:  79997 label:  79997 conf:  79997 frame:  79997


Unnamed: 0,idx_gt,track_id_gt,label_gt,label_as_int_gt,conf_gt,frame_gt,xmin_gt,ymin_gt,xmax_gt,ymax_gt
0,0,1,Vehicle,2,1.0,0,946,327,1010,360
1,1,1,Vehicle,2,1.0,1,945,326,1009,359
2,2,1,Vehicle,2,1.0,2,945,326,1009,359
3,3,1,Vehicle,2,1.0,3,945,326,1009,359
4,4,1,Vehicle,2,1.0,4,945,326,1009,359
...,...,...,...,...,...,...,...,...,...,...
79992,79992,5030,Door,-1,1.0,7238,72,423,95,487
79993,79993,5030,Door,-1,1.0,7239,72,423,95,487
79994,79994,5030,Door,-1,1.0,7240,72,423,95,487
79995,79995,5030,Door,-1,1.0,7241,72,423,95,487


In [32]:
df_gt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79997 entries, 0 to 79996
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   idx_gt           79997 non-null  int64  
 1   track_id_gt      79997 non-null  int64  
 2   label_gt         79997 non-null  object 
 3   label_as_int_gt  79997 non-null  int64  
 4   conf_gt          79997 non-null  float64
 5   frame_gt         79997 non-null  int64  
 6   xmin_gt          79997 non-null  int64  
 7   ymin_gt          79997 non-null  int64  
 8   xmax_gt          79997 non-null  int64  
 9   ymax_gt          79997 non-null  int64  
dtypes: float64(1), int64(8), object(1)
memory usage: 6.1+ MB


## 3. Get Predictions
- get inference from YOLOv8 with ByteTracker on this test clip
- store predictions in Pandas DataFrame 'preddf'

Install YOLOv8

In [14]:
# Install the Ultralytics package (provides YOLO) and `lap`, with pip's quiet flag.
# NOTE(review): versions are unpinned, so a fresh run may install a different release
# than the one that produced the outputs recorded in this notebook — consider pinning.
!pip -q install ultralytics
!pip -q install lap

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m612.4/612.4 kB[0m [31m41.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for lap (setup.py) ... [?25l[?25hdone


In [15]:
from ultralytics import YOLO
# NOTE(review): `lap` is not referenced directly in the visible cells; presumably it is
# imported to verify the install used by the tracker — confirm.
import lap

# Load the 'yolov8n.pt' checkpoint; the recorded output shows the weights being
# downloaded when the file is not already present.
model = YOLO('yolov8n.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to yolov8n.pt...
100%|██████████| 6.23M/6.23M [00:00<00:00, 8.37MB/s]


In [16]:
# Run YOLOv8 detection with the ByteTrack tracker over the test clip.
# - conf / iou: detection confidence threshold and IoU threshold passed to the model.
# - classes=[0,1,2]: keep only class ids 0, 1 and 2.
# - save_txt=True: also write per-frame label files (read back in the next cell).
# - stream=True returns a lazy generator; list() consumes it so every frame's
#   result is kept in memory for building the predictions table.

# Use the first GPU when one is available, otherwise fall back to the CPU so the
# cell also runs on a runtime without CUDA.
track_device = 0 if torch.cuda.is_available() else 'cpu'

# Relative path: the clip was saved as test.mp4 in the working directory.
results = model.track(source='test.mp4', conf=0.12, iou=0.5,
                      device=track_device, save_txt=True, imgsz=1280, classes=[0,1,2],
                      tracker="bytetrack.yaml", stream=True)
list_results = list(results)


video 1/1 (1/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 62.0ms
video 1/1 (2/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 13.4ms
video 1/1 (3/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 13.4ms
video 1/1 (4/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 13.3ms
video 1/1 (5/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 13.3ms
video 1/1 (6/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 13.4ms
video 1/1 (7/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 13.4ms
video 1/1 (8/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 13.4ms
video 1/1 (9/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 12.9ms
video 1/1 (10/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 12.9ms
video 1/1 (11/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 14.3ms
video 1/1 (12/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 12.9ms
video 1/1 (13/300) /content/test.mp4: 736x1280 3 persons, 5 cars, 12.9ms
video 1/1 (14/300) /content/test.mp4: 736x1280 3 persons, 5

In [17]:
# frame 1 detections as .txt file(s), written by the tracking run (save_txt=True)
# NOTE(review): each row appears to be `class x_center y_center width height track_id`
# with box values normalised to [0, 1] — confirm against the Ultralytics docs.
!cat runs/detect/track/labels/test_1.txt

2 0.137419 0.553092 0.0945308 0.0548357 1
2 0.763876 0.478838 0.0469688 0.0366087 2
2 0.123751 0.511463 0.0826546 0.044233 3
2 0.567661 0.733765 0.111929 0.0792986 4
0 0.232489 0.761387 0.019395 0.080564 5
0 0.581185 0.575837 0.0138645 0.05707 6
2 0.751597 0.962718 0.090546 0.0741219 7
0 0.909582 0.944357 0.0208885 0.095949 8


ultralytics.yolo.engine.results.Boxes attribute formats:
- Boxes.boxes, Boxes.data use [xmin, ymin, xmax, ymax, track_id, conf, class]
- xywh, xywhn use [xmid, ymid, w, h]
- xyxy, xyxyn use [xmin, ymin, xmax, ymax]