### Install Kaggle and video dataset

In [2]:
!pip install -q kaggle
!kaggle datasets download hbahruz/multiple-lego-tracking-dataset

Dataset URL: https://www.kaggle.com/datasets/hbahruz/multiple-lego-tracking-dataset
License(s): other
Downloading multiple-lego-tracking-dataset.zip to /content
100% 1.33G/1.33G [01:18<00:00, 20.3MB/s]
100% 1.33G/1.33G [01:19<00:00, 18.0MB/s]


In [3]:
import zipfile

def extract_zip(file_path, extract_to='.'):
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

extract_zip('multiple-lego-tracking-dataset.zip', 'lego-tracking')

### Pre-trained parameters has been uploaded as ZIP folder
### It is unzipped

In [9]:
extract_zip('rt-detr.zip', 'lego-tracking')

### Install libraries

In [4]:
!pip install ultralytics deep_sort_realtime
!pip install torch torchvision torchaudio

Collecting ultralytics
  Downloading ultralytics-8.3.1-py3-none-any.whl.metadata (34 kB)
Collecting deep_sort_realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.8-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.3.1-py3-none-any.whl (881 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m881.3/881.3 kB[0m [31m49.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m51.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.8-py3-none-any.whl (26 kB)
Installing collected packages: deep_sort_realtime, ultralytics-thop, ultralytics
Successfully installed deep_sort_realtime-1.3.2 ultralytics-8.3.1 ultralytics-thop-2.0.8


### Import libraries

In [5]:
from ultralytics import RTDETR
from deep_sort_realtime.deepsort_tracker import DeepSort

In [6]:
import os
import torch
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re

In [23]:
base_path = "/content/lego-tracking/Lego_Tracking/Test"
test_videos = os.listdir(base_path)
test_videos.sort()
test_videos

['Lego1', 'Lego2']

In [24]:
def detect_and_track(train_video, model, tracker, pred_conf, fps = 20.0):
    video_frames = os.path.join(base_path, f"{train_video}/img1")

    ###################### To record the video
    first_frame_path = os.path.join(video_frames, 'frame0001.jpg')
    first_frame = cv2.imread(first_frame_path)
    frame_height, frame_width = first_frame.shape[:2]

    video_output = os.path.join('/content/', f'{train_video}_tracked.mp4')
    video_writer = cv2.VideoWriter(video_output, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
    ######################

    tracking_results = []
    unique_track_ids = set()
    frames = os.listdir(video_frames)
    frame_pattern = re.compile(r'frame(\d+)\.jpg')
    frames_sorted = sorted(frames, key=lambda x: int(frame_pattern.match(x).group(1)))

    tracker_initialized = False
    for frame_id, frame_name in enumerate(frames_sorted):
        frame_path = os.path.join(video_frames, frame_name)
        frame = cv2.imread(frame_path)

        result_frame = model(frame, conf=pred_conf)
        detections = result_frame[0].boxes.xywh.tolist()
        confidences = result_frame[0].boxes.conf.tolist()


        # Prepare detections in the format required by DeepSort
        formatted_detections = []
        for (x_center, y_center, width, height), conf in zip(detections, confidences):
            left = x_center - width / 2
            top = y_center - height / 2
            formatted_detections.append(([left, top, width, height], conf, 0))

        if not tracker_initialized and len(formatted_detections) > 0:
            tracker_initialized = True
        if len(formatted_detections) > 0:
            tracks = tracker.update_tracks(formatted_detections, frame=frame)
        else:
            tracks = []
        current_frame_object_count = 0

        for track in tracks:
            if not track.is_confirmed():
                continue
            track_id = track.track_id
            x1, y1, x2, y2 = map(int, track.to_ltrb())
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"ID: {track_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            current_frame_object_count += 1
            unique_track_ids.add(track_id)
            tracking_results.append([frame_id, int(track_id), x1, y1, x2 - x1, y2 - y1])

        cv2.putText(frame, f"Objects in frame: {current_frame_object_count}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(frame, f"Unique objects: {len(unique_track_ids)}", (10, 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        video_writer.write(frame)

    video_writer.release()
    return tracking_results

In [48]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [49]:
model = RTDETR('/content/lego-tracking/rt-detr/best.pt').to(device)
test_frames = [20, 25]

tracking_results_dict = {}
for train_video, test_frame in zip(test_videos, test_frames):
    print(5*"-------------------------------------------------------------------------------------------------------\n")
    print(train_video)
    tracker = DeepSort(max_age=10, n_init=3, nms_max_overlap=0.8)
    tr_res = detect_and_track(train_video, model, tracker, 0.25, float(test_frame))
    tracking_results_dict[train_video] = tr_res

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Speed: 4.4ms preprocess, 44.8ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 8 class_0s, 44.1ms
Speed: 3.8ms preprocess, 44.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 8 class_0s, 55.0ms
Speed: 4.0ms preprocess, 55.0ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 8 class_0s, 47.4ms
Speed: 4.9ms preprocess, 47.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 7 class_0s, 53.8ms
Speed: 10.6ms preprocess, 53.8ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 7 class_0s, 44.8ms
Speed: 4.1ms preprocess, 44.8ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 7 class_0s, 44.9ms
Speed: 4.1ms preprocess, 44.9ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 8 class_0s, 44.3ms
Speed: 4.2ms preprocess, 44.3ms infere

### Installing and Importing MOTMetrics

In [14]:
!pip install motmetrics

Collecting motmetrics
  Downloading motmetrics-1.4.0-py3-none-any.whl.metadata (20 kB)
Collecting xmltodict>=0.12.0 (from motmetrics)
  Downloading xmltodict-0.13.0-py2.py3-none-any.whl.metadata (7.7 kB)
Downloading motmetrics-1.4.0-py3-none-any.whl (161 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.5/161.5 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict, motmetrics
Successfully installed motmetrics-1.4.0 xmltodict-0.13.0


In [15]:
import motmetrics as mm

### Metric Evaluation Functions

In [27]:
def load_ground_truth(gt_file):
    ground_truths = []
    with open(gt_file, 'r') as file:
        for line in file:
            parts = list(map(float, line.strip().split(',')))
            frame_id, obj_id, x, y, width, height = int(parts[0]), int(parts[1]), parts[2], parts[3], parts[4], parts[5]
            ground_truths.append([frame_id, obj_id, x, y, width, height])
    return np.array(ground_truths)

In [28]:
def evaluate_tracking(ground_truths, tracking_results):
    def results_to_motchallenge_format(results):
        return np.array([[r[0], -1, r[2], r[3], r[4], r[5], 1, -1, -1, -1] for r in results])

    gt_mot = results_to_motchallenge_format(ground_truths)
    tr_mot = results_to_motchallenge_format(tracking_results)

    acc = mm.MOTAccumulator(auto_id=True)

    gt_dict = {frame_id: [] for frame_id in np.unique(ground_truths[:, 0])}
    for gt in gt_mot:
        gt_dict[gt[0]].append(gt[1:])

    tr_dict = {frame_id: [] for frame_id in np.unique(tracking_results[:, 0])}
    for tr in tr_mot:
        tr_dict[tr[0]].append(tr[1:])

    for frame_id in gt_dict.keys():
        gt_frame = gt_dict[frame_id]
        tr_frame = tr_dict.get(frame_id, [])

        gt_ids = [gt[0] for gt in gt_frame]
        tr_ids = [tr[0] for tr in tr_frame]

        gt_bboxes = np.array([gt[1:5] for gt in gt_frame])
        tr_bboxes = np.array([tr[1:5] for tr in tr_frame])

        distances = mm.distances.iou_matrix(gt_bboxes, tr_bboxes, max_iou=1)

        acc.update(gt_ids, tr_ids, distances)

    mh = mm.metrics.create()
    summary = mh.compute(acc, metrics=['mota', 'motp', 'idf1', 'recall', 'precision', 'num_objects', 'num_matches', 'num_false_positives', 'num_misses', 'num_switches'])

    strsummary = mm.io.render_summary(summary, formatters=mh.formatters)
    print(strsummary)

    return summary

In [64]:
def aggregate_metrics(gt_files, tracking_results_dict):
    overall_acc = mm.MOTAccumulator(auto_id=True)

    dct = {}
    metrics=['mota', 'motp', 'idf1', 'recall', 'precision', 'num_objects', 'num_matches', 'num_false_positives', 'num_misses', 'num_switches']


    for gt_file, track_key in zip(gt_files, tracking_results_dict):
        track_key = gt_file.split("/")[-2]
        print(gt_file, track_key)
        ground_truths = load_ground_truth(gt_file)
        tr_mot = np.array(tracking_results_dict[track_key])

        et = evaluate_tracking(ground_truths, tr_mot)
        for metric in metrics:
            dct[metric] = dct.get(metric, 0) + et[metric]


    for metric in metrics:
        dct[metric] /= len(gt_files)
    return pd.DataFrame(dct)

In [45]:
main_path = "/content/lego-tracking/Lego_Tracking/Test"
gt_files = [os.path.join(main_path, vid, "gt.txt") for vid in os.listdir(main_path)]
gt_files.sort()
gt_files

['/content/lego-tracking/Lego_Tracking/Test/Lego1/gt.txt',
 '/content/lego-tracking/Lego_Tracking/Test/Lego2/gt.txt']

In [46]:
sorted_dict = dict(sorted(tracking_results_dict.items()))
print(sorted_dict)

{'Lego1': [[20, 1, 401, -2, 267, 20], [21, 1, 348, -2, 320, 24], [22, 1, 315, -2, 352, 26], [22, 2, 547, 0, 192, 44], [23, 1, 282, -2, 384, 29], [23, 2, 541, 0, 211, 50], [23, 6, 383, 0, 162, 37], [24, 1, 249, -3, 416, 32], [24, 2, 536, 0, 228, 56], [24, 6, 370, 0, 185, 44], [25, 1, 216, -3, 448, 34], [25, 2, 521, 1, 248, 60], [25, 6, 363, 0, 199, 49], [26, 1, 183, -3, 480, 36], [26, 2, 516, 1, 264, 65], [26, 6, 353, 0, 214, 54], [27, 1, 150, -3, 512, 39], [27, 2, 510, 1, 282, 69], [27, 6, 347, 0, 221, 58], [28, 1, 117, -4, 544, 42], [28, 2, 504, 2, 300, 73], [28, 6, 344, 0, 226, 62], [29, 1, 84, -4, 576, 44], [29, 2, 499, 2, 317, 77], [29, 6, 343, 0, 227, 65], [29, 8, 599, 0, 111, 76], [30, 1, 51, -4, 608, 47], [30, 2, 493, 2, 335, 82], [30, 6, 339, 0, 233, 69], [30, 8, 599, 0, 119, 81], [31, 1, 19, -4, 639, 49], [31, 2, 487, 3, 352, 86], [31, 6, 337, 0, 236, 73], [31, 8, 599, 0, 123, 84], [32, 2, 482, 3, 369, 90], [32, 6, 334, 0, 243, 79], [32, 8, 598, 0, 129, 90], [33, 2, 476, 3, 38

In [32]:
aggregate_metrics(gt_files, sorted_dict)  # (max_age=20, n_init=3, nms_max_overlap=0.8)

/content/lego-tracking/Lego_Tracking/Test/Lego1/gt.txt Lego1
   mota  motp   idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 80.3% 0.899 953.6%  99.6%     83.8%       10628       10586                2053         42            0
/content/lego-tracking/Lego_Tracking/Test/Lego2/gt.txt Lego2
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 89.2% 0.684 1089.5%  97.3%     92.4%       12601       12257                1011        344            0


Unnamed: 0,mota,motp,idf1,recall,precision,num_objects,num_matches,num_false_positives,num_misses,num_switches
0,0.847674,0.79133,10.215444,0.984374,0.880684,11614.5,11421.5,1532.0,193.0,0.0


In [37]:
aggregate_metrics(gt_files, sorted_dict)  # (max_age=10, n_init=3, nms_max_overlap=0.8)

/content/lego-tracking/Lego_Tracking/Test/Lego1/gt.txt Lego1
   mota  motp   idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 90.2% 0.880 922.4%  99.2%     91.6%       10628       10546                 961         82            0
/content/lego-tracking/Lego_Tracking/Test/Lego2/gt.txt Lego2
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 91.3% 0.647 1066.1%  95.3%     95.9%       12601       12012                 509        589            0


Unnamed: 0,mota,motp,idf1,recall,precision,num_objects,num_matches,num_false_positives,num_misses,num_switches
0,0.907364,0.763426,9.942421,0.972771,0.937917,11614.5,11279.0,735.0,335.5,0.0


In [42]:
aggregate_metrics(gt_files, sorted_dict)  # (max_age=10, n_init=3, nms_max_overlap=0.9)

/content/lego-tracking/Lego_Tracking/Test/Lego1/gt.txt Lego1
   mota  motp   idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 89.4% 0.889 924.6%  99.3%     91.0%       10628       10554                1050         74            0
/content/lego-tracking/Lego_Tracking/Test/Lego2/gt.txt Lego2
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 90.9% 0.634 1068.4%  95.6%     95.3%       12601       12045                 591        556            0


Unnamed: 0,mota,motp,idf1,recall,precision,num_objects,num_matches,num_false_positives,num_misses,num_switches
0,0.901609,0.761426,9.965312,0.974457,0.931371,11614.5,11299.5,820.5,315.0,0.0


In [47]:
aggregate_metrics(gt_files, sorted_dict)  # (max_age=10, n_init=5, nms_max_overlap=0.8)

/content/lego-tracking/Lego_Tracking/Test/Lego1/gt.txt Lego1
   mota  motp   idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 92.1% 0.879 909.5%  98.3%     94.0%       10628       10450                 662        178            0
/content/lego-tracking/Lego_Tracking/Test/Lego2/gt.txt Lego2
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 92.1% 0.638 1054.2%  94.3%     97.7%       12601       11884                 282        717            0


Unnamed: 0,mota,motp,idf1,recall,precision,num_objects,num_matches,num_false_positives,num_misses,num_switches
0,0.920842,0.758237,9.818498,0.963176,0.958623,11614.5,11167.0,472.0,447.5,0.0


## Train

In [66]:
base_path = "/content/lego-tracking/Lego_Tracking/Train"
train_videos = os.listdir(base_path)
train_videos.sort()
train_videos = train_videos[:1] +  train_videos[2:] + train_videos[1:2]
train_videos

['Lego1',
 'Lego2',
 'Lego3',
 'Lego4',
 'Lego5',
 'Lego6',
 'Lego7',
 'Lego8',
 'Lego9',
 'Lego10']

In [None]:
model = RTDETR('/content/lego-tracking/rt-detr/best.pt').to(device)
train_frames = [16, 13, 20, 20, 21, 20, 15, 13, 19, 20]

tracking_results_dict = {}
for train_video, train_frame in zip(train_videos, train_frames):
    print(5*"-------------------------------------------------------------------------------------------------------\n")
    print(train_video)
    tracker = DeepSort(max_age=20, n_init=3, nms_max_overlap=0.8)
    tr_res = detect_and_track(train_video, model, tracker, 0.25, float(test_frame))
    tracking_results_dict[train_video] = tr_res

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

0: 640x640 2 class_0s, 35.6ms
Speed: 4.6ms preprocess, 35.6ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 class_0s, 35.6ms
Speed: 5.6ms preprocess, 35.6ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 class_0s, 36.0ms
Speed: 5.1ms preprocess, 36.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 class_0s, 43.7ms
Speed: 4.6ms preprocess, 43.7ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 class_0s, 36.4ms
Speed: 4.6ms preprocess, 36.4ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 class_0s, 36.3ms
Speed: 7.0ms preprocess, 36.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 class_0s, 36.3ms
Speed: 4.8ms preprocess, 36.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 class_0s, 38.3ms
Speed: 5

In [None]:
main_path = "/content/lego-tracking/Lego_Tracking/Train"
gt_files = [os.path.join(main_path, vid, "gt.txt") for vid in os.listdir(main_path)]
gt_files.sort()
gt_files = gt_files[:1] + gt_files[2:] + gt_files[1:2]
gt_files

In [None]:
sorted_dict = dict(sorted(tracking_results_dict.items()))
print(sorted_dict)

In [65]:
aggregate_metrics(gt_files, sorted_dict)  # (max_age=10, n_init=3, nms_max_overlap=0.8)

/content/lego-tracking/Lego_Tracking/Train/Lego1/gt.txt Lego1
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 91.7% 0.899 1037.2%  97.8%     94.1%        2220        2172                 136         48            0
/content/lego-tracking/Lego_Tracking/Train/Lego2/gt.txt Lego2
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 91.4% 0.874 1252.4%  92.2%     99.2%        1648        1520                  13        128            0
/content/lego-tracking/Lego_Tracking/Train/Lego3/gt.txt Lego3
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 93.4% 0.561 1133.4%  99.0%     94.6%       10549       10445                 596        104            0
/content/lego-tracking/Lego_Tracking/Train/Lego4/gt.txt Lego4
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 9

Unnamed: 0,mota,motp,idf1,recall,precision,num_objects,num_matches,num_false_positives,num_misses,num_switches
0,0.923367,0.806489,11.884874,0.973418,0.952048,6307.3,6179.7,285.1,127.6,0.0


In [71]:
aggregate_metrics(gt_files, sorted_dict)  # (max_age=20, n_init=3, nms_max_overlap=0.8)

/content/lego-tracking/Lego_Tracking/Train/Lego1/gt.txt Lego1
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 85.4% 0.915 1066.1%  98.4%     88.3%        2220        2184                 288         36            0
/content/lego-tracking/Lego_Tracking/Train/Lego2/gt.txt Lego2
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 92.1% 0.884 1282.5%  94.4%     97.6%        1648        1555                  38         93            0
/content/lego-tracking/Lego_Tracking/Train/Lego3/gt.txt Lego3
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 87.3% 0.674 1161.3%  99.3%     89.2%       10549       10480                1275         69            0
/content/lego-tracking/Lego_Tracking/Train/Lego4/gt.txt Lego4
   mota  motp    idf1 recall precision num_objects num_matches num_false_positives num_misses num_switches
0 8

Unnamed: 0,mota,motp,idf1,recall,precision,num_objects,num_matches,num_false_positives,num_misses,num_switches
0,0.868199,0.826528,12.171069,0.981701,0.899511,6307.3,6225.0,642.7,82.3,0.0
