In [2]:
import cv2
import cv2.legacy
import numpy as np
import dlib
import time

In [3]:
def named_frame(frame: cv2.typing.MatLike, number: int) -> cv2.typing.MatLike:
    named_frame = frame.copy()
    cv2.putText(
        named_frame,
        str(number),
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (255, 0, 0),
        2,
        cv2.LINE_AA,
    )
    return named_frame

## DLib

In [24]:
tracker = dlib.correlation_tracker()
first_index = 35
last_index = 100
path = lambda idx: f"../data/test/scenario_8/{idx}.jpg"
frame = cv2.imread(path(first_index))
x, y, w, h = cv2.selectROI(frame, False)
# x, y, w, h = 238, 41, 16, 25
print(x, y, w, h)
cv2.destroyAllWindows()

cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow("Frame", frame)
key = cv2.waitKey(0)
tracker.start_track(frame, dlib.rectangle(x, y, x + w, y + h))


index = first_index + 1
while index < last_index:
    image = cv2.imread(path(index))
    start = time.time()
    quality = tracker.update(image)
    position = tracker.get_position()
    end = time.time()
    x, y, w, h = int(position.left()), int(position.top()), int(position.width()), int(position.height())

    print(f"time: {(end-start)*1000:.2f}ms,", f"quality: {quality:.2f}")
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    cv2.imshow("Frame", named_frame(image, index))
    key = cv2.waitKey(0) & 0xFF
    if key == ord("q"):
        break
    elif key == 81:  # left arrow
        index -= 1
    elif key == 83:  # right arrow
        index += 1

cv2.destroyAllWindows()

Select a ROI and then press SPACE or ENTER button!
Cancel the selection process by pressing c button!
195 4 109 176
time: 9.21ms, quality: 17.68
time: 34.61ms, quality: 10.34
time: 33.78ms, quality: 10.68
time: 7.73ms, quality: 10.01
time: 34.18ms, quality: 9.26
time: 33.60ms, quality: 8.01
time: 32.68ms, quality: 7.60
time: 27.53ms, quality: 8.48
time: 36.87ms, quality: 8.37
time: 16.87ms, quality: 8.37
time: 34.14ms, quality: 8.82
time: 34.32ms, quality: 9.32
time: 7.89ms, quality: 9.55
time: 34.22ms, quality: 10.70
time: 34.29ms, quality: 10.80
time: 33.14ms, quality: 11.12
time: 32.74ms, quality: 11.49
time: 31.98ms, quality: 12.15
time: 33.35ms, quality: 12.64
time: 33.85ms, quality: 12.07
time: 35.48ms, quality: 12.90
time: 23.62ms, quality: 11.23
time: 36.44ms, quality: 12.95
time: 7.63ms, quality: 13.48
time: 34.71ms, quality: 13.67
time: 32.07ms, quality: 13.75
time: 35.77ms, quality: 12.33
time: 35.07ms, quality: 11.68
time: 33.06ms, quality: 14.14
time: 34.70ms, quality: 11.

## OpenCV trackers

In [8]:
def create_tracker():
    tracker_types = [
        "BOOSTING",
        "MIL",
        "KCF",
        "TLD",
        "MEDIANFLOW",
        "GOTURN",
        "MOSSE",
        "CSRT",
    ]
    tracker_type = "MEDIANFLOW"

    if tracker_type == "BOOSTING":
        tracker = cv2.legacy.TrackerBoosting_create()
    if tracker_type == "MIL":
        tracker = cv2.legacy.TrackerMIL_create()
    if tracker_type == "KCF":
        tracker = cv2.legacy.TrackerKCF_create()
    if tracker_type == "TLD":
        tracker = cv2.legacy.TrackerTLD_create()
    if tracker_type == "MEDIANFLOW":
        tracker = cv2.legacy.TrackerMedianFlow_create()
    if tracker_type == "GOTURN":
        tracker = cv2.legacy.TrackerGOTURN_create()
    if tracker_type == "MOSSE":
        tracker = cv2.legacy.TrackerMOSSE_create()
    if tracker_type == "CSRT":
        tracker = cv2.legacy.TrackerCSRT_create()

    return tracker


tracker = create_tracker()
first_index = 32
last_index = 800
path = lambda idx: f"../data/stairs/fwd/{idx}.jpg"
frame = cv2.imread(path(first_index))
bbox = cv2.selectROI(frame, False)
#bbox = 238, 41, 16, 25
x, y, w, h = bbox
print(x, y, w, h)
cv2.destroyAllWindows()

tracker.init(frame, bbox)
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow("Frame", frame)
key = cv2.waitKey(0)


index = first_index + 1
while index < last_index:
    image = cv2.imread(path(index))
    start = time.time()
    object_found, bbox = tracker.update(image)
    end = time.time()

    x, y, w, h = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
    print(f"time: {(end-start)*1000:.2f}ms,", f"found: {object_found}")
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    cv2.imshow("Frame", named_frame(image, index))
    key = cv2.waitKey(0) & 0xFF

    if key == ord("q"):
        break
    elif key == 81:  # left arrow
        index -= 1
    elif key == 83:  # right arrow
        index += 1

cv2.destroyAllWindows()

Select a ROI and then press SPACE or ENTER button!
Cancel the selection process by pressing c button!
0 142 16 37
time: 3.18ms, found: True
time: 5.05ms, found: True
time: 3.15ms, found: True
time: 3.98ms, found: True
time: 3.24ms, found: True
time: 4.30ms, found: True
time: 4.89ms, found: True
time: 3.62ms, found: True
time: 3.23ms, found: True
time: 2.84ms, found: True
time: 1.10ms, found: True
time: 3.31ms, found: True
time: 3.13ms, found: True
time: 3.69ms, found: True
time: 3.35ms, found: True
time: 6.58ms, found: True
time: 3.61ms, found: True
time: 6.34ms, found: True
time: 2.44ms, found: True
time: 1.62ms, found: True
time: 2.39ms, found: True
time: 1.60ms, found: True
time: 2.99ms, found: True
time: 3.74ms, found: True
time: 2.90ms, found: True
time: 5.21ms, found: True
time: 2.35ms, found: True
time: 3.34ms, found: True
time: 4.62ms, found: True
time: 5.25ms, found: True
time: 3.22ms, found: True
time: 2.68ms, found: True
time: 2.46ms, found: True
time: 1.81ms, found: True
ti

MEDIANFLOW demonstrated great results, in terms of accuracy and speed (around 3 ms per frame)