In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import cv2
import os
import time

import torch
import torchvision.ops.boxes # to calculate iou score

### Visual Tracker Benchmark
http://www.visual-tracking.net

The publisher of this dataset has tagged the sequences with 11 attributes, where each represents a challenging aspect in visual tracking:

**IV**: Illumination Variation - the illumination in the target region is significantly changed. <br>
**SV**: Scale Variation - the ratio of the bounding boxes of the first frame and the current frame is out of the range [1/ts, ts], ts > 1 (ts=2). <br>
**OCC**: Occlusion - the target is partially or fully occluded. <br>
**DEF**: Deformation - non-rigid object deformation. <br>
**MB**: Motion Blur - the target region is blurred due to the motion of target or camera. <br>
**FM**: Fast Motion - the motion of the ground truth is larger than tm pixels (tm=20). <br>
**IPR**: In-Plane Rotation - the target rotates in the image plane. <br>
**OPR**: Out-of-Plane Rotation - the target rotates out of the image plane. <br>
**OV**: Out-of-View - some portion of the target leaves the view. <br>
**BC**: Background Clutters - the background near the target has a similar color or texture to the target. <br>
**LR**: Low Resolution - the number of pixels inside the ground-truth bounding box is less than tr (tr=400). <br>

In [2]:
attr_lst = ["IV", "SV", "OCC", "DEF", "MB",
            "FM", "IPR", "OPR", "OV", "BC", "LR"]

# Each non-empty line of attributes.txt is split on ', ' into a list of
# sequence names. Line i is paired with attr_lst[i] below, so the file is
# expected to list the attributes in the same order as attr_lst.
attr_seq_lst = []
with open('./attributes.txt', 'r') as attr_txt:  # closed even if parsing fails
    for attr_line in attr_txt:
        attr_line = attr_line.rstrip('\n')
        if attr_line:
            attr_seq_lst.append(attr_line.split(', '))

# Fail early with a readable message instead of an IndexError further down.
assert len(attr_seq_lst) >= len(attr_lst), \
    "attributes.txt has fewer non-empty lines than there are attributes"

# a list that shows each attribute and the corresponding sequences that have that challenging aspect
for attr_name, attr_sequences in zip(attr_lst, attr_seq_lst):
    print("{0}: {1} ...".format(attr_name, ", ".join(attr_sequences[0:3])))


IV: Basketball, Box, Car1 ...
SV: Biker, BlurBody, BlurCar2 ...
OCC: Basketball, Biker, Bird2 ...
DEF: Basketball, Bird1, Bird2 ...
MB: Biker, BlurBody, BlurCar1 ...
FM: Biker, Bird1, Bird2 ...
IPR: Bird2, BlurBody, BlurFace ...
OPR: Basketball, Biker, Bird2 ...
OV: Biker, Bird1, Board ...
BC: Basketball, Board, Bolt2 ...
LR: Biker, Car1, Freeman3 ...


### Sequences

In [3]:
# os.listdir returns entries in arbitrary order, so sort them to get a
# reproducible sequence list (and DataFrame index) on every run. Only
# directories are kept so that stray files inside ./sequences are not
# mistaken for sequences.
seq_lst = sorted(
    entry for entry in os.listdir('./sequences')
    if os.path.isdir(os.path.join('./sequences', entry))
)
print("{} ...".format(", ".join(seq_lst[0:10])))
print("Total number of sequences: {}".format(len(seq_lst)))


Basketball, Biker, Bird1, Bird2, BlurBody, BlurCar1, BlurCar2, BlurCar3, BlurCar4, BlurFace ...
Total number of sequences: 98


In [4]:
def get_num_of_frames(seq_name):
    """Count the ``.jpg`` files stored in ``./sequences/<seq_name>/img/``.

    Args:
        seq_name: name of the sequence folder under ``./sequences``.

    Returns:
        Number of directory entries whose name ends with ``'.jpg'``.
    """
    img_dir = './sequences/' + seq_name + '/img/'
    jpg_names = [entry for entry in os.listdir(img_dir)
                 if entry.endswith('.jpg')]
    return len(jpg_names)


def get_first_frame_path(seq_name):
    """Return the path of the first ``.jpg`` frame of a sequence.

    Args:
        seq_name: name of the sequence folder under ``./sequences``.

    Returns:
        Path of the ``.jpg`` file in ``./sequences/<seq_name>/img/`` whose
        name sorts first, or ``''`` when the folder contains no ``.jpg`` file.
    """
    seq_img_folder_path = './sequences/' + seq_name + '/img/'
    # os.listdir returns entries in arbitrary order; sorting makes sure the
    # lowest-named frame is returned rather than whichever file is listed first.
    for img_name in sorted(os.listdir(seq_img_folder_path)):
        if img_name.endswith('.jpg'):
            return os.path.join(seq_img_folder_path, img_name)
    return ''


def get_frame_size(seq_name):
    """Return the frame size of a sequence, read from its first frame.

    Args:
        seq_name: name of the sequence folder under ``./sequences``.

    Returns:
        Tuple ``(width, height)`` taken from the shape of the image returned
        by ``cv2.imread`` for the path given by ``get_first_frame_path``.

    Raises:
        FileNotFoundError: if the frame could not be read.
    """
    first_frame_path = get_first_frame_path(seq_name)
    first_frame = cv2.imread(first_frame_path)
    if first_frame is None:
        # cv2.imread reports failure by returning None instead of raising;
        # turn that into an error that names the offending sequence.
        raise FileNotFoundError(
            "Could not read the first frame of sequence {!r} (path: {!r})".format(
                seq_name, first_frame_path))

    # The first two shape entries are (rows, cols) = (height, width).
    height, width = first_frame.shape[:2]
    return width, height


In [5]:
# One row per sequence: frame count, frame size and a 0/1 flag per attribute.
seq_info_columns = ['num_of_frames', 'frame_width', 'frame_height'] + attr_lst
df_seq_info = pd.DataFrame(0, columns=seq_info_columns, index=seq_lst)

for seq_name in df_seq_info.index:
    frame_count = get_num_of_frames(seq_name)
    width_px, height_px = get_frame_size(seq_name)
    df_seq_info.at[seq_name, 'num_of_frames'] = frame_count
    df_seq_info.at[seq_name, 'frame_width'] = width_px
    df_seq_info.at[seq_name, 'frame_height'] = height_px
    # flag every attribute whose sequence list mentions this sequence
    for attr_pos, attr_name in enumerate(attr_lst):
        if seq_name in attr_seq_lst[attr_pos]:
            df_seq_info.at[seq_name, attr_name] = 1

df_seq_info


Unnamed: 0,num_of_frames,frame_width,frame_height,IV,SV,OCC,DEF,MB,FM,IPR,OPR,OV,BC,LR
Basketball,725,576,432,1,0,1,1,0,0,0,1,0,1,0
Biker,142,640,360,0,1,1,0,1,1,0,1,1,0,1
Bird1,408,720,400,0,0,0,1,0,1,0,0,1,0,0
Bird2,99,720,400,0,0,1,1,0,1,1,1,0,0,0
BlurBody,334,640,480,0,1,0,1,1,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Twinnings,472,320,240,0,1,0,0,0,0,0,1,0,0,0
Vase,271,320,240,0,1,0,0,0,1,1,0,0,0,0
Walking,412,768,576,0,1,1,1,0,0,0,0,0,0,0
Walking2,500,384,288,0,1,1,0,0,0,0,0,0,0,1


### DASIAMRPN Tracking Results

In [6]:
# Trackers that can be selected; of these, 'DASIAMRPN' and 'GOTURN'
# are the DL-based ones.
available_trackers = [
    'BOOSTING',
    'CSRT',
    'DASIAMRPN',
    'GOTURN',
    'KCF',
    'MEDIANFLOW',
    'MIL',
    'MOSSE',
    'TLD',
]

# DASIAMRPN is the only tracker that was applied on all sequences
# (see the results folder for details).
tracker_name = 'DASIAMRPN'

In [7]:
# Result columns, one value per sequence:
#   FPS:  average frames per second
#   Fail: number of failed frames
#   IoU:  Intersection over Union

df_dasiamrpn_results = df_seq_info.drop(columns=['num_of_frames', 'frame_width', 'frame_height']).copy()
df_dasiamrpn_results['FPS'] = 0
# IoU is later filled with fractional values, so the placeholder must be a
# float: writing floats into an integer column triggers pandas' incompatible
# dtype warning (and is slated to become an error).
df_dasiamrpn_results['IoU'] = 0.0
df_dasiamrpn_results['Fail'] = 0
df_dasiamrpn_results


Unnamed: 0,IV,SV,OCC,DEF,MB,FM,IPR,OPR,OV,BC,LR,FPS,IoU,Fail
Basketball,1,0,1,1,0,0,0,1,0,1,0,0,0,0
Biker,0,1,1,0,1,1,0,1,1,0,1,0,0,0
Bird1,0,0,0,1,0,1,0,0,1,0,0,0,0,0
Bird2,0,0,1,1,0,1,1,1,0,0,0,0,0,0
BlurBody,0,1,0,1,1,1,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Twinnings,0,1,0,0,0,0,0,1,0,0,0,0,0,0
Vase,0,1,0,0,0,1,1,0,0,0,0,0,0,0
Walking,0,1,1,1,0,0,0,0,0,0,0,0,0,0
Walking2,0,1,1,0,0,0,0,0,0,0,1,0,0,0


In [8]:
def return_fps_lst(fps_file_path):
    """Read the FPS values stored one per line in a text file.

    Args:
        fps_file_path: path of the file to read.

    Returns:
        List of the integer values found in the file, in file order.
        Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line is not an integer.
    """
    fps_lst = []
    # Read the file named by the argument; the previous version ignored the
    # parameter and depended on a global `result_path` being defined.
    with open(fps_file_path, "r") as fps_file:
        for line in fps_file:
            value = line.strip()
            if value:
                fps_lst.append(int(value))
    return fps_lst


def _bbox_fields(line):
    """Split a bounding-box line into its fields (comma, tab or space separated)."""
    return line.replace(',', ' ').split()


def _xywh_to_xyxy(fields):
    """Convert ``[x, y, width, height]`` fields to ``[x_left, y_top, x_right, y_bottom]`` ints."""
    x, y, width, height = (int(value) for value in fields[:4])
    return [x, y, x + width, y + height]


def return_iou_fail(bbox_tracked_file_path, groundtruth_file_path):
    """Compute the per-frame IoU between tracked and ground-truth boxes.

    Both files hold one box per line in ``x, y, width, height`` form. A
    tracked line reading ``failed`` is counted as a failure and replaced by
    the box ``[0, 0, 0, 0]``. Blank lines are skipped.

    Args:
        bbox_tracked_file_path: path of the file with the tracked boxes.
        groundtruth_file_path: path of the file with the ground-truth boxes.

    Returns:
        Tuple ``(iou_lst, num_fail)``: the list of IoU values (floats) and
        the number of ``failed`` lines in the tracked file.
    """
    num_fail = 0
    bbox_tracked_lst = []
    bbox_truth_lst = []
    iou_lst = []

    ## get the tracked (predicted) bounding boxes
    with open(bbox_tracked_file_path, "r") as bbox_tracked_file:
        for line in bbox_tracked_file:
            temp_str = line.strip()
            if not temp_str:
                continue
            if temp_str == "failed":
                num_fail += 1
                bbox_tracked_lst.append([0, 0, 0, 0])
            else:
                # change to format [x_left, y_top, x_right, y_bottom]
                bbox_tracked_lst.append(_xywh_to_xyxy(_bbox_fields(temp_str)))

    ## get the true bounding boxes
    with open(groundtruth_file_path, "r") as bbox_truth_file:
        for line in bbox_truth_file:
            fields = _bbox_fields(line)
            if not fields:
                continue
            # change to format [x_left, y_top, x_right, y_bottom]
            bbox_truth_lst.append(_xywh_to_xyxy(fields))

    ## compute the iou of each frame
    # Tracked box i is compared with ground-truth box i+1: per the original
    # note the offset is "due to first frame" (presumably the tracked file has
    # no entry for the first frame -- confirm against the tracker output).
    num_pairs = min(len(bbox_tracked_lst), len(bbox_truth_lst) - 1)
    for bbox_idx in range(num_pairs):
        box_tracked = torch.tensor(
            [bbox_tracked_lst[bbox_idx]], dtype=torch.float)
        box_truth = torch.tensor(
            [bbox_truth_lst[bbox_idx + 1]], dtype=torch.float)
        iou_lst.append(torchvision.ops.boxes.box_iou(
            box_tracked, box_truth).item())

    return iou_lst, num_fail



# Fill the FPS, IoU and Fail columns of df_dasiamrpn_results, one sequence at a time.
for seq_name in seq_lst:
    result_path = "./results/" + seq_name + "/" + tracker_name + "/"

    # calculate fps for the current sequence
    fps_lst = return_fps_lst(result_path+"fps.txt")
    if not fps_lst:
        raise ValueError("No FPS values found for sequence '{}'".format(seq_name))
    df_dasiamrpn_results.loc[seq_name, 'FPS'] = round(sum(fps_lst) / len(fps_lst))

    # obtain iou and fail count for the current sequence
    seq_path = "./sequences/" + seq_name + "/"
    num_targets = 1
    iou_lst = []
    num_fail = 0
    # There can be multiple target objects, each with its own groundtruth_rect*
    # file. The N-th such file is paired with bbox_tracked_object<N>.txt, so the
    # listing is sorted: os.listdir order is arbitrary and could otherwise pair
    # a ground-truth file with the wrong tracked file.
    for filename in sorted(os.listdir(seq_path)):
        if filename.startswith('groundtruth_rect'):
            temp_iou, temp_fail = return_iou_fail(result_path+"bbox_tracked_object" + str(num_targets) + ".txt",
                                                  seq_path + filename)
            iou_lst += temp_iou  # add the iou of the current target object to the list
            num_fail += temp_fail  # add the fail number of the current target object to the count
            num_targets += 1

    if not iou_lst:
        raise ValueError("No IoU values could be computed for sequence '{}'".format(seq_name))

    # update iou and fail count
    df_dasiamrpn_results.loc[seq_name, 'IoU'] = round(sum(iou_lst) / len(iou_lst), 4)
    df_dasiamrpn_results.loc[seq_name, 'Fail'] = num_fail
    


In [9]:
# Display the per-sequence results table (attribute flags plus FPS, IoU and Fail)
df_dasiamrpn_results

Unnamed: 0,IV,SV,OCC,DEF,MB,FM,IPR,OPR,OV,BC,LR,FPS,IoU,Fail
Basketball,1,0,1,1,0,0,0,1,0,1,0,32,0.5546,0
Biker,0,1,1,0,1,1,0,1,1,0,1,31,0.3363,0
Bird1,0,0,0,1,0,1,0,0,1,0,0,31,0.0806,0
Bird2,0,0,1,1,0,1,1,1,0,0,0,29,0.5766,0
BlurBody,0,1,0,1,1,1,1,0,0,0,0,28,0.8165,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Twinnings,0,1,0,0,0,0,0,1,0,0,0,32,0.6416,0
Vase,0,1,0,0,0,1,1,0,0,0,0,32,0.5713,0
Walking,0,1,1,1,0,0,0,0,0,0,0,32,0.7406,0
Walking2,0,1,1,0,0,0,0,0,0,0,1,32,0.2609,0


In [10]:
# Summary statistics of FPS, IoU and Fail over all sequences
df_dasiamrpn_results.loc[:, ["FPS", "IoU", "Fail"]].describe()

Unnamed: 0,FPS,IoU,Fail
count,98.0,98.0,98.0
mean,30.602041,0.580504,0.0
std,2.431494,0.197797,0.0
min,15.0,0.0149,0.0
25%,30.0,0.4574,0.0
50%,31.0,0.61915,0.0
75%,32.0,0.731375,0.0
max,32.0,0.8527,0.0


In [11]:
# Summary statistics restricted to the sequences tagged IV
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["IV"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,38.0,38.0,38.0
mean,30.973684,0.602592,0.0
std,0.821562,0.17323,0.0
min,29.0,0.1869,0.0
25%,30.0,0.54025,0.0
50%,31.0,0.62475,0.0
75%,32.0,0.7322,0.0
max,32.0,0.8527,0.0


In [12]:
# Summary statistics restricted to the sequences tagged SV
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["SV"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,63.0,63.0,63.0
mean,30.809524,0.591616,0.0
std,2.198659,0.195789,0.0
min,15.0,0.0149,0.0
25%,30.0,0.4625,0.0
50%,31.0,0.6613,0.0
75%,32.0,0.73385,0.0
max,32.0,0.8279,0.0


In [13]:
# Summary statistics restricted to the sequences tagged OCC
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["OCC"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,47.0,47.0,47.0
mean,30.404255,0.513857,0.0
std,3.379346,0.205892,0.0
min,15.0,0.0149,0.0
25%,31.0,0.38625,0.0
50%,31.0,0.5546,0.0
75%,32.0,0.69595,0.0
max,32.0,0.8279,0.0


In [14]:
# Summary statistics restricted to the sequences tagged DEF
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["DEF"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,42.0,42.0,42.0
mean,30.214286,0.554671,0.0
std,3.564977,0.223101,0.0
min,15.0,0.0149,0.0
25%,30.0,0.397375,0.0
50%,31.0,0.61585,0.0
75%,32.0,0.716725,0.0
max,32.0,0.8279,0.0


In [15]:
# Summary statistics restricted to the sequences tagged MB
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["MB"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,29.0,29.0,29.0
mean,30.517241,0.544824,0.0
std,0.949462,0.218851,0.0
min,28.0,0.1304,0.0
25%,30.0,0.3874,0.0
50%,31.0,0.5879,0.0
75%,31.0,0.7323,0.0
max,32.0,0.8458,0.0


In [16]:
# Summary statistics restricted to the sequences tagged FM
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["FM"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,38.0,38.0,38.0
mean,30.131579,0.58405,0.0
std,2.68299,0.18966,0.0
min,15.0,0.0806,0.0
25%,30.0,0.482475,0.0
50%,31.0,0.59705,0.0
75%,31.0,0.731375,0.0
max,32.0,0.8458,0.0


In [17]:
# Summary statistics restricted to the sequences tagged IPR
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["IPR"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,51.0,51.0,51.0
mean,30.921569,0.599649,0.0
std,0.996858,0.159775,0.0
min,28.0,0.1869,0.0
25%,30.0,0.5327,0.0
50%,31.0,0.6175,0.0
75%,32.0,0.71665,0.0
max,32.0,0.8279,0.0


In [18]:
# Summary statistics restricted to the sequences tagged OPR
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["OPR"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,61.0,61.0,61.0
mean,30.52459,0.573361,0.0
std,2.997813,0.186467,0.0
min,15.0,0.0149,0.0
25%,30.0,0.4755,0.0
50%,31.0,0.6109,0.0
75%,32.0,0.7122,0.0
max,32.0,0.8279,0.0


In [19]:
# Summary statistics restricted to the sequences tagged OV
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["OV"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,14.0,14.0,14.0
mean,30.928571,0.52745,0.0
std,0.615728,0.198741,0.0
min,30.0,0.0806,0.0
25%,31.0,0.426325,0.0
50%,31.0,0.51395,0.0
75%,31.0,0.68405,0.0
max,32.0,0.8279,0.0


In [20]:
# Summary statistics restricted to the sequences tagged BC
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["BC"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,31.0,31.0,31.0
mean,30.774194,0.54329,0.0
std,0.716923,0.214539,0.0
min,30.0,0.0149,0.0
25%,30.0,0.4096,0.0
50%,31.0,0.5886,0.0
75%,31.0,0.70705,0.0
max,32.0,0.8279,0.0


In [21]:
# Summary statistics restricted to the sequences tagged LR
df_dasiamrpn_results.loc[
    df_dasiamrpn_results["LR"] == 1, ["FPS", "IoU", "Fail"]
].describe()

Unnamed: 0,FPS,IoU,Fail
count,9.0,9.0,9.0
mean,31.444444,0.541511,0.0
std,0.726483,0.195308,0.0
min,30.0,0.2609,0.0
25%,31.0,0.3363,0.0
50%,32.0,0.5853,0.0
75%,32.0,0.7018,0.0
max,32.0,0.8095,0.0
