# Benchmark

**!** This notebook has to be run after the following notebooks:
- [dataset.ipynb](dataset.ipynb)
- [scoring.ipynb](scoring.ipynb)
- [model.ipynb](model.ipynb)

In [1]:
import model
import dataset
import scoring

In [2]:
models = model.load_models()

| using constant padding
| using scales: [0.8333333333333334, 1.0, 1.2]
| using ordinary correlation
load pretrained model from models/SiamSE/checkpoint_vot.pth
remove prefix "module."
missing keys:set()
unused checkpoint keys:set()




In [3]:
datasets = dataset.load_datasets()

Error while loading dataset David gt and image files have different length
Error while loading dataset Football1 gt and image files have different length
Error while loading dataset Jogging1 gt and image files have different length
Error while loading dataset Jogging2 gt and image files have different length
Error while loading dataset Subway gt and image files have different length


In [4]:
models.keys(), datasets.keys()

(dict_keys(['SEsiamFC', 'AAA']), dict_keys(['mytc128', 'myvot2021']))

# Tracking

In [5]:
t1 = models['SEsiamFC']
d1 = datasets['mytc128']
v1 = d1['Cup']

In [6]:
import cv2
import numpy as np
import json
import os

In [7]:
def write_video(out, image_file, gt, pred=None):
    im = cv2.imread(image_file)
    cv2.rectangle(im, (int(gt[0]), int(gt[1])), (int(gt[0] + gt[2]), int(gt[1] + gt[3])), (0, 255, 0), 2)
    if pred is not None:
        cv2.rectangle(im, (int(pred[0]), int(pred[1])), (int(pred[0] + pred[2]), int(pred[1] + pred[3])), (0, 0, 255), 2)
    out.write(im)

def track_video(tracker, video, verbose=0, save_video=None):
    start_frame, toc = 0, 0

    pred = []
    image_files, gt = video['image_files'], video['gt']

    if save_video is not None:
        sh = cv2.imread(image_files[0]).shape[:2]
        frame_size = (sh[1], sh[0])
        out = cv2.VideoWriter(save_video, cv2.VideoWriter_fourcc(*'DIVX'), 20.0, frame_size)


    for f, image_file in enumerate(image_files):
        tic = cv2.getTickCount()

        if f == start_frame:  # init
            tracker.initialize(image_file, np.array(gt[f]))
            pred.append(gt[f])
            
            if save_video is not None:
                write_video(out, image_file, gt[f])

        elif f > start_frame:  # tracking
            pred_bbox = tracker.track(image_file)
            b_overlap = scoring.get_precision(gt[f], pred_bbox)
            if b_overlap > 0:
                pred.append(pred_bbox)
            else:
                pred.append(2)
                start_frame = f + 5

            if verbose > 1:
                print("{} gt: {} pred: {} overlap: {}".format(f, gt[f], pred_bbox, b_overlap))

            if save_video is not None:
                write_video(out, image_file, gt[f], pred_bbox)

        else:
            pred.append(0)
            if save_video is not None:
                write_video(out, image_file, gt[f])

        toc += cv2.getTickCount() - tic

    toc /= cv2.getTickFrequency()

    precisions = [scoring.get_precision(gt[i], pred[i]) for i in range(len(gt))]
    precisions = np.array(precisions)
    mprec = np.mean(precisions)

    success = [scoring.is_success(gt[i], pred[i]) for i in range(len(gt))]
    success = np.array(success)
    msucc = np.mean(success)

    fps = f / toc

    if verbose > 0:
        print('Video: {:12s} Time: {:2.1f}s Speed: {:5.2f}fps mSuccess: {:3.2f} mPrecision {:3.2f}'.format(video['name'], toc, f / toc, msucc, mprec))
    
    if save_video is not None:
        out.release()

    return mprec, msucc, fps

In [8]:
# track_video(t1, v1, verbose=1, save_video='SEsiamFC_mytc128_Cup.avi')

In [20]:
def track_dataset(tracker, dataset, dataset_name, n = 3, verbose=0, save_results=False):
    precisions = []
    success = []
    fpss = []
    k = 0

    if save_results:
        if not os.path.exists(os.path.join("results")):
            os.makedirs(os.path.join("results"))

        if not os.path.exists(os.path.join("results", tracker.model_name)):
            os.makedirs(os.path.join("results", tracker.model_name))

    j = {}

    for video in dataset:
        if k == n:
            break

        precision, succes, fps = track_video(tracker, dataset[video], verbose)

        precisions.append(precision)
        success.append(succes)
        fpss.append(fps)

        if save_results:
            j[video] = {'precision': precision, 'succes': succes, 'fps': fps}

        k += 1

    if save_results:
        with open(os.path.join("results", tracker.model_name, dataset_name + '.json'), 'w') as f:
            json.dump(j, f, indent=2)

    return np.mean(precisions), np.mean(success), np.mean(fpss)

In [21]:
# track_dataset(t1, d1, 'mytc128', n = 3, verbose=1, save_results=True)

In [22]:
def do_benchmark(models, datasets, verbose=0, save_results=False):
    for model_name in models:
        for dataset_name in datasets:
            print("Benchmarking {} on {}".format(model_name, dataset_name))
            track_dataset(models[model_name], datasets[dataset_name], dataset_name, verbose=verbose, save_results=save_results)

In [23]:
do_benchmark(models, datasets, verbose=1, save_results=True)

Benchmarking SEsiamFC on mytc128
Video: Airport_ce   Time: 21.4s Speed:  6.88fps mSuccess: 0.91 mPrecision 0.78
Video: Baby_ce      Time: 49.1s Speed:  6.00fps mSuccess: 1.00 mPrecision 0.64
Video: Badminton_ce1 Time: 95.1s Speed:  6.08fps mSuccess: 1.00 mPrecision 0.74
3 3
Benchmarking SEsiamFC on myvot2021
Video: agility      Time: 14.8s Speed:  6.67fps mSuccess: 0.66 mPrecision 0.45
Video: animal       Time: 13.1s Speed:  7.55fps mSuccess: 0.65 mPrecision 0.49
Video: ants1        Time: 70.3s Speed:  4.61fps mSuccess: 0.90 mPrecision 0.50
3 3
Benchmarking AAA on mytc128
Video: Airport_ce   Time: 1.6s Speed: 92.61fps mSuccess: 0.17 mPrecision 0.17
Video: Baby_ce      Time: 2.7s Speed: 110.60fps mSuccess: 0.17 mPrecision 0.17
Video: Badminton_ce1 Time: 5.7s Speed: 101.95fps mSuccess: 0.17 mPrecision 0.17
3 3
Benchmarking AAA on myvot2021
Video: agility      Time: 1.1s Speed: 91.22fps mSuccess: 0.17 mPrecision 0.17
Video: animal       Time: 1.2s Speed: 83.05fps mSuccess: 0.17 mPrecision