# Benchmark


**Note:** This notebook must be run after the following notebooks:

-   [dataset.ipynb](dataset.ipynb)
-   [scoring.ipynb](scoring.ipynb)
-   [model.ipynb](model.ipynb)


In [5]:
import model
import dataset
import scoring

from typing import *

In [6]:
# Load every available tracker into a dict keyed by model name.
models: Dict[str, model.Tracker] = model.load_models()

| using constant padding
| using scales: [0.8333333333333334, 1.0, 1.2]
| using ordinary correlation
load pretrained model from models/SiamSE/checkpoint_vot.pth
remove prefix "module."
missing keys:set()
unused checkpoint keys:set()
test config:  {'MODEL': {'HEAD_TYPE': 'CORNER', 'HIDDEN_DIM': 384, 'NUM_OBJECT_QUERIES': 1, 'POSITION_EMBEDDING': 'sine', 'PREDICT_MASK': False, 'BACKBONE': {'PRETRAINED': True, 'PRETRAINED_PATH': '/YOUR/PRETRAINED/CVT/DIR/CvT-21-384x384-IN-22k.pth', 'INIT': 'trunc_norm', 'NUM_STAGES': 3, 'PATCH_SIZE': [7, 3, 3], 'PATCH_STRIDE': [4, 2, 2], 'PATCH_PADDING': [2, 1, 1], 'DIM_EMBED': [64, 192, 384], 'NUM_HEADS': [1, 3, 6], 'DEPTH': [1, 4, 16], 'MLP_RATIO': [4.0, 4.0, 4.0], 'ATTN_DROP_RATE': [0.0, 0.0, 0.0], 'DROP_RATE': [0.0, 0.0, 0.0], 'DROP_PATH_RATE': [0.0, 0.0, 0.1], 'QKV_BIAS': [True, True, True], 'CLS_TOKEN': [False, False, False], 'POS_EMBED': [False, False, False], 'QKV_PROJ_METHOD': ['dw_bn', 'dw_bn', 'dw_bn'], 'KERNEL_QKV': [3, 3, 3], 'PADDING_KV': [1

In [7]:
# Load all benchmark datasets into a dict keyed by dataset name.
datasets: Dict = dataset.load_datasets()

Error while loading dataset /mnt/d/Documents/DNN_project/projet/data/mytc128/David gt and image files have different length
Error while loading dataset /mnt/d/Documents/DNN_project/projet/data/mytc128/Football1 gt and image files have different length
Error while loading dataset /mnt/d/Documents/DNN_project/projet/data/mytc128/Jogging1 gt and image files have different length
Error while loading dataset /mnt/d/Documents/DNN_project/projet/data/mytc128/Jogging2 gt and image files have different length
Error while loading dataset /mnt/d/Documents/DNN_project/projet/data/mytc128/Subway gt and image files have different length


In [8]:
# Show which trackers and which datasets were actually loaded.
models.keys(), datasets.keys()

(dict_keys(['Staple', 'SEsiamFC', 'PyECO', 'MixFormer', 'MIL', 'GOTURN']),
 dict_keys(['mytc128', 'myvot2021']))

# Tracking


In [9]:
import cv2
import numpy as np
import json
import os

from pathlib import Path

In [10]:
def write_video(out, image_file, gt, pred=None):
    """Append one annotated frame to a video.

    The image is read from disk, the ground-truth box is drawn in green and,
    when given, the predicted box is drawn on top of it in red. Boxes are
    indexed as ``[x, y, width, height]``.

    Args:
        out (cv2.VideoWriter): Video writer receiving the frame.
        image_file (str): Path to the image file.
        gt (List[int]): Ground truth bounding box.
        pred (List[int], optional): Predicted bounding box. Defaults to None.
    """
    frame = cv2.imread(image_file)

    # (box, BGR color) pairs, in drawing order: ground truth first.
    overlays = [(gt, (0, 255, 0))]
    if pred is not None:
        overlays.append((pred, (0, 0, 255)))

    for box, color in overlays:
        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[0] + box[2]), int(box[1] + box[3]))
        cv2.rectangle(frame, top_left, bottom_right, color, 2)

    out.write(frame)


def track_video(tracker, video, verbose=0, save_video=None):
    """Track a whole video and score every frame against the ground truth.

    The tracker is initialized on the first frame with the ground-truth box.
    Whenever the overlap score of a prediction is not positive, the target is
    counted as lost and the tracker is re-initialized from the ground truth
    5 frames later; the frames in between are not tracked.

    The measured time covers everything done per frame inside the loop,
    including scoring and, when ``save_video`` is set, writing the frame.

    Args:
        tracker (model.Tracker): Tracker exposing ``initialize(image_file, bbox)``
            and ``track(image_file)``.
        video (Dict): Video with the keys ``"image_files"`` and ``"gt"``
            (``"name"`` is also read when ``verbose > 0``).
        verbose (int, optional): Verbosity level. ``> 0`` prints a summary,
            ``> 1`` also prints every tracked frame. Defaults to 0.
        save_video (str, optional): Path to the video to save. Defaults to None.

    Returns:
        Tuple[List, List, float, int]: Per-frame precisions, per-frame scores,
        speed in frames per second, and the number of times the target was lost.
    """
    start_frame, toc = 0, 0

    pred = []
    image_files, gt = video["image_files"], video["gt"]

    out = None
    if save_video is not None:
        sh = cv2.imread(image_files[0]).shape[:2]
        frame_size = (sh[1], sh[0])  # VideoWriter expects (width, height)
        out = cv2.VideoWriter(
            save_video, cv2.VideoWriter_fourcc(*"DIVX"), 20.0, frame_size
        )

    lost = 0
    try:
        for f, image_file in enumerate(image_files):
            tic = cv2.getTickCount()

            if f == start_frame:  # (re-)initialization from the ground truth
                tracker.initialize(image_file, np.array(gt[f]))
                pred.append(gt[f])

                if out is not None:
                    write_video(out, image_file, gt[f])

            elif f > start_frame:  # tracking
                pred_bbox = tracker.track(image_file)
                b_overlap = scoring.get_score(pred_bbox, gt[f])
                if b_overlap > 0:
                    pred.append(pred_bbox)
                else:
                    # Integer sentinel marking a lost frame.
                    # NOTE(review): presumably handled by the scoring helpers — confirm.
                    pred.append(2)
                    start_frame = f + 5  # skip ahead before re-initializing
                    lost += 1

                if verbose > 1:
                    print(f"{f} gt: {gt[f]} pred: {pred_bbox} overlap: {b_overlap}")

                if out is not None:
                    write_video(out, image_file, gt[f], pred_bbox)
            else:
                # Frame skipped while waiting for re-initialization (integer sentinel).
                pred.append(0)
                if out is not None:
                    write_video(out, image_file, gt[f])

            toc += cv2.getTickCount() - tic
    finally:
        # Always finalize the video file, even if the tracker raised.
        if out is not None:
            out.release()

    toc /= cv2.getTickFrequency()

    precisions = [scoring.get_precision(gt[i], pred[i]) for i in range(len(gt))]
    scores = [scoring.get_score(gt[i], pred[i]) for i in range(len(gt))]

    # Use the number of processed frames (not the last index), and do not
    # divide by zero for an empty or instantaneous run.
    fps = len(image_files) / toc if toc > 0 else 0.0

    if verbose > 0:
        print(
            f'Video: {video["name"]:12s} Time: {toc:2.1f}s Speed: {fps:5.2f}fps Lost: {lost}'
        )

    return precisions, scores, fps, lost

In [11]:
# Dataset and tracker used for the single-video experiments below.
# NOTE(review): this rebinds `dataset`, shadowing the `dataset` module imported
# at the top of the notebook; re-running the `dataset.load_datasets()` cell
# after this one fails until the kernel is restarted. Consider a distinct name.
dataset = "mytc128"
tracker = "MixFormer"

In [12]:
# List the videos available in the selected dataset.
datasets[dataset].keys()

dict_keys(['Airport_ce', 'Baby_ce', 'Badminton_ce1', 'Badminton_ce2', 'Ball_ce1', 'Ball_ce2', 'Ball_ce3', 'Ball_ce4', 'Basketball', 'Basketball_ce1', 'Basketball_ce2', 'Basketball_ce3', 'Bee_ce', 'Bicycle', 'Biker', 'Bikeshow_ce', 'Bike_ce1', 'Bike_ce2', 'Bird', 'Board', 'Boat_ce1', 'Boat_ce2', 'Bolt', 'Boy', 'Busstation_ce1', 'Busstation_ce2', 'Carchasing_ce1', 'Carchasing_ce3', 'Carchasing_ce4', 'CarDark', 'CarScale', 'Charger_ce', 'Coke', 'Couple', 'Crossing', 'Cup', 'Cup_ce', 'David3', 'Deer', 'Diving', 'Doll', 'Eagle_ce', 'Electricalbike_ce', 'FaceOcc1', 'Face_ce', 'Face_ce2', 'Fish_ce1', 'Fish_ce2', 'Girl', 'Girlmov', 'Guitar_ce1', 'Guitar_ce2', 'Gym', 'Hand', 'Hand_ce1', 'Hand_ce2', 'Hurdle_ce1', 'Hurdle_ce2', 'Iceskater', 'Ironman', 'Juice', 'Kite_ce1', 'Kite_ce2', 'Kite_ce3', 'Kobe_ce', 'Lemming', 'Liquor', 'Logo_ce', 'Matrix', 'Messi_ce', 'Michaeljackson_ce', 'Microphone_ce1', 'Microphone_ce2', 'Motorbike_ce', 'MotorRolling', 'MountainBike', 'Panda', 'Plane_ce2', 'Plate_ce1',

In [13]:
# Pick one video and look up the selected tracker and the video's data.
video = "Ball_ce2"
t1 = models[tracker]
v1 = datasets[dataset][video]

# File name for the annotated output video of this tracker/video pair.
video_name = f"{dataset}_{tracker}_{video}.avi"
# Uncomment to track this single video and save the annotated result:
# track_video(t1, v1, verbose=1, save_video=video_name)

In [14]:
# from ipywidgets import Video, Image

# new_ = Video.from_file(video_name, play=True, width=360, height=360)
# new_

In [15]:
def track_dataset(
    tracker, dataset, dataset_name, n=30, verbose=0, save_results=False, overwrite=False
) -> None:
    """Track the first ``n`` videos of a dataset, optionally persisting results.

    When ``save_results`` is true, results are stored in
    ``results/<tracker.model_name>/<dataset_name>.json``. An existing file is
    loaded first, and the file is rewritten after every tracked video so that
    an interrupted run can be resumed.

    A failure on one video is reported and does not stop the remaining videos.

    Args:
        tracker (model.Tracker): Tracker; its ``model_name`` names the result folder.
        dataset (Dict): Dataset, mapping video names to videos.
        dataset_name (str): Dataset name.
        n (int, optional): Number of videos to consider, counted from the start
            of the dataset (already-stored videos count too). Defaults to 30.
        verbose (int, optional): Verbosity level. Defaults to 0.
        save_results (bool, optional): Save results. Defaults to False.
        overwrite (bool, optional): Re-track videos that already have stored
            results. Defaults to False.

    Returns:
        None: Results are only written to disk (when ``save_results`` is true).
    """
    result_dir = Path("results") / tracker.model_name
    result_file = result_dir / (dataset_name + ".json")

    results: Dict = {}
    if save_results:
        result_dir.mkdir(parents=True, exist_ok=True)
        if result_file.exists():
            with open(result_file, "r") as f:
                results = json.load(f)

    for count, video in enumerate(dataset):
        if count == n:
            break

        # Already stored: keep the stored result unless asked to redo it.
        if video in results and not overwrite:
            continue

        try:
            precisions, scores, fps, lost = track_video(tracker, dataset[video], verbose)
            if save_results:
                results[video] = {
                    "precisions": precisions,
                    "scores": scores,
                    "fps": fps,
                    "lost": lost,
                }
                # Rewrite the whole file after each video to checkpoint progress.
                with open(result_file, "w") as f:
                    json.dump(results, f, indent=2)
        except Exception as e:
            # Best effort: report the failing video and carry on with the next one.
            print(f"Error in {video}\n{e}")

In [16]:
def do_benchmark(models, datasets, verbose=0, save_results=False):
    """Run every model on every dataset.

    Each (model, dataset) pair is announced and then tracked without
    overwriting results that may already exist.

    Args:
        models (Dict): Models, keyed by model name.
        datasets (Dict): Datasets, keyed by dataset name.
        verbose (int, optional): Verbosity level. Defaults to 0.
        save_results (bool, optional): Save results. Defaults to False.
    """
    for model_name, tracker in models.items():
        for dataset_name, videos in datasets.items():
            print(f"Benchmarking {model_name} on {dataset_name}")
            track_dataset(
                tracker,
                videos,
                dataset_name,
                verbose=verbose,
                save_results=save_results,
                overwrite=False,
            )

In [17]:
# Run the full benchmark: every loaded tracker on every loaded dataset,
# printing a per-video summary and saving the results to disk.
do_benchmark(models, datasets, verbose=1, save_results=True)

Benchmarking Staple on mytc128
