# Dataset

We chose the following datasets to evaluate different models on tracking tasks:
- [TC-128](https://www3.cs.stonybrook.edu/~hling/data/TColor-128/TColor-128.html#dataset) (Present in the article)
- [VOT2021](https://www.votchallenge.net/vot2021/dataset.html) (Recent dataset)

This notebook displays samples of the datasets and shows how they are modified to fit the models.

To fit the model evaluation code, each dataset folder should be placed in the **data** folder and be structured as follows:

- *my*dataset_name
    - *img*
        - img1.jpg
        - img2.jpg
        - ...
    - *groundtruth.txt*

---

`groundtruth.txt` should be structured as follows:

```
x,y,w,h
x,y,w,h
...
```

In [2]:
import csv
import cv2
import matplotlib.pyplot as plt
import os
import re
import shutil
import urllib.request

**Change the dataset location in the following cell.**

In [4]:
%%writefile dataset.py

# Root folder that holds the prepared dataset folders; edit this path to match your machine.
path_to_dataset = "/mnt/d/Documents/DNN_project/data"

Overwriting dataset.py


In [5]:
from dataset import path_to_dataset

## TC-128

This dataset contains 128 videos.

In [6]:
def download_tc128_and_format(all: bool = False):
    """Download TC-128 sequences and reshape them into the expected layout.

    The sequence index page is scanned for ``.zip`` links. Each archive is
    downloaded into ``<path_to_dataset>/mytc128``, unpacked there and then
    deleted. Inside the unpacked sequence folder the ``*_att.txt`` and
    ``*_frames.txt`` files are removed and ``*_gt.txt`` is renamed to
    ``groundtruth.txt``.

    Args:
        all: When False (the default) only the first archive found on the
            index page is processed; when True every archive is processed.
            (The name shadows the builtin but is kept for existing callers.)
    """
    base_url = 'https://www3.cs.stonybrook.edu/~hling/data/TColor-128/seqs/'

    with urllib.request.urlopen(base_url) as f:
        html = f.read().decode('utf-8')
    archives = re.findall(r'href="(.*?\.zip)"', html)

    target_dir = os.path.join(path_to_dataset, 'mytc128')
    os.makedirs(target_dir, exist_ok=True)

    for archive in archives:
        seq_name = archive[:-4]  # archive name without the ".zip" suffix
        seq_dir = os.path.join(target_dir, seq_name)
        archive_path = os.path.join(target_dir, archive)

        # Download the archive, unpack it next to the others and drop the zip.
        urllib.request.urlretrieve(base_url + archive, archive_path)
        shutil.unpack_archive(archive_path, target_dir)
        os.remove(archive_path)

        # Remove files the loaders do not need. This cleanup is optional, so a
        # sequence that lacks one of them must not abort the whole download.
        for suffix in ("_att.txt", "_frames.txt"):
            extra_file = os.path.join(seq_dir, seq_name + suffix)
            if os.path.exists(extra_file):
                os.remove(extra_file)

        # Rename the ground-truth file to the name the loaders look for.
        os.rename(
            os.path.join(seq_dir, seq_name + "_gt.txt"),
            os.path.join(seq_dir, "groundtruth.txt"),
        )

        if not all:
            break

In [7]:
# download_tc128_and_format(True)

# VOT 2021

This dataset contains semantic labeling (segmentation annotations) for 60 videos. We will use these annotations to compute the ground-truth bounding boxes.

In [8]:
import vot
from vot import dataset

In [9]:
def semantic2bbox(path_semantic: str, path_bbox: str):
    """Convert a segmentation ground-truth file into a bounding-box file.

    Every non-empty CSV row of ``path_semantic`` is reduced to its first four
    fields. The first character of the first field is dropped (presumably the
    mask marker of the VOT annotation format -- confirm against the dataset),
    the kept fields are converted to ``int`` and written as one CSV row to
    ``path_bbox``.

    Args:
        path_semantic: Path of the CSV file to read.
        path_bbox: Path of the CSV file to write; it is overwritten.

    Raises:
        ValueError: If one of the kept fields is not an integer literal.
    """
    # newline='' is what the csv module expects for both reading and writing;
    # without it the writer emits "\r\r\n" row endings on Windows, which read
    # back as spurious blank lines.
    with open(path_semantic, 'r', newline='') as f:
        boxes = [
            # Only the four kept fields are parsed; the rest of the row is
            # discarded anyway and does not need to be numeric.
            [int(value) for value in [row[0][1:]] + row[1:4]]
            for row in csv.reader(f)
            if row  # skip blank lines instead of failing on row[0]
        ]

    with open(path_bbox, 'w', newline='') as f:
        csv.writer(f).writerows(boxes)

In [10]:
def download_vot2021_dataset_and_format(all: bool = False):
    """Fetch VOT-ST2021 and rewrite it into the ``myvot2021`` layout.

    The sequences are downloaded through the ``vot`` toolkit into
    ``<path_to_dataset>/vot2021``. For each sequence directory, the ground
    truth is converted with ``semantic2bbox`` and the ``color`` folder is
    copied to ``img`` under ``<path_to_dataset>/myvot2021/<sequence>``.
    The raw ``vot2021`` folder is deleted at the end.

    Args:
        all: When False (the default) only the first sequence directory
            listed is converted; when True every sequence is converted.
    """
    path_vot2021 = os.path.join(path_to_dataset, "vot2021")
    path_myvot2021 = os.path.join(path_to_dataset, "myvot2021")

    # Download the raw dataset; comment this call out if it is already on disk.
    dataset.download_dataset(dataset.vot._VOT_DATASETS["vot-st2021"], path_vot2021)

    if not os.path.exists(path_myvot2021):
        os.mkdir(path_myvot2021)

    for video in os.listdir(path_vot2021):
        source_dir = os.path.join(path_vot2021, video)
        # Only directories are sequences; skip any plain file in the download.
        if not os.path.isdir(source_dir):
            continue

        target_dir = os.path.join(path_myvot2021, video)
        if not os.path.exists(target_dir):
            os.mkdir(target_dir)

        semantic2bbox(
            os.path.join(source_dir, "groundtruth.txt"),
            os.path.join(target_dir, "groundtruth.txt"),
        )
        shutil.copytree(
            os.path.join(source_dir, "color"),
            os.path.join(target_dir, "img"),
        )

        if not all:
            break

    # The raw download is no longer needed once the sequences are converted.
    shutil.rmtree(path_vot2021)

In [11]:
# download_vot2021_dataset_and_format(True)

# Dataloading

In [12]:
%%writefile -a dataset.py

import os

def load_dataset(name):
    """Load every sequence of one prepared dataset.

    A dataset is a folder of ``path_to_dataset`` whose name starts with
    ``"my"``. Each sub-folder is a sequence made of a ``groundtruth.txt``
    file (comma-separated numbers, one box per line) and an ``img`` folder.

    Args:
        name: Name of the dataset folder inside ``path_to_dataset``.

    Returns:
        A dict mapping each sequence folder name to a dict with the keys
        ``"name"`` (the folder name), ``"gt"`` (one list of ints per
        ground-truth line) and ``"image_files"`` (paths of the files in
        ``img``, sorted by file name). Sequences that cannot be read, or
        whose number of boxes differs from the number of images, are
        reported with ``print`` and left out.

    Raises:
        FileNotFoundError: If ``name`` is not one of the ``my*`` folders of
            ``path_to_dataset``.
    """
    available = [entry for entry in os.listdir(path_to_dataset) if entry.startswith("my")]
    if name not in available:
        # The original built this exception without raising it.
        raise FileNotFoundError(f"Dataset not found: {name}")

    dataset_path = os.path.join(path_to_dataset, name)
    ret = {}

    for folder in os.listdir(dataset_path):
        folder_path = os.path.join(dataset_path, folder)
        img_dir = os.path.join(folder_path, "img")

        try:
            with open(os.path.join(folder_path, "groundtruth.txt")) as f:
                gt = [
                    [int(float(x)) for x in line.split(",")]
                    for line in f
                    if line.strip()  # tolerate blank (e.g. trailing) lines
                ]
            # os.listdir returns entries in arbitrary order; sort by name so
            # the i-th image lines up with the i-th ground-truth box (this
            # relies on zero-padded frame file names).
            image_files = [
                os.path.join(img_dir, file_name)
                for file_name in sorted(os.listdir(img_dir))
            ]
        except (OSError, ValueError, OverflowError):
            # Missing or unreadable files, or values that are not finite numbers.
            print("Error while loading dataset", folder)
            continue

        if len(gt) != len(image_files):
            print("Error while loading dataset", folder, "gt and image files have different length")
            continue

        ret[folder] = {"name": folder, "gt": gt, "image_files": image_files}

    return ret

Appending to dataset.py


In [13]:
%%writefile -a dataset.py

def load_datasets():
    """Load every prepared dataset found in ``path_to_dataset``.

    Returns:
        A dict mapping each folder whose name starts with ``"my"`` to the
        result of ``load_dataset`` for that folder.
    """
    return {
        entry: load_dataset(entry)
        for entry in os.listdir(path_to_dataset)
        if entry.startswith("my")
    }


Appending to dataset.py


In [1]:
from dataset import load_datasets

In [2]:
ds = load_datasets()

Error while loading dataset David gt and image files have different length
Error while loading dataset Football1 gt and image files have different length
Error while loading dataset Jogging1 gt and image files have different length
Error while loading dataset Jogging2 gt and image files have different length
Error while loading dataset Subway gt and image files have different length


In [None]:
ds