In [5]:
import pandas as pd
import numpy as np

In [2]:
# Gesture classes that are removed from the label list and from every
# annotation file generated further down in this notebook.
CLASSES_TO_EXCLUDE = [
    # two-finger slides
    "Sliding Two Fingers Left",
    "Sliding Two Fingers Right",
    "Sliding Two Fingers Down",
    "Sliding Two Fingers Up",
    # hand rotations
    "Turning Hand Clockwise",
    "Turning Hand Counterclockwise",
    # zoom gestures
    "Zooming In With Full Hand",
    "Zooming Out With Full Hand",
    "Zooming In With Two Fingers",
    "Zooming Out With Two Fingers",
]


In [3]:
import os
# Directory holding the Jester annotation CSVs. The original machine-specific
# location stays the default; set the JESTER_ROOT environment variable to run
# the notebook against a copy of the dataset stored somewhere else.
ANNOTATION_DIR = os.environ.get("JESTER_ROOT", "D:/MachineLearning/Datasets/Jester")

# Original annotation files (inputs).
labels_path = os.path.join(ANNOTATION_DIR, "jester-v1-labels.csv")
train_path = os.path.join(ANNOTATION_DIR, "jester-v1-train.csv")
val_path = os.path.join(ANNOTATION_DIR, "jester-v1-validation.csv")

# Reduced annotation files written by this notebook (outputs).
new_labels_path = os.path.join(ANNOTATION_DIR, "jester-v1-labels_17_classes.csv")
new_train_path = os.path.join(ANNOTATION_DIR, "jester-v1-train_17_classes.csv")
new_val_path = os.path.join(ANNOTATION_DIR, "jester-v1-validation_17_classes.csv")

Check that every class name listed in `CLASSES_TO_EXCLUDE` exists in the label file

In [31]:
# Load the known class names, one per line of the label file.
with open(labels_path, "r") as lb_f:
    labels = [line.rstrip() for line in lb_f]

# Fail on the first entry of CLASSES_TO_EXCLUDE that the label file does not contain.
unknown_names = [name for name in CLASSES_TO_EXCLUDE if name not in labels]
if unknown_names:
    raise ValueError("Exclude class name {} does not exist".format(unknown_names[0]))

Create new annotation files (labels, train and validation) without the excluded classes


In [5]:
# Copy the label file to new_labels_path, leaving out the excluded class names.
with open(labels_path, "r") as lb_f, open(new_labels_path, "w+") as new_lb_f:
    labels = [line.rstrip() for line in lb_f]
    new_lb_f.writelines(
        kept + "\n" for kept in labels if kept not in CLASSES_TO_EXCLUDE
    )

In [37]:
from tqdm import tqdm

def remove_class_annotation(original_path, new_path):
    """
    Copy an annotation CSV, dropping every row whose class is in CLASSES_TO_EXCLUDE.

    Each row is read as ``<video index>;<class name>``; rows are copied unchanged.
    Blank lines (for example a trailing empty line) are skipped instead of
    raising ``IndexError``.

    :param original_path: annotation CSV to read
    :param new_path: filtered CSV to write (overwritten if it already exists)
    """
    # set lookup instead of scanning the list once per row
    excluded = set(CLASSES_TO_EXCLUDE)
    with open(original_path, "r") as og_file, open(new_path, "w") as new_file:
        for line in tqdm(og_file):
            if not line.strip():
                # nothing to classify on an empty line
                continue
            class_name = line.split(";")[1].rstrip()
            if class_name not in excluded:
                new_file.write(line)

In [38]:
# Write the training annotations without the excluded classes to new_train_path
# (the tqdm counter printed below is the number of lines read).
remove_class_annotation(train_path, new_train_path)

118562it [00:01, 100745.22it/s]


In [41]:
# Write the validation annotations without the excluded classes to new_val_path
# (the tqdm counter printed below is the number of lines read).
remove_class_annotation(val_path, new_val_path)


14787it [00:00, 103723.53it/s]


**Zipping the dataset**  <br />
The dataset is split into volumes, each volume containing the videos up to a certain index
(Colab does not cope well with a large number of files in a single directory).

In [46]:
from datasets.jester import VOLUME_SPLITS
import zipfile
from zipfile import ZipFile
from pathlib import Path

# Root of the local Jester copy. The original machine-specific location stays
# the default; set the JESTER_ROOT environment variable to point elsewhere.
_JESTER_ROOT = os.environ.get("JESTER_ROOT", "D:/MachineLearning/Datasets/Jester")

# Directory with one sub-folder per video, named after the video index.
DATASET_DIR = _JESTER_ROOT + "/20bn-jester-v1"
# Directory that receives the vol_<n>.zip archives.
ZIP_DIR = _JESTER_ROOT + "/zips"

def store_folder(zip_obj:ZipFile, folder_path):
    """
    Add every entry directly inside ``folder_path`` to an open archive.

    Entries are stored under ``<folder name>/<entry name>`` and are added in
    sorted name order.

    :param zip_obj: archive opened for writing
    :param folder_path: directory whose direct children are added; a trailing
        path separator is tolerated
    """
    # normpath drops a trailing separator; without it basename() returns ""
    # and the files would land at the archive root
    folder_name = os.path.basename(os.path.normpath(folder_path))
    # os.listdir returns names in arbitrary order; sorting keeps the archive
    # layout reproducible between runs
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        relative_path = os.path.join(folder_name, file_name)
        zip_obj.write(file_path, relative_path)

def zip_volume(video_index_start, video_index_end, volume_index):
    """
    Pack the video folders of one index range into ``vol_<volume_index>.zip``.

    Only videos listed in the reduced train and validation annotation files
    (``new_train_path`` and ``new_val_path``) are considered.

    :param video_index_start: inclusive
    :param video_index_end: inclusive
    :param volume_index: number used in the archive file name
    """
    archive_path = os.path.join(ZIP_DIR, "vol_{}.zip".format(volume_index))

    # mode "x" fails if the archive already exists; ZIP_STORED stores files uncompressed
    with ZipFile(archive_path, mode="x", compression=zipfile.ZIP_STORED) as zip_obj:
        # train rows first, then validation rows
        for annotation_path in (new_train_path, new_val_path):
            with open(annotation_path, 'r') as annotation_file:
                for row in annotation_file:
                    video_index = int(row.split(';')[0])
                    if not (video_index_start <= video_index <= video_index_end):
                        continue
                    store_folder(zip_obj, os.path.join(DATASET_DIR, str(video_index)))

import time

# Disabled: loop that zips one volume per entry of VOLUME_SPLITS, each volume
# ending one index before its split value. Kept for reference.
# print("Started zipping")
# starting_vid_idx = 1
# for volume_index in range(len(VOLUME_SPLITS)):
#     ending_vid_idx = VOLUME_SPLITS[volume_index] - 1
#     time_start = time.time()
#
#     zip_volume(starting_vid_idx, ending_vid_idx, volume_index)
#
#     duration = time.time() - time_start
#     print("Done with zipping vol {} in {} seconds".format(volume_index, duration))
#     starting_vid_idx = VOLUME_SPLITS[volume_index]

# Zip the final volume: every video from the last split value onwards.
starting_vid_idx = VOLUME_SPLITS[-1]
# NOTE(review): the upper bound is the number of entries in DATASET_DIR, which
# presumably equals the highest video index only if folders are numbered
# contiguously from 1 -- confirm.
ending_vid_idx = len(os.listdir(DATASET_DIR))
time_start = time.time()
volume_index = len(VOLUME_SPLITS)
zip_volume(starting_vid_idx, ending_vid_idx, volume_index)
duration = time.time() - time_start
print("Done with zipping vol {} in {} seconds".format(volume_index, duration))

Done with zipping vol 6 in 536.7640151977539 seconds


Create the new class-index (`classInd`) and train/validation list (`trainlist`, `vallist`) text files

In [9]:
def label_csv_to_label_ind(label_csv_path, class_ind_path):
    """
    Number the classes of a label file and write them as a class-index file.

    Every line of ``label_csv_path`` is one class name. Classes are numbered
    from 1 in file order, and each is written to ``class_ind_path`` as
    ``<index> <name with spaces replaced by underscores>``.

    :param label_csv_path: label file to read, one class name per line
    :param class_ind_path: class-index file to write
    :return: dict mapping the original class name (with spaces) to its index
    """
    name_to_index = {}
    with open(label_csv_path, "r") as csv_file, open(class_ind_path, "w+") as ind_file:
        class_names = [raw_line.rstrip() for raw_line in csv_file]
        for position, class_name in enumerate(class_names, start=1):
            ind_file.write("{} {}\n".format(position, class_name.replace(" ", "_")))
            name_to_index[class_name] = position

    return name_to_index

def annotation_csv_to_ind(annotation_csv_path, ind_path, label_dict):
    """
    Convert an annotation CSV into a list file of ``<video index> <class index>`` lines.

    :param annotation_csv_path: file to read, one ``<video index>;<class name>`` row per line
    :param ind_path: list file to write
    :param label_dict: mapping from class name to class index; a class name
        missing from it raises ``KeyError``
    """
    with open(annotation_csv_path, "r") as csv_file, open(ind_path, "w+") as list_file:
        for row in csv_file:
            # exactly one ';' per row is expected; anything else fails the unpacking
            video_ind, raw_name = row.split(";")
            class_ind = label_dict[raw_name.rstrip()]
            print(video_ind, class_ind, file=list_file)

# Output locations for the class-index file and the train/validation list files.
class_ind_path = os.path.join(ANNOTATION_DIR, "classInd_17_classes.txt")
trainlist_path = os.path.join(ANNOTATION_DIR, "trainlist_17_classes.txt")
vallist_path = os.path.join(ANNOTATION_DIR, "vallist_17_classes.txt")

# Number the remaining classes first; the returned name -> index mapping is
# then used to translate both reduced annotation files.
label_map  = label_csv_to_label_ind(new_labels_path, class_ind_path)
annotation_csv_to_ind(new_train_path, trainlist_path, label_map)
annotation_csv_to_ind(new_val_path, vallist_path, label_map)

In [13]:
# NOTE(review): the recorded output of this cell is a ModuleNotFoundError
# ("'utils' is not a package"). Presumably a different module named `utils`
# shadowed the project package on sys.path when the notebook was run --
# confirm the working directory before relying on this cell.
from utils.jester_json import convert_jester_csv_to_activitynet_json

# Combine the class-index file and both list files into one JSON annotation file.
dest_JSON = os.path.join(ANNOTATION_DIR, "jester_17_classes.json")
convert_jester_csv_to_activitynet_json(class_ind_path, trainlist_path, vallist_path, dest_JSON)

ModuleNotFoundError: No module named 'utils.jester_json'; 'utils' is not a package