In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2

In [2]:
# Root directory of the VidVRD dataset. The notebook lists the 'test' and
# 'train' annotation folders and reads videos from 'videos/' beneath it.
# Paths are built by plain string concatenation, so keep the trailing '/'.
MAIN_PATH = '/home/rex/dataset/vidvrd-dataset/'

In [3]:
def read_json_as_dict(json_path):
    """Load a JSON file and return its decoded content.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load``. Callers in this notebook index
        the result with string keys, i.e. they expect a dict.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    import json

    # JSON is UTF-8 by specification; do not depend on the platform's default
    # text encoding when decoding the file.
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [4]:
def find_class_ids(root_dir, dataset_portion, file_lists, class_ids=None):
    """Collect the object categories of a dataset split and give each an id.

    Every annotation file is read and each entry of its 'subject/objects'
    list is inspected. A category that has not been seen yet receives the
    next free integer id (the current size of the mapping).

    Args:
        root_dir: Dataset root; concatenated as-is, so it must end with '/'.
        dataset_portion: Split sub-folder such as 'test/' (also concatenated
            as-is).
        file_lists: Iterable of annotation file names inside that folder.
        class_ids: Optional existing ``{category: id}`` mapping to extend.
            When given it is updated in place and returned; when omitted a
            fresh dict is created for this call.

    Returns:
        The ``{category: id}`` mapping.
    """
    # A literal ``{}`` default would be created once and shared by every call
    # (and by every re-run of the calling cell), so use a None sentinel.
    if class_ids is None:
        class_ids = {}

    for annotation_file_name in file_lists:
        annotation_file_path = root_dir + dataset_portion + annotation_file_name
        annotation = read_json_as_dict(annotation_file_path)

        for subject in annotation['subject/objects']:
            # len() is evaluated before the insertion, so a new category gets
            # the next consecutive id; known categories are left untouched.
            class_ids.setdefault(subject['category'], len(class_ids))

    return class_ids

In [5]:
def save_list_to_file(
    file_path,
    file_lists,
    images_folder="/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/images/",
):
    """Write the paths of all frames belonging to the given annotation files.

    Frames are expected to be named ``<stem>_<index>.<ext>`` where ``<stem>``
    is the annotation file name without its '.json' suffix. One image path is
    written per line.

    Args:
        file_path: Destination text file; overwritten if it exists.
        file_lists: Iterable of annotation file names (e.g. 'video.json').
        images_folder: Folder that holds the extracted frames.
    """
    # Group the frames by video stem in a single pass over the folder. The
    # stem is everything before the LAST underscore, so 'vid_1' never picks up
    # the frames of 'vid_10' the way a substring test would. Sorting makes the
    # output independent of the arbitrary os.listdir order.
    frames_by_stem = {}
    for image_name in sorted(os.listdir(images_folder)):
        stem, separator, _ = image_name.rpartition('_')
        if separator:
            frames_by_stem.setdefault(stem, []).append(image_name)

    with open(file_path, 'w') as f:
        for annotation_file_name in file_lists:
            # Same stem rule that is used when the frames are written.
            stem = annotation_file_name.split('.json')[0]
            for image_name in frames_by_stem.get(stem, []):
                f.write(os.path.join(images_folder, image_name) + '\n')

In [6]:
def _write_yolo_label_files(annotation, file_stem, classes, labels_dir, scaled_width, scaled_height):
    """Write one YOLO label file per entry of ``annotation['trajectories']``."""
    width_ratio = scaled_width / annotation['width']
    height_ratio = scaled_height / annotation['height']

    # Build the tid -> category lookup once per file instead of scanning
    # 'subject/objects' for every box. setdefault keeps the first occurrence
    # of a tid, like a first-match linear search would.
    category_by_tid = {}
    for subject in annotation['subject/objects']:
        category_by_tid.setdefault(subject['tid'], subject['category'])

    for index_trajectory, trajectory in enumerate(annotation['trajectories']):
        label_lines = []
        for tracked_object in trajectory:
            category = category_by_tid.get(tracked_object['tid'])
            if category is None:
                # No (usable) category for this tid: the box is skipped.
                continue

            label_idx = classes[category]
            bbox = tracked_object['bbox']
            xmin = bbox['xmin'] * width_ratio
            ymin = bbox['ymin'] * height_ratio
            xmax = bbox['xmax'] * width_ratio
            ymax = bbox['ymax'] * height_ratio

            # YOLO format: box centre and size, normalised by the image size.
            x_center = (xmin + xmax) / 2 / scaled_width
            y_center = (ymin + ymax) / 2 / scaled_height
            width = (xmax - xmin) / scaled_width
            height = (ymax - ymin) / scaled_height
            label_lines.append(f"{label_idx} {x_center} {y_center} {width} {height}\n")

        # Write each file exactly once in 'w' mode: re-running the notebook
        # replaces the labels instead of appending duplicates, and an entry
        # without boxes still produces an (empty) label file.
        save_path = os.path.join(labels_dir, f"{file_stem}_{index_trajectory}.txt")
        with open(save_path, 'w') as f:
            f.writelines(label_lines)


def _extract_resized_frames(video_path, file_stem, images_dir, target_fps, frame_count,
                            scaled_width, scaled_height):
    """Save up to ``frame_count`` resized frames of a video; return how many were saved."""
    cap = cv2.VideoCapture(video_path)
    try:
        actual_fps = cap.get(cv2.CAP_PROP_FPS)
        # Keep every n-th decoded frame. Rounding (not truncating) and clamping
        # to 1 prevents a zero interval, and the resulting modulo-by-zero, when
        # the container reports e.g. 29.97 fps for a 30 fps annotation.
        frame_interval = max(1, round(actual_fps / target_fps))

        frame_idx = 0
        saved_frames = 0
        # Stop on the number of SAVED frames so that image i always has a
        # matching label file i, even when frame_interval is larger than 1.
        while cap.isOpened() and saved_frames < frame_count:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_interval == 0:
                frame_resized = cv2.resize(
                    frame, (scaled_width, scaled_height), interpolation=cv2.INTER_AREA
                )
                save_path = os.path.join(images_dir, f"{file_stem}_{saved_frames}.jpg")
                # imwrite reports failure through its return value only.
                if not cv2.imwrite(save_path, frame_resized):
                    raise OSError(f"could not write image {save_path}")
                saved_frames += 1

            frame_idx += 1
    finally:
        # Release the capture even if decoding or writing fails.
        cap.release()

    return saved_frames


def create_pytorchYoloAnnotationFromFileNames(
    root_dir,
    dataset_portion,
    file_lists,
    classes,
    output_dir="/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/",
    scaled_size=(416, 416),
):
    """Convert annotation files and their videos to YOLO labels and images.

    For every annotation file the function
      1. writes ``<output_dir>/labels/<stem>_<i>.txt`` for each entry ``i`` of
         the annotation's 'trajectories' list, one
         ``class x_center y_center width height`` line per box, normalised to
         the resized image, and
      2. decodes ``<root_dir>videos/<stem>.mp4`` and writes the resized frames
         to ``<output_dir>/images/<stem>_<i>.jpg``.

    The annotation JSON is read for the keys 'width', 'height',
    'subject/objects', 'trajectories', 'fps' and 'frame_count'.

    Args:
        root_dir: Dataset root; concatenated as-is, so it must end with '/'.
        dataset_portion: Split sub-folder such as 'test/'.
        file_lists: Iterable of annotation file names ('<stem>.json').
        classes: ``{category: class id}`` mapping.
        output_dir: Folder that receives the 'labels' and 'images'
            sub-folders (created if missing).
        scaled_size: ``(width, height)`` of the saved images.

    Raises:
        KeyError: If a box category is missing from ``classes`` or an expected
            annotation key is absent.
        OSError: If a label file or an image cannot be written.
    """
    scaled_width, scaled_height = scaled_size
    labels_dir = os.path.join(output_dir, 'labels')
    images_dir = os.path.join(output_dir, 'images')
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)

    for annotation_file_name in file_lists:
        annotation_file_path = root_dir + dataset_portion + annotation_file_name
        annotation = read_json_as_dict(annotation_file_path)
        file_stem = annotation_file_name.split('.json')[0]

        _write_yolo_label_files(
            annotation, file_stem, classes, labels_dir, scaled_width, scaled_height
        )

        video_file_location = root_dir + 'videos/' + file_stem + '.mp4'
        _extract_resized_frames(
            video_file_location,
            file_stem,
            images_dir,
            annotation["fps"],
            annotation["frame_count"],
            scaled_width,
            scaled_height,
        )


In [7]:
# Annotation file names (names only, not full paths) of each split.
# os.listdir returns entries in arbitrary order and class ids are assigned in
# the order categories are first encountered, so sort the names to keep the
# ids reproducible across runs and machines.
annotations_test_path = sorted(os.listdir(MAIN_PATH + 'test'))
annotations_train_path = sorted(os.listdir(MAIN_PATH + 'train'))

In [8]:
# Build the category -> class id mapping over both splits (test first, then
# train). An explicit empty dict is passed so that re-running this cell always
# starts from a clean mapping.
classes = find_class_ids(MAIN_PATH, 'test/', annotations_test_path, {})
classes = find_class_ids(MAIN_PATH, 'train/', annotations_train_path, classes)

# Write one class name per line, ordered by class id so that line i holds the
# name of class i. Mode 'w' truncates any previous content of the file.
classes_names_path = "/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/classes.names"
with open(classes_names_path, 'w') as f:
    f.writelines(f"{name}\n" for name in sorted(classes, key=classes.get))

In [9]:
# Write the YOLO label files and the resized frames for the test split.
create_pytorchYoloAnnotationFromFileNames(MAIN_PATH, 'test/', annotations_test_path, classes)

In [10]:
# create_pytorchYoloAnnotationFromFileNames(MAIN_PATH, 'train/', annotations_train_path, classes)

In [11]:
# Write the list of test image paths (one path per line) to test.txt.
save_list_to_file('/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/test.txt', annotations_test_path)
# Train split list is currently disabled:
# save_list_to_file('/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/train.txt', annotations_train_path)

In [12]:
# some images do not have labels, remove them
import os
import glob

# for file in glob.glob("/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/labels/*.txt"):
#     os.remove(file)

# for file in glob.glob("/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/images/*.jpg"):
#     os.remove(file)

# for file in glob.glob("/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/images/*.jpg"):
#     label_file = file.split(".jpg")[0] + ".txt"
#     label_file = label_file.replace("images", "labels")
#     if not os.path.exists(label_file):
#         os.remove(file)

# Drop the entries of test.txt whose image file is not present in the images
# folder.
files = os.listdir("/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/images/")
existing_images = set(files)  # constant-time membership test per line

test_list_path = "/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/test.txt"

# Read the whole list and close the file BEFORE reopening it for writing, so
# the same path is never held open in read and write mode at the same time.
with open(test_list_path, 'r') as f:
    lines = f.readlines()

# Each line is an image path; compare only its last path component.
kept_lines = [line for line in lines if line.split("/")[-1].strip() in existing_images]

with open(test_list_path, 'w') as f:
    f.writelines(kept_lines)


# files = os.listdir("/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/images/")
# with open("/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/train.txt", 'r') as f:
#     lines = f.readlines()
#     with open("/home/rex/gitRepo/split_DNN_framework/pytorchyolo/data/vidvrd/train.txt", 'w') as f:
#         for line in lines:
#             if line.split("/")[-1].strip() in files:
#                 f.write(line)