In [1]:
# create image ids for train and test
# create category ids
# collect annotations in coco format
# convert them to per-image annotation tensors (yolov3.coco.convert_coco_annotations)
# save all images and annotations to the train and test output directories

In [2]:
import os
import shutil
import numpy as np
import pandas as pd

import torch

from yolov3.coco import convert_coco_annotations
from yolov3.data import save_index, json_dump

In [3]:
# Dataset locations. The defaults are the original author's local paths; set
# the RTSD_PATH / RTSD_OUTPUT_DIR environment variables to run the notebook on
# another machine without editing this cell.
rtsd_path = os.environ.get(
    'RTSD_PATH', '/media/semyon/Data/Documents/mipt/1C/state-exam/data'
)
# Frames are read from <rtsd_frames>/<prefix>/ (see create_image_index).
rtsd_frames = os.path.join(rtsd_path, 'rtsd-d3-frames')
# Every sub-directory here is treated as a category holding <prefix>_gt.csv.
rtsd_gt = os.path.join(rtsd_path, 'rtsd-d3-gt')
# Root directory that receives the converted images and annotations.
output_dir = os.environ.get('RTSD_OUTPUT_DIR', '/media/semyon/Data/Documents/rtsd')

In [4]:
def create_dir(dir_path):
    """Create the directory ``dir_path`` if it does not exist yet.

    Missing parent directories are created as well, and calling this on an
    already existing directory is a no-op.

    Args:
        dir_path: path of the directory to create.

    Raises:
        FileExistsError: if ``dir_path`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # `os.path.exists` + `os.mkdir` and also copes with nested paths whose
    # parents have not been created yet.
    os.makedirs(dir_path, exist_ok=True)

def get_path(root, *subdirs):
    """Join ``root`` with any number of further path components.

    Thin convenience wrapper around ``os.path.join``.
    """
    components = (root,) + subdirs
    return os.path.join(*components)

In [15]:
# Make sure the root output directory exists before the cells below write to it.
create_dir(dir_path=output_dir)

In [7]:
def invert_index(index):
    """Return a new dict that maps every value of ``index`` back to its key.

    If several keys share the same value, the key that comes last in
    iteration order is the one that is kept.
    """
    inverted = {}
    for key, value in index.items():
        inverted[value] = key
    return inverted


def create_image_index(rtsd_frames_path, prefix, start=1):
    """Assign consecutive integer ids to the frames of one dataset split.

    Args:
        rtsd_frames_path: directory that contains one sub-directory per split.
        prefix: name of the split sub-directory to index (e.g. ``'test'``).
        start: id given to the first file; the following files get
            ``start + 1``, ``start + 2`` and so on.

    Returns:
        dict mapping image id -> file name (name only, without directory).
    """
    frames_path = os.path.join(rtsd_frames_path, prefix)
    # os.listdir returns entries in arbitrary order; sort them so the id
    # assignment is reproducible across runs, machines and file systems.
    file_names = sorted(os.listdir(frames_path))
    return dict(enumerate(file_names, start))


def create_category_index(rtsd_gt_path, start=1):
    """Assign consecutive integer ids to the category directories.

    Every sub-directory of ``rtsd_gt_path`` is treated as one category;
    plain files in that directory are ignored.

    Args:
        rtsd_gt_path: directory that contains one sub-directory per category.
        start: id given to the first category.

    Returns:
        dict mapping category id -> category (directory) name.
    """
    # Sorted because os.listdir order is arbitrary: category ids have to be
    # identical every time the notebook is run (e.g. once per split),
    # otherwise the saved annotations would disagree about their labels.
    category_names = sorted(
        name
        for name in os.listdir(rtsd_gt_path)
        if os.path.isdir(os.path.join(rtsd_gt_path, name))
    )
    return dict(enumerate(category_names, start))

In [8]:
def collect_annotations(rtsd_gt_path, prefix, image_index_inv, category_index_inv):
    """Collect the ground-truth boxes of one split as COCO-style dicts.

    Every sub-directory of ``rtsd_gt_path`` is treated as a category and is
    expected to contain a ``<prefix>_gt.csv`` file with (at least) the
    columns ``filename``, ``x_from``, ``y_from``, ``width`` and ``height``.

    Args:
        rtsd_gt_path: directory with one sub-directory per category.
        prefix: split name used to pick the ``<prefix>_gt.csv`` files.
        image_index_inv: dict mapping image file name -> image id.
        category_index_inv: dict mapping category name -> category id.

    Returns:
        list of dicts with the keys ``'id'`` (running number starting at 1),
        ``'image_id'``, ``'bbox'`` (``[x_from, y_from, width, height]``) and
        ``'category_id'``.

    Raises:
        FileNotFoundError: if a category directory has no ``<prefix>_gt.csv``.
        KeyError: if a file name or category is missing from the given index.
    """
    annotations = []
    # Sorted so that annotation ids do not depend on the arbitrary order in
    # which os.listdir happens to return the category directories.
    for cat in sorted(os.listdir(rtsd_gt_path)):
        cat_path = os.path.join(rtsd_gt_path, cat)
        if not os.path.isdir(cat_path):
            continue
        anns_df = pd.read_csv(os.path.join(cat_path, f'{prefix}_gt.csv'))
        # itertuples is considerably faster than iterrows and keeps the
        # attribute-style column access.
        for row in anns_df.itertuples(index=False):
            annotations.append({
                'id': len(annotations) + 1,
                'image_id': image_index_inv[row.filename],
                'bbox': [row.x_from, row.y_from, row.width, row.height],
                'category_id': category_index_inv[cat],
            })
    return annotations


def create_annotation_tensors(annotations, image_index, category_index):
    """Convert COCO-style annotations to tensors, one entry per indexed image.

    The conversion itself is delegated to ``convert_coco_annotations``, which
    receives the annotations wrapped in a ``{'annotations': ...}`` dict and an
    identity mapping over the ids of ``category_index``. Every image id of
    ``image_index`` that is absent from the converted result is then given an
    empty ``(0, 6)`` float32 tensor, so images without annotations still have
    an entry.

    Returns:
        The mapping returned by ``convert_coco_annotations`` (presumably keyed
        by image id -- confirm against yolov3.coco), completed as described.
    """
    identity_categories = {cat_id: cat_id for cat_id in category_index}
    tensors_by_image = convert_coco_annotations(
        {'annotations': annotations},
        identity_categories,
    )
    for image_id in image_index:
        if image_id in tensors_by_image:
            continue
        tensors_by_image[image_id] = torch.empty(0, 6, dtype=torch.float32)
    return tensors_by_image

In [20]:
def save_annotation_tensors(annotation_tensors, output_dir, image_index):
    """Write one ``.pt`` file per indexed image plus an ``index.json``.

    For every image id in ``image_index`` the matching entry of
    ``annotation_tensors`` is stored with ``torch.save`` in ``output_dir``,
    under the image's file name with its extension replaced by ``.pt``.
    The mapping image id -> annotation file name is finally written to
    ``<output_dir>/index.json`` through ``save_index``.
    """
    ann_index = {}
    for image_id, img_file_name in image_index.items():
        stem, _extension = os.path.splitext(img_file_name)
        ann_file_name = stem + '.pt'
        ann_index[image_id] = ann_file_name
        torch.save(annotation_tensors[image_id], get_path(output_dir, ann_file_name))
    save_index(ann_index, get_path(output_dir, 'index.json'))
    

def copy_images(rtsd_frames_path, prefix, output_dir, image_index):
    """Copy the indexed frames of one split and write their ``index.json``.

    Each file named in ``image_index`` is copied from
    ``<rtsd_frames_path>/<prefix>/`` to ``output_dir`` under the same name.
    Afterwards ``image_index`` itself is written to
    ``<output_dir>/index.json`` through ``save_index``.
    """
    for file_name in image_index.values():
        source = get_path(rtsd_frames_path, prefix, file_name)
        destination = get_path(output_dir, file_name)
        shutil.copyfile(source, destination)
    save_index(image_index, get_path(output_dir, 'index.json'))

In [22]:
# Dataset split to convert. The value names the frames sub-directory that is
# indexed, the `<prefix>_gt.csv` annotation files that are read, and the
# sub-directory of `output_dir` that receives the results.
prefix = 'test'

In [23]:
# Give every frame of the selected split an id, then build the reverse
# lookup (file name -> id) needed while reading the annotation CSVs.
img_index = create_image_index(rtsd_frames_path=rtsd_frames, prefix=prefix)
img_index_inv = invert_index(index=img_index)

In [24]:
# Give every category directory an id, then build the reverse lookup
# (category name -> id).
cat_index = create_category_index(rtsd_gt_path=rtsd_gt)
cat_index_inv = invert_index(index=cat_index)

In [25]:
# Read the ground-truth CSVs of the selected split into COCO-style dicts.
annotations = collect_annotations(
    rtsd_gt_path=rtsd_gt,
    prefix=prefix,
    image_index_inv=img_index_inv,
    category_index_inv=cat_index_inv,
)

In [26]:
# Convert the annotation dicts to tensors; images without boxes get an empty one.
annotation_tensors = create_annotation_tensors(
    annotations=annotations,
    image_index=img_index,
    category_index=cat_index,
)

In [27]:
# Output layout: <output_dir>/<prefix>/annotations
create_dir(dir_path=get_path(output_dir, prefix))
create_dir(dir_path=get_path(output_dir, prefix, 'annotations'))

In [28]:
# Store one .pt file per image plus index.json in the annotations directory.
save_annotation_tensors(
    annotation_tensors=annotation_tensors,
    output_dir=get_path(output_dir, prefix, 'annotations'),
    image_index=img_index,
)

In [29]:
# Output layout: <output_dir>/<prefix>/images
create_dir(dir_path=get_path(output_dir, prefix, 'images'))

In [30]:
# Copy the frames of the split next to their annotations and write index.json.
copy_images(
    rtsd_frames_path=rtsd_frames,
    prefix=prefix,
    output_dir=get_path(output_dir, prefix, 'images'),
    image_index=img_index,
)