In [None]:
# default_exp annotation.folder_category_adapter

In [None]:
# hide

from nbdev.showdoc import *

In [None]:
# export

import argparse
import logging
import shutil
from os.path import join, normpath, sep, basename, isfile
from mlcore.io.core import create_folder, scan_files
from mlcore.annotation.core import Annotation, AnnotationAdapter, Region

In [None]:
# hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# export

# Folder index used by `FolderCategoryAdapter` when no `category_index` argument is given.
# -1 selects the parent folder of each file as its category.
DEFAULT_CATEGORY_FOLDER_INDEX = -1

In [None]:
# export

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Folder Category Annotation Adapter
> Adapter to read and write annotations where the folder structure represents the categories.

The `args` parameter contains the following options:
- `files_path`: the path to the folder containing the subfolders that serve as category labels and hold the source files (e.g.: *data/segmentation/my_collection/trainval*)
- `category_index`: the index of the folder representing the category, defaults to *-1* (the parent folder of the file)

In [None]:
# export


class FolderCategoryAdapter(AnnotationAdapter):
    """
    Adapter to read and write annotations where the folder structure represents the categories.
    `args`: the arguments containing the parameters
    """

    def __init__(self, args):
        """
        Create the adapter from the parsed arguments.
        `args`: the arguments containing `files_path` and the optional `category_index`
        """
        super().__init__(args)
        self.files_path = args.files_path
        # Fall back to the default only when no index was given at all.
        self.category_index = args.category_index if args.category_index is not None else DEFAULT_CATEGORY_FOLDER_INDEX

    def read(self):
        """
        Read annotations from folder structure representing the categories.
        Files without a folder at the configured category index are skipped and are not part of the result.
        return: the annotations as dictionary, keyed by the file path with the `files_path` prefix trimmed
        """
        annotations = {}

        logger.info('Read annotations from {}'.format(self.files_path))

        file_paths = scan_files(self.files_path)

        skipped_annotations = []
        for file_path in file_paths:
            trimmed_path = self._trim_base_path(file_path, self.files_path)
            path_split = normpath(trimmed_path).lstrip(sep).split(sep)

            # Validate before registering the annotation, so that a skipped file
            # neither ends up in the result nor counts as a read annotation.
            if len(path_split) <= abs(self.category_index):
                logger.warning("{}: No category folder found, skip annotation.".format(trimmed_path))
                skipped_annotations.append(file_path)
                continue

            if trimmed_path not in annotations:
                annotations[trimmed_path] = Annotation(annotation_id=trimmed_path, file_path=file_path)
            annotation = annotations[trimmed_path]

            # The last path element is the file name, so the index is shifted by one:
            # negative indices count upwards from the parent folder of the file (-1),
            # positive indices count downwards from the first folder below the base path (1).
            # NOTE: an index of 0 resolves to the file name itself.
            category = path_split[self.category_index - 1]
            region = Region(labels=[category])
            annotation.regions.append(region)

        logger.info('Finished read annotations')
        logger.info('Annotations read: {}'.format(len(annotations)))
        if skipped_annotations:
            logger.info('Annotations skipped: {}'.format(len(skipped_annotations)))
        return annotations

    def write(self, annotations):
        """
        Write annotations to folder structure representing the categories.
        The category folder is created, if not exist, and corresponding files are copied into the labeled folder.
        Annotations whose source file is missing are skipped, as are labels whose target file already exists.
        `annotations`: the annotations to write
        """
        logger.info('Write annotations to {}'.format(self.files_path))
        skipped_annotations = []
        for annotation in annotations.values():
            if not isfile(annotation.file_path):
                logger.warning("{}: Source file not found, skip annotation.".format(annotation.file_path))
                skipped_annotations.append(annotation.file_path)
                continue

            skipped_labels = []
            annotation_labels = annotation.labels()
            for label in annotation_labels:
                category_folder = create_folder(join(self.files_path, label))
                target_file = join(category_folder, basename(annotation.file_path))
                # Never overwrite an existing file in the category folder.
                if isfile(target_file):
                    logger.warning("{}: Target file already exist, skip label {}.".format(annotation.file_path, label))
                    skipped_labels.append(label)
                    continue
                # copy the file
                shutil.copy2(annotation.file_path, target_file)
            # Nothing was copied for this annotation (this includes annotations without any label).
            if len(skipped_labels) == len(annotation_labels):
                logger.warning("{}: All labels skipped, skip annotation.".format(annotation.file_path))
                skipped_annotations.append(annotation.file_path)

        logger.info('Finished write annotations')
        logger.info('Annotations written: {}'.format(len(annotations) - len(skipped_annotations)))
        if skipped_annotations:
            logger.info('Annotations skipped: {}'.format(len(skipped_annotations)))

    @classmethod
    def _trim_base_path(cls, file_path, base_path):
        """
        Trims the base path from a file path.
        The base path is only trimmed on a path boundary, a plain string prefix is left untouched.
        `file_path`: the file path to trim from
        `base_path`: the base path to trim
        return: the trimmed file path
        """
        if not base_path or not file_path.startswith(base_path):
            return file_path

        remainder = file_path[len(base_path):]
        # Accept the OS separator as well as '/' as a boundary.
        separators = (sep, '/')
        # A base of "data/train" must not cut "data/train_extra/a.jpg" down to "_extra/a.jpg".
        if remainder and not base_path.endswith(separators) and not remainder.startswith(separators):
            return file_path
        return remainder

    @classmethod
    def argparse(cls, prefix=None):
        """
        Returns the argument parser containing argument definition for command line use.
        `prefix`: a parameter prefix to set, if needed
        return: the argument parser
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(cls.assign_prefix('--files_path', prefix),
                            dest="files_path",
                            help="The path to the folder containing the files.",
                            required=True)
        # No default here: a missing index is resolved to the module default in the constructor.
        parser.add_argument(cls.assign_prefix('--category_index', prefix),
                            dest="category_index",
                            help="The folder index, representing the category.",
                            type=int,
                            default=None)
        return parser

In [None]:
show_doc(FolderCategoryAdapter.read)
show_doc(FolderCategoryAdapter.write)
show_doc(FolderCategoryAdapter.argparse)

In [None]:
# hide

# for generating scripts from notebook directly
from nbdev.export import notebook2script
notebook2script()

Converted annotation-core.ipynb.
Converted annotation-folder_category_adapter.ipynb.
Converted annotation-multi_category_adapter.ipynb.
Converted annotation-via_adapter.ipynb.
Converted annotation-yolo_adapter.ipynb.
Converted annotation_converter.ipynb.
Converted annotation_viewer.ipynb.
Converted category_tools.ipynb.
Converted core.ipynb.
Converted dataset-core.ipynb.
Converted dataset-image_classification.ipynb.
Converted dataset-image_object_detection.ipynb.
Converted dataset-image_segmentation.ipynb.
Converted dataset-type.ipynb.
Converted dataset_generator.ipynb.
Converted evaluation-core.ipynb.
Converted geometry.ipynb.
Converted image-color_palette.ipynb.
Converted image-inference.ipynb.
Converted image-opencv_tools.ipynb.
Converted image-pillow_tools.ipynb.
Converted image-tools.ipynb.
Converted index.ipynb.
Converted io-core.ipynb.
Converted tensorflow-tflite_converter.ipynb.
Converted tensorflow-tflite_metadata.ipynb.
Converted tensorflow-tfrecord_builder.ipynb.
Converted t