In [None]:
# default_exp category_tools

In [None]:
# hide
from nbdev.showdoc import *

In [None]:
# export
import argparse
import logging
from os.path import isfile
from aiforce.dataset.type import DatasetType

In [None]:
# hide
%reload_ext autoreload
%autoreload 2

In [None]:
# export
# File name used by `read_categories` / `write_categories` when no `categories_file` is given.
DEFAULT_CATEGORIES_FILE = 'categories.txt'
# Placeholder category name; presumably marks items without an assigned category
# (not referenced in this module -- TODO confirm against callers).
NOT_CATEGORIZED = '[NOT_CATEGORIZED]'
# Name of the background class that `read_categories` prepends for
# object detection and segmentation data-sets.
BACKGROUND_CLASS = '_background_'
# Index at which `write_categories` looks for the background class in a category list.
BACKGROUND_CLASS_CODE = 0

In [None]:
# export

# Module-level logger, named after this module via `__name__`.
logger = logging.getLogger(__name__)

# Category File Tools
> Category File Tools Notes

Category names of an image-set or data-set are written in plain text, one name per line (separated by a *newline*), and stored in a file `categories.txt`.
The order of the category names is important and reflects the index of the result vector from the model for easy mapping.

In [None]:
# export
def read_categories(categories_file=None, dataset_type=DatasetType.IMAGE_CLASSIFICATION):
    """
    Reads the categories from a categories file.
    If the dataset type is image segmentation or object detection, a background class at index 0 is prepended.
    If the optional `categories_file` is not given, the file name *categories.txt* is used by default.
    If the file does not exist, a warning is logged and an empty list is returned.
    An empty (or whitespace-only) file contains no categories, so no category names are read from it.
    `categories_file`: the categories file name, if not the default
    `dataset_type`: the type of the data-set to create the categories for
    return: a list of the category names
    """
    if categories_file is None:
        categories_file = DEFAULT_CATEGORIES_FILE

    if not isfile(categories_file):
        logger.warning('Categories file not found at: {}'.format(categories_file))
        return []
    with open(categories_file) as f:
        content = f.read().strip()
    # ''.split('\n') evaluates to [''], which would report one bogus empty
    # category for an empty file, so the empty case is handled explicitly.
    categories = content.split('\n') if content else []
    logger.info('Read {} categories from categories file at: {}'.format(len(categories), categories_file))
    if dataset_type in [DatasetType.IMAGE_OBJECT_DETECTION, DatasetType.IMAGE_SEGMENTATION]:
        categories = [BACKGROUND_CLASS] + categories
        logger.info('Prepend background class {} to the categories'.format(BACKGROUND_CLASS))

    return categories

In [None]:
# export
def write_categories(categories, categories_file=None):
    """
    Write the categories to a categories file, one category name per line.
    If the categories contain the background class at the background index (`BACKGROUND_CLASS_CODE`),
    it is removed before writing, so the file only contains the real category names.
    If the optional `categories_file` is not given, the file name *categories.txt* is used by default
    `categories`: a list of the category names to write
    `categories_file`: the categories file name
    """
    if categories_file is None:
        categories_file = DEFAULT_CATEGORIES_FILE

    if len(categories) > BACKGROUND_CLASS_CODE and categories[BACKGROUND_CLASS_CODE] == BACKGROUND_CLASS:
        logger.info('Remove background class {} from the categories'.format(BACKGROUND_CLASS))
        # Remove the entry at the index that was just checked, instead of
        # always dropping the first element.
        categories = categories[:BACKGROUND_CLASS_CODE] + categories[BACKGROUND_CLASS_CODE + 1:]
    with open(categories_file, 'w') as f:
        f.write('\n'.join(categories))
    logger.info('Write {} categories to categories file at: {}'.format(len(categories), categories_file))

## Helper Methods

In [None]:
# export
def configure_logging(logging_level=logging.INFO):
    """
    Set up logging by calling `logging.basicConfig` with the given level.
    `logging_level`: the logging level to configure, `logging.INFO` if not given
    """
    settings = {'level': logging_level}
    logging.basicConfig(**settings)

## Run from command line

To run the category tools from the command line, use the following command:
`python -m aiforce.category_tools [parameters]`

The following parameters are supported:
- `[categories]`: The path to the categories file. (e.g.: *categories.txt*)

In [None]:
# export
if '__file__' in globals() and __name__ == '__main__':
    # Only runs on direct shell execution: set up logging, parse the single
    # positional argument and print the categories read from that file.
    configure_logging()

    cli = argparse.ArgumentParser()
    cli.add_argument("categories", help="The path to the categories file.")
    cli_args = cli.parse_args()

    loaded_categories = read_categories(cli_args.categories)
    print(loaded_categories)

In [None]:
# hide

# Generate the scripts directly from the notebooks; every processed notebook
# is reported in the cell output as "Converted <name>.ipynb.".
from nbdev.export import notebook2script
notebook2script()

Converted annotation-core.ipynb.
Converted annotation-folder_category_adapter.ipynb.
Converted annotation-multi_category_adapter.ipynb.
Converted annotation-via_adapter.ipynb.
Converted annotation-yolo_adapter.ipynb.
Converted annotation_converter.ipynb.
Converted annotation_viewer.ipynb.
Converted category_tools.ipynb.
Converted core.ipynb.
Converted dataset-core.ipynb.
Converted dataset-image_classification.ipynb.
Converted dataset-image_object_detection.ipynb.
Converted dataset-image_segmentation.ipynb.
Converted dataset-type.ipynb.
Converted dataset_generator.ipynb.
Converted evaluation-core.ipynb.
Converted geometry.ipynb.
Converted image-color_palette.ipynb.
Converted image-inference.ipynb.
Converted image-opencv_tools.ipynb.
Converted image-pillow_tools.ipynb.
Converted image-tools.ipynb.
Converted index.ipynb.
Converted io-core.ipynb.
Converted tensorflow-tflite_converter.ipynb.
Converted tensorflow-tflite_metadata.ipynb.
Converted tensorflow-tfrecord_builder.ipynb.
Converted t