In [None]:
# default_exp category_tools

In [None]:
# hide
from nbdev.showdoc import *

In [None]:
# export

import sys
import argparse
import logging
from mlcore.core import Type

In [None]:
# hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# export

# File name read_categories() falls back to when no categories file is given.
DEFAULT_CATEGORIES_FILE = 'categories.txt'
# Placeholder category name; not used in this section -- presumably marks
# items without an assigned category (confirm against callers).
NOT_CATEGORIZED = '[NOT_CATEGORIZED]'
# Category name read_categories() prepends for object detection and segmentation data-sets.
BACKGROUND_CLASS = '_background_'
# Presumably the class index of the background class (it is prepended at index 0); confirm against callers.
BACKGROUND_CLASS_CODE = 0

In [None]:
# export

# Module-level logger; configure_logging() sets its level and attaches a stdout handler.
logger = logging.getLogger(__name__)

# Category File Tools
> Category File Tools Notes

Category names of an image-set or data-set are written in plain text, one per line, and stored in a file named `categories.txt`.
The order of the category names is important: it reflects the index of the model's result vector, which allows easy mapping.

In [None]:
# export


def read_categories(categories_file=None, data_set_type=Type.IMAGE_CLASSIFICATION):
    """
    Reads the categories from a categories file.
    If the data-set type is image segmentation or object detection, a background class at index 0 is prepended.
    If the optional `categories_file` is not given, the file name *categories.txt* is used by default.
    An empty (or whitespace-only) categories file yields no categories instead of a single empty name.
    `categories_file`: the categories file name, if not the default
    `data_set_type`: the type of the data-set to create the categories for
    return: a list of the category names
    """
    if categories_file is None:
        categories_file = DEFAULT_CATEGORIES_FILE

    with open(categories_file) as f:
        # leading/trailing whitespace (e.g. the final newline) is not part of any category name
        content = f.read().strip()

    # ''.split('\n') returns [''], which would report one bogus empty category for an empty file
    categories = content.split('\n') if content else []
    if data_set_type in [Type.IMAGE_OBJECT_DETECTION, Type.IMAGE_SEGMENTATION]:
        # the background class always occupies index 0 for these data-set types
        categories = [BACKGROUND_CLASS] + categories
    return categories

## Helper Methods

In [None]:
# export


def configure_logging(logging_level=logging.INFO):
    """
    Configures logging for the system.

    Sets the level of the module logger and makes sure exactly one handler
    writing to the current `sys.stdout` is attached. Calling this function
    repeatedly updates the level of that handler instead of attaching another
    one, so log messages are not emitted multiple times.

    :param logging_level: The logging level to use.
    """
    logger.setLevel(logging_level)

    # Reuse a stdout handler attached by a previous call, otherwise every
    # call would add one more handler and duplicate each log line.
    for existing in logger.handlers:
        if isinstance(existing, logging.StreamHandler) and getattr(existing, 'stream', None) is sys.stdout:
            existing.setLevel(logging_level)
            return

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging_level)

    logger.addHandler(handler)

## Run from command line

To run the category tools from the command line, use the following command:
`python -m mlcore.category_tools [parameters]`

The following parameters are supported:
- `[categories]`: The path to the categories file. (e.g.: *categories.txt*)

In [None]:
# export


if __name__ == '__main__' and '__file__' in globals():
    # only when executed directly as a script from the shell
    configure_logging()

    # a single positional argument: the categories file to read
    cli = argparse.ArgumentParser()
    cli.add_argument("categories", help="The path to the categories file.")
    options = cli.parse_args()

    # read the category names and show them on stdout
    print(read_categories(options.categories))
