In [None]:
# default_exp dataset.image_classification

In [None]:
# hide
from nbdev.showdoc import *

In [None]:
# export

import logging
from os.path import join
from aiforce.core import assign_arg_prefix
from aiforce.annotation.core import AnnotationAdapter
from aiforce.dataset.core import Dataset
from aiforce.tensorflow.tfrecord_builder import create_labelmap_file

In [None]:
# hide
# (re)load the autoreload extension and reload modules automatically, so edits to imported
# project modules are picked up without restarting the kernel
%reload_ext autoreload
%autoreload 2
# render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# export

# module level logger, used by ImageClassificationDataset for its progress messages
logger = logging.getLogger(__name__)

# Dataset for image classification

> Creates a dataset for image classification. Single-label and multi-label classification are supported.

Creates a data-set for a classification or segmentation task. If an annotation file is present, the annotations are also prepared.
The data-set is created based on an image-set.

## Image-Set

Image-sets are collected images to build a data-set from, stored in the `imagesets` folder.
The `imagesets` folder contains the following folder structure:
- imagesets/*[image_set_type]*/*[image_set_name]*

Inside the `[image_set_name]` folder are the following files / folders
- `test/`: test images (benchmark)
- `trainval/`: training and validation images for [cross validation](https://pdc-pj.backlog.jp/wiki/RAD_RAD/Neural+Network+-+Training)
- `categories.txt`: all categories (classes) the image-set contains

## Data-Set Folders

Data-sets are stored in the `datasets` base folder.
The `datasets` folder contains the following folder structure:
- datasets/*[data_set_type]*/*[data_set_name]*
where `[data_set_type]` is the same as the corresponding `[image_set_type]` and `[data_set_name]` is the same as the corresponding `[image_set_name]`.

Inside the `[data_set_name]` folder are the following files / folders
- `test/`: test set (benchmark)
- `train/`: training set
- `val/`: validation set
- `categories.txt`: all categories (classes) the data-set contains

## Create a classification data-set

A classification data-set can be created from a classification image-set.
All images are validated to check that they belong to one of the given categories. If categories with no images are found, or if images belong to a category not listed in `categories.txt`, the data-set cannot be created.

In [None]:
# export


class ImageClassificationDataset(Dataset):
    """
    Dataset builder for image classification.

    Extends the base `Dataset` by assigning the image orientation to every copied image and by
    optionally generating a `label_map.pbtxt` file when tfrecord creation is enabled.
    """

    def __init__(self, input_adapter: AnnotationAdapter, output_adapter: AnnotationAdapter, split=None, seed=None,
                 sample=None, tfrecord=False):
        """
        `input_adapter`: the annotation adapter handed to the base class as input
        `output_adapter`: the annotation adapter handed to the base class as output
        `split`: forwarded unchanged to the base class
        `seed`: forwarded unchanged to the base class
        `sample`: forwarded unchanged to the base class
        `tfrecord`: if set, a labelmap file is generated when the dataset is copied
        """
        super().__init__(input_adapter, output_adapter, split, seed, sample)
        self.tfrecord = tfrecord

    @classmethod
    def argparse(cls, prefix=None):
        """
        Build the argument parser for command line use.
        The parser of the base class is extended by the `--tfrecord` flag.
        `prefix`: a parameter prefix, passed to the base parser and applied to the flag name
        return: the argument parser
        """
        arg_parser = super().argparse(prefix=prefix)
        tfrecord_option = assign_arg_prefix("--tfrecord", prefix)
        arg_parser.add_argument(
            tfrecord_option,
            action="store_true",
            dest="tfrecord",
            help="Also create .tfrecord files.",
        )
        return arg_parser

    def copy(self, train_annotation_keys, val_annotation_keys, test_files=None):
        """
        Copy the images into the dataset via the base class and call `assign_orientation` on every
        copied file (removing the EXIF orientation information by hard-rotating the images).
        If tfrecord creation is enabled, a `label_map.pbtxt` file is generated in the output path.
        `train_annotation_keys`: The list of training annotation keys
        `val_annotation_keys`: The list of validation annotation keys
        `test_files`: The list of test file paths
        return: A tuple containing train, val and test target file paths
        """
        train_targets, val_targets, test_targets = super().copy(
            train_annotation_keys, val_annotation_keys, test_files
        )
        targets = (train_targets, val_targets, test_targets)

        # handle the orientation of all copied images, independent of the subset they belong to
        all_targets = train_targets + val_targets + test_targets
        target_count = len(all_targets)
        logger.info('Start assign image orientation to {} images'.format(target_count))
        for target_path in all_targets:
            self.assign_orientation(target_path)
        logger.info('Finished assign image orientation to {} images'.format(target_count))

        # the labelmap is only needed when tfrecords are created
        if not self.tfrecord:
            return targets

        labelmap_output_file = join(self.output_adapter.path, 'label_map.pbtxt')
        logger.info('Generate {}'.format(labelmap_output_file))
        # NOTE(review): the trailing 1 is presumably the first label id - confirm against create_labelmap_file
        create_labelmap_file(labelmap_output_file, list(self.categories), 1)
        return targets

    def build_info(self):
        """
        Run the `build_info` of the base class and additionally log whether tfrecord creation is enabled.
        """
        super().build_info()
        tfrecord_message = 'create_tfrecord: {}'.format(self.tfrecord)
        logger.info(tfrecord_message)


In [None]:
# hide

# for generating scripts from notebook directly
# (the call converts every notebook of the project, not only this one - see the cell output)
from nbdev.export import notebook2script
notebook2script()

Converted annotation-core.ipynb.
Converted annotation-folder_category_adapter.ipynb.
Converted annotation-multi_category_adapter.ipynb.
Converted annotation-via_adapter.ipynb.
Converted annotation-yolo_adapter.ipynb.
Converted annotation_converter.ipynb.
Converted annotation_viewer.ipynb.
Converted category_tools.ipynb.
Converted core.ipynb.
Converted dataset-core.ipynb.
Converted dataset-image_classification.ipynb.
Converted dataset-image_object_detection.ipynb.
Converted dataset-image_segmentation.ipynb.
Converted dataset-type.ipynb.
Converted dataset_generator.ipynb.
Converted evaluation-core.ipynb.
Converted geometry.ipynb.
Converted image-color_palette.ipynb.
Converted image-inference.ipynb.
Converted image-opencv_tools.ipynb.
Converted image-pillow_tools.ipynb.
Converted image-tools.ipynb.
Converted index.ipynb.
Converted io-core.ipynb.
Converted tensorflow-tflite_converter.ipynb.
Converted tensorflow-tflite_metadata.ipynb.
Converted tensorflow-tfrecord_builder.ipynb.
Converted t