In [None]:
# default_exp annotation.via_adapter

In [None]:
# hide
from nbdev.showdoc import *

In [None]:
# export

import json
import csv
import sys
import argparse
import logging
from os.path import join, splitext
from mlcore.annotation.core import Annotation, Region, RegionShape, parse_region_shape, create_annotation_id

In [None]:
# hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# export

DEFAULT_ANNOTATIONS_FILE = 'via_region_data.json'
DEFAULT_CATEGORY_ID = 'category'

In [None]:
# export

logger = logging.getLogger(__name__)

# VIA Annotation Adapter
> VIA annotation adapter. To use the VIA annotation tool, refer to the [Homepage](http://www.robots.ox.ac.uk/~vgg/software/via/via-1.0.6.html).

Currently supported annotations:
- circle
- ellipse
- point
- polyline
- rectangle

In [None]:
# export


def read_annotations(annotations_file, files_source, category_id=DEFAULT_CATEGORY_ID):
    """
    Read the annotations stored in a VIA annotations file.
    Both the JSON and the CSV file format are supported.
    `annotations_file`: the path to the annotation file to read
    `files_source`: the path to the folder containing the source files
    `category_id`: the ID of the category label
    return: the annotations
    """
    # The work is delegated unchanged to the VIA v1 reader.
    annotations = _read_annotations_v1(annotations_file=annotations_file,
                                       files_source=files_source,
                                       category_id=category_id)
    return annotations

In [None]:
# export


def write_annotations(annotations_file, annotations, category_id=DEFAULT_CATEGORY_ID):
    """
    Write annotations to a VIA annotations file.
    Both the JSON and the CSV file format are supported.
    `annotations_file`: the path to the annotation file to write
    `annotations`: the annotations to write
    `category_id`: the ID of the category label
    """
    # The work is delegated unchanged to the VIA v1 writer.
    _write_annotations_v1(annotations_file=annotations_file,
                          annotations=annotations,
                          category_id=category_id)

In [None]:
# export


def _parse_region_shape_attributes(region_shape_attributes):
    """
    Convert VIA region shape attributes into a region.
    `region_shape_attributes`: the region shape attributes as dictionary
    return: the corresponding region, or an empty `Annotation` if the attributes are empty
    """
    # NOTE(review): an `Annotation` (not a `Region`) is returned for empty
    # attributes although callers handle the result as a region - confirm intent.
    if not region_shape_attributes:
        return Annotation()

    attrs = region_shape_attributes
    shape = parse_region_shape(attrs['name'])

    # Defaults: shapes not handled below keep no points and a zero radius.
    xs, ys = None, None
    r_x, r_y = 0, 0

    if shape == RegionShape.CIRCLE:
        xs, ys = [attrs['cx']], [attrs['cy']]
        # A circle uses the same radius on both axes.
        r_x = r_y = attrs['r']
    elif shape == RegionShape.ELLIPSE:
        xs, ys = [attrs['cx']], [attrs['cy']]
        r_x, r_y = attrs['rx'], attrs['ry']
    elif shape == RegionShape.POINT:
        xs, ys = [attrs['cx']], [attrs['cy']]
    elif shape == RegionShape.POLYGON:
        xs, ys = attrs['all_points_x'], attrs['all_points_y']
    elif shape == RegionShape.RECTANGLE:
        left, top, box_width, box_height = [attrs[key] for key in ('x', 'y', 'width', 'height')]
        # A rectangle is stored as its two opposite corners.
        xs = [left, left + box_width]
        ys = [top, top + box_height]

    return Region(shape=shape, points_x=xs, points_y=ys, radius_x=r_x, radius_y=r_y)

In [None]:
# export


def _create_region_shape_attributes(region: Region):
    """
    Build the VIA region shape attributes of a region.
    `region`: the region to create region shape attributes from
    return: the corresponding region shape attributes as dictionary
    """
    attributes = {"name": str(region.shape)}

    # The first point acts as the center; fall back to 0 if there are no points.
    center_x = 0
    if len(region.points_x):
        center_x = region.points_x[0]
    center_y = 0
    if len(region.points_y):
        center_y = region.points_y[0]

    if region.shape == RegionShape.CIRCLE:
        # A circle has a single radius, the larger of both is used.
        attributes.update(cx=center_x, cy=center_y, r=max(region.radius_x, region.radius_y))
    elif region.shape == RegionShape.ELLIPSE:
        attributes.update(cx=center_x, cy=center_y, rx=region.radius_x, ry=region.radius_y)
    elif region.shape == RegionShape.POINT:
        attributes.update(cx=center_x, cy=center_y)
    elif region.shape == RegionShape.POLYGON:
        attributes.update(all_points_x=region.points_x, all_points_y=region.points_y)
    elif region.shape == RegionShape.RECTANGLE:
        # The two stored corners are converted back to origin plus extent.
        attributes.update({
            'x': region.points_x[0],
            'y': region.points_y[0],
            'width': region.points_x[1] - region.points_x[0],
            'height': region.points_y[1] - region.points_y[0],
        })

    return attributes

## VIA Annotation Tool v1
Read or write annotation files made with the [VIA Annotation Tool v1](http://www.robots.ox.ac.uk/~vgg/software/via/via-1.0.6.html).
Supported annotation file formats are JSON and CSV.

In [None]:
# export


def _read_annotations_v1(annotations_file, files_source, category_id=DEFAULT_CATEGORY_ID):
    """
    Read a VIA v1 annotations file, choosing the reader by file extension.
    Both the JSON and the CSV file format are supported.
    `annotations_file`: the path to the annotation file to read
    `files_source`: the path to the folder containing the source files
    `category_id`: the ID of the category label
    return: the annotations as dictionary
    raises: `ValueError` if the file extension is neither `.json` nor `.csv`
    """
    # The extension is compared case-insensitively.
    extension = splitext(annotations_file)[1].lower()

    if extension == '.json':
        return _read_annotations_json_v1(annotations_file, files_source, category_id)
    if extension == '.csv':
        return _read_annotations_csv_v1(annotations_file, files_source, category_id)

    message = 'Unsupported annotation format at {}'.format(annotations_file)
    logger.error(message)
    raise ValueError(message)

In [None]:
# export


def _read_annotations_csv_v1(annotations_file, files_source, category_id=DEFAULT_CATEGORY_ID):
    """
    Reads a VIA v1 CSV annotations file.
    Each CSV row describes one region; rows of the same file are collected
    into a single annotation. Rows for which no annotation ID can be created
    are skipped with a warning.
    `annotations_file`: the path to the CSV annotation file to read
    `files_source`: the path to the folder containing the source files
    `category_id`: the ID of the category label
    return: the annotations as dictionary, keyed by annotation ID
    """
    annotations = {}

    with open(annotations_file, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            file_name = row['#filename']
            file_path = join(files_source, file_name)
            annotation_id = create_annotation_id(file_path)
            if annotation_id is None:
                logger.warning('File not found, skip annotations at path: {}'.format(file_path))
                continue

            if annotation_id not in annotations:
                # CSV cells are always strings. Convert the size to an integer so
                # it has the same type as the size read from a JSON file (the JSON
                # writer formats it as an integer). Keep the raw value if it is
                # not a valid integer.
                raw_file_size = row['file_size']
                try:
                    file_size = int(raw_file_size)
                except (TypeError, ValueError):
                    file_size = raw_file_size
                annotations[annotation_id] = Annotation(annotation_id=annotation_id, file_name=file_name,
                                                        file_size=file_size, file_path=file_path)

            # Shape and attributes are stored as JSON strings inside the CSV cells.
            region_shape_attributes = json.loads(row['region_shape_attributes'])
            region = _parse_region_shape_attributes(region_shape_attributes)
            region_attributes = json.loads(row['region_attributes'])
            category = region_attributes[category_id] if region_attributes and category_id in region_attributes else None
            # An empty or missing category results in a region without labels.
            region.labels = [category] if category else []
            annotations[annotation_id].regions.append(region)

    return annotations

In [None]:
# export


def _read_annotations_json_v1(annotations_file, files_source, category_id=DEFAULT_CATEGORY_ID):
    """
    Reads a VIA v1 JSON annotations file.
    Entries for which no annotation ID can be created are skipped with a warning.
    `annotations_file`: the path to the JSON annotation file to read
    `files_source`: the path to the folder containing the source files
    `category_id`: the ID of the category label
    return: the file annotations as dictionary, keyed by annotation ID
    """
    file_annotations = {}

    with open(annotations_file) as json_file:
        via_annotations = json.load(json_file)

    for data in via_annotations.values():
        file_name = data['filename']
        file_path = join(files_source, file_name)
        annotation_id = create_annotation_id(file_path)
        if annotation_id is None:
            logger.warning('File not found, skip annotations at path: {}'.format(file_path))
            continue

        # Collect into the result dictionary, never into the parsed VIA data:
        # the latter is being iterated and is keyed by VIA file IDs.
        if annotation_id not in file_annotations:
            file_annotations[annotation_id] = Annotation(annotation_id=annotation_id, file_name=file_name,
                                                         file_size=data['size'], file_path=file_path)

        annotation = file_annotations[annotation_id]

        for region_data in data['regions'].values():
            region = _parse_region_shape_attributes(region_data['shape_attributes'])
            region_attributes = region_data['region_attributes']
            category = region_attributes[category_id] if region_attributes and category_id in region_attributes else None
            # An empty or missing category results in a region without labels.
            region.labels = [category] if category else []
            annotation.regions.append(region)

    return file_annotations

In [None]:
# export


def _write_annotations_v1(annotations_file, annotations, category_id=DEFAULT_CATEGORY_ID):
    """
    Write a VIA v1 annotations file, choosing the writer by file extension.
    Both the JSON and the CSV file format are supported.
    `annotations_file`: the path to the annotation file to write
    `annotations`: the annotations to write
    `category_id`: the ID of the category label
    raises: `ValueError` if the file extension is neither `.json` nor `.csv`
    """
    # The extension is compared case-insensitively.
    extension = splitext(annotations_file)[1].lower()

    if extension == '.json':
        _write_annotations_json_v1(annotations_file, annotations, category_id)
        return
    if extension == '.csv':
        _write_annotations_csv_v1(annotations_file, annotations, category_id)
        return

    message = 'Unsupported annotation format at {}'.format(annotations_file)
    logger.error(message)
    raise ValueError(message)

In [None]:
# export


def _write_annotations_csv_v1(annotations_file, annotations, category_id=DEFAULT_CATEGORY_ID):
    """
    Write the annotations to a VIA v1 CSV annotations file.
    One row is written per region, so annotations without regions produce no rows.
    `annotations_file`: the path to the CSV annotation file to write
    `annotations`: the annotations to write
    `category_id`: the ID of the category label
    """
    columns = ['#filename', 'file_size', 'file_attributes', 'region_count', 'region_id',
               'region_shape_attributes', 'region_attributes']

    with open(annotations_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()

        for annotation in annotations.values():
            for region_id, region in enumerate(annotation.regions):
                shape_attributes = _create_region_shape_attributes(region)
                # All labels of a region are joined into one space separated value.
                labels = region.labels
                label_text = ' '.join(labels) if len(labels) else ''
                row = {
                    '#filename': annotation.file_name,
                    'file_size': annotation.file_size,
                    'file_attributes': '{}',
                    'region_count': len(annotation.regions),
                    'region_id': str(region_id),
                    # Shape and attributes are embedded as JSON strings.
                    'region_shape_attributes': json.dumps(shape_attributes),
                    'region_attributes': json.dumps({category_id: label_text}),
                }
                writer.writerow(row)

In [None]:
# export


def _write_annotations_json_v1(annotations_file, annotations, category_id=DEFAULT_CATEGORY_ID):
    """
    Writes a VIA v1 JSON annotations file.
    Every annotation becomes one entry, keyed by its file name followed by its file size.
    `annotations_file`: the path to the JSON annotation file to write
    `annotations`: the annotations to write
    `category_id`: the ID of the category label
    """
    via_annotations = {}

    for annotation in annotations.values():
        # Plain `{}` placeholders give the same key as `{:s}{:d}` for an integer
        # size, but do not fail if the size is a string (e.g. after a CSV read).
        file_id = '{}{}'.format(annotation.file_name, annotation.file_size)
        regions = {
            str(index): {
                'shape_attributes': _create_region_shape_attributes(region),
                'region_attributes': {
                    # All labels of a region are joined into one space separated value.
                    category_id: ' '.join(region.labels) if len(region.labels) else ''
                }
            }
            for index, region in enumerate(annotation.regions)
        }
        via_annotations[file_id] = {
            'fileref': "",
            'size': annotation.file_size,
            'filename': annotation.file_name,
            'base64_img_data': "",
            # File attributes are a JSON object in the VIA JSON format, so an
            # empty object is written instead of the string '{}'.
            'file_attributes': {},
            "regions": regions
        }

    with open(annotations_file, 'w') as json_file:
        json.dump(via_annotations, json_file)

## Helper Methods

In [None]:
# export


def configure_logging(logging_level=logging.INFO):
    """
    Configures logging for the system.

    Sets the level of the module logger and makes sure it has exactly one
    handler writing to the standard output. Calling this function repeatedly
    only updates the level instead of stacking handlers, which would
    duplicate every log line.

    :param logging_level: The logging level to use.
    """
    logger.setLevel(logging_level)

    # Reuse an already attached standard output handler, if there is one.
    for existing_handler in logger.handlers:
        is_stream_handler = isinstance(existing_handler, logging.StreamHandler)
        if is_stream_handler and getattr(existing_handler, 'stream', None) is sys.stdout:
            existing_handler.setLevel(logging_level)
            return

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging_level)

    logger.addHandler(handler)

## Run from command line

To run the VIA annotation adapter from the command line, use the following command:
`python -m mlcore.annotation.via_adapter [parameters]`

The following parameters are supported:
- `[annotation]`: The path to the VIA annotation file (e.g.: *imagesets/segmentation/car_damage/via_region_data.json*)
- `--files_source`: The path to the folder containing the source files (e.g.: *imagesets/segmentation/car_damage/trainval*)
- `--category-label-key`: The key under which the category name is stored in the annotation file, defaults to *category*.

In [None]:
# export


if __name__ == '__main__' and '__file__' in globals():
    # Entry point for direct shell execution; only runs when `__file__` is defined.
    configure_logging()

    parser = argparse.ArgumentParser()
    parser.add_argument("annotation", help="The path to the VIA annotation file.")
    # NOTE(review): `--files_source` has no default, so it is `None` when omitted - confirm
    # whether it should be required.
    parser.add_argument("--files_source", help="The path to the folder containing the source files.")
    parser.add_argument("--category-label-key", default=DEFAULT_CATEGORY_ID,
                        help="The key of the category name.")
    args = parser.parse_args()

    # The parsed annotations are not used any further here.
    read_annotations(annotations_file=args.annotation,
                     files_source=args.files_source,
                     category_id=args.category_label_key)
