From 0d9c4b7bb8d3128753cc51e1886ce360be6d0a47 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Thu, 13 Sep 2018 18:10:14 -0700 Subject: [PATCH 1/6] Handle bad VOC exports --- labelbox/__init__.py | 2 +- labelbox/exporters/voc_exporter.py | 45 ++++++++++++++++++++++-------- setup.cfg | 3 +- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/labelbox/__init__.py b/labelbox/__init__.py index 704a5efcc..6f28194a6 100644 --- a/labelbox/__init__.py +++ b/labelbox/__init__.py @@ -1,3 +1,3 @@ "The Labelbox python package." -__version__ = '0.0.3' +__version__ = '0.0.4' diff --git a/labelbox/exporters/voc_exporter.py b/labelbox/exporters/voc_exporter.py index 7807ece32..e5a5143c8 100644 --- a/labelbox/exporters/voc_exporter.py +++ b/labelbox/exporters/voc_exporter.py @@ -44,7 +44,13 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir, for data in label_data: try: - _write_label(data, label_format, images_output_dir, annotations_output_dir) + write_label( + data['ID'], + data['Labeled Data'], + data['Label'], + label_format, + images_output_dir, + annotations_output_dir) except requests.exceptions.MissingSchema as exc: logging.exception(exc) @@ -54,14 +60,23 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir, continue -def _write_label( - data, label_format, images_output_dir, annotations_output_dir): - "Writes a Pascal VOC formatted image and label pair to disk." +def write_label(label_id, image_url, labels, label_format, images_output_dir, annotations_output_dir): + """Writes a single Pascal VOC formatted image and label pair to disk. + + Args: + label_id (str): ID for the instance to write + image_url (str): URL to download image file from + labels (str): Labelbox formatted labels to use for generating annotation + label_format (str): Format of the labeled data. Valid options are: "WKT" and "XY", default is "WKT". + annotations_output_dir (str): File path of directory to write Pascal VOC + annotation files. + images_output_dir (str): File path of directory to write images. + """ # Download image and save it - response = requests.get(data['Labeled Data'], stream=True) + response = requests.get(image_url, stream=True) response.raw.decode_content = True image = Image.open(response.raw) - image_name = ('{img_id}.{ext}'.format(img_id=data['ID'], ext=image.format.lower())) + image_name = ('{img_id}.{ext}'.format(img_id=label_id, ext=image.format.lower())) image_fqn = os.path.join(images_output_dir, image_name) image.save(image_fqn, format=image.format) @@ -70,19 +85,19 @@ def _write_label( xml_writer = PascalWriter(image_fqn, width, height) # remove classification labels (Skip, etc...) - if not callable(getattr(data['Label'], 'keys', None)): + if not callable(getattr(labels, 'keys', None)): # skip if no categories (e.g. "Skip") return # convert label to Pascal VOC format - for category_name, wkt_data in data['Label'].items(): + for category_name, paths in labels.items(): if label_format == 'WKT': xml_writer = _add_pascal_object_from_wkt( - xml_writer, img_height=height, wkt_data=wkt_data, + xml_writer, img_height=height, wkt_data=paths, label=category_name) elif label_format == 'XY': xml_writer = _add_pascal_object_from_xy( - xml_writer, img_height=height, polygons=wkt_data, + xml_writer, img_height=height, polygons=paths, label=category_name) else: exc = UnknownFormatError(label_format=label_format) @@ -90,7 +105,7 @@ def _write_label( raise exc # write Pascal VOC xml annotation for image - xml_writer.save(os.path.join(annotations_output_dir, '{}.xml'.format(data['ID']))) + xml_writer.save(os.path.join(annotations_output_dir, '{}.xml'.format(label_id))) def _add_pascal_object_from_wkt(xml_writer, img_height, wkt_data, label): @@ -112,10 +127,16 @@ def _add_pascal_object_from_wkt(xml_writer, img_height, wkt_data, label): def _add_pascal_object_from_xy(xml_writer, img_height, polygons, label): + if not isinstance(polygons, list): + # polygons is not [{'geometry': [xy]}] nor [[xy]] + return xml_writer for polygon in polygons: if 'geometry' in polygon: # V3 polygon = polygon['geometry'] - assert isinstance(polygon, list) # V2 and V3 + if not isinstance(polygon, list) \ + or not all(map(lambda p: 'x' in p and 'y' in p, polygon)): + # couldn't make a list of points, give up + return xml_writer xy_coords = [] for point in polygon: diff --git a/setup.cfg b/setup.cfg index 3b4dfa5b8..7e5afeb6e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,7 +25,8 @@ classifiers = [options] zip_safe = False -packages = find_namespace: +packages = + labelbox include_package_data = True install_requires = jinja2 From 5067c605a0bb349ce0bb16690aceb62273a26683 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Thu, 13 Sep 2018 18:23:25 -0700 Subject: [PATCH 2/6] Make add_label public --- labelbox/exporters/coco_exporter.py | 56 ++++++++++++++++++++--------- labelbox/exporters/voc_exporter.py | 23 ++++++------ 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/labelbox/exporters/coco_exporter.py b/labelbox/exporters/coco_exporter.py index 79635b4a5..3f88a99de 100644 --- a/labelbox/exporters/coco_exporter.py +++ b/labelbox/exporters/coco_exporter.py @@ -2,13 +2,14 @@ Module for converting labelbox.com JSON exports to MS COCO format. """ -import json import datetime as dt +import json import logging +from PIL import Image +import requests from shapely import wkt from shapely.geometry import Polygon -import requests -from PIL import Image +from typing import Any, Dict, Sequence from labelbox.exceptions import UnknownFormatError @@ -25,15 +26,7 @@ def from_json(labeled_data, coco_output, label_format='WKT'): for data in label_data: # Download and get image name try: - image = { - "id": data['ID'], - "file_name": data['Labeled Data'], - "license": None, - "flickr_url": data['Labeled Data'], - "coco_url": data['Labeled Data'], - "date_captured": None, - } - _add_label(coco, image, data['Label'], label_format) + add_label(coco, data['ID'], data['Labeled Data'], data['Label'], label_format) except requests.exceptions.MissingSchema as exc: logging.exception(exc) continue @@ -45,8 +38,16 @@ def from_json(labeled_data, coco_output, label_format='WKT'): file_handle.write(json.dumps(coco)) -def make_coco_metadata(project_name, created_by): - "Initializes COCO export data structure." +def make_coco_metadata(project_name: str, created_by: str) -> Dict[str, Any]: + """Initializes COCO export data structure. + + Args: + project_name: name of the project + created_by: email of the project creator + + Returns: + The COCO export represented as a dictionary. + """ coco = { 'info': None, 'images': [], @@ -67,9 +68,30 @@ def make_coco_metadata(project_name, created_by): return coco -def _add_label(coco, image, labels, label_format): - "Incrementally updates COCO export data structure with a new label." - response = requests.get(image['coco_url'], stream=True) +def add_label( + coco: Dict[str, Any], label_id: str, image_url: str, + labels: Sequence[Any], label_format: str): + """Incrementally updates COCO export data structure with a new label. + + Args: + coco: The current COCO export, will be incrementally updated by this method. + label_id: ID for the instance to write + image_url: URL to download image file from + labels: Labelbox formatted labels to use for generating annotation + label_format: Format of the labeled data. Valid options are: "WKT" and "XY", default is "WKT". + + Returns: + The updated COCO export represented as a dictionary. + """ + image = { + "id": label_id, + "file_name": image_url, + "license": None, + "flickr_url": image_url, + "coco_url": image_url, + "date_captured": None, + } + response = requests.get(image_url, stream=True) response.raw.decode_content = True image['width'], image['height'] = Image.open(response.raw).size diff --git a/labelbox/exporters/voc_exporter.py b/labelbox/exporters/voc_exporter.py index e5a5143c8..9d6be5cf8 100644 --- a/labelbox/exporters/voc_exporter.py +++ b/labelbox/exporters/voc_exporter.py @@ -2,12 +2,13 @@ Module for converting labelbox.com JSON exports to Pascal VOC 2012 format. """ -import os import json import logging -from shapely import wkt -import requests +import os from PIL import Image +import requests +from shapely import wkt +from typing import Any, Sequence from labelbox.exceptions import UnknownFormatError from labelbox.exporters.pascal_voc_writer import Writer as PascalWriter @@ -60,17 +61,19 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir, continue -def write_label(label_id, image_url, labels, label_format, images_output_dir, annotations_output_dir): +def write_label( + label_id: str, image_url: str, labels: Sequence[Any], label_format: str, + images_output_dir: str, annotations_output_dir: str): """Writes a single Pascal VOC formatted image and label pair to disk. Args: - label_id (str): ID for the instance to write - image_url (str): URL to download image file from - labels (str): Labelbox formatted labels to use for generating annotation - label_format (str): Format of the labeled data. Valid options are: "WKT" and "XY", default is "WKT". - annotations_output_dir (str): File path of directory to write Pascal VOC + label_id: ID for the instance to write + image_url: URL to download image file from + labels: Labelbox formatted labels to use for generating annotation + label_format: Format of the labeled data. Valid options are: "WKT" and "XY", default is "WKT". + annotations_output_dir: File path of directory to write Pascal VOC annotation files. - images_output_dir (str): File path of directory to write images. + images_output_dir: File path of directory to write images. """ # Download image and save it response = requests.get(image_url, stream=True) From 807b757cdd227d44e25b65918f4a53057a4ead60 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Thu, 13 Sep 2018 18:34:15 -0700 Subject: [PATCH 3/6] Linter --- labelbox/exporters/coco_exporter.py | 47 ++++++++++++++++------------- labelbox/exporters/voc_exporter.py | 7 +++-- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/labelbox/exporters/coco_exporter.py b/labelbox/exporters/coco_exporter.py index 3f88a99de..b5ba88d05 100644 --- a/labelbox/exporters/coco_exporter.py +++ b/labelbox/exporters/coco_exporter.py @@ -5,13 +5,13 @@ import datetime as dt import json import logging -from PIL import Image import requests from shapely import wkt from shapely.geometry import Polygon from typing import Any, Dict, Sequence from labelbox.exceptions import UnknownFormatError +from PIL import Image def from_json(labeled_data, coco_output, label_format='WKT'): @@ -78,7 +78,8 @@ def add_label( label_id: ID for the instance to write image_url: URL to download image file from labels: Labelbox formatted labels to use for generating annotation - label_format: Format of the labeled data. Valid options are: "WKT" and "XY", default is "WKT". + label_format: Format of the labeled data. Valid options are: "WKT" and + "XY", default is "WKT". Returns: The updated COCO export represented as a dictionary. @@ -118,25 +119,29 @@ def add_label( coco['categories'].append(category) polygons = _get_polygons(label_format, label_data) - - for polygon in polygons: - segmentation = [] - for x_val, y_val in polygon.exterior.coords: - segmentation.extend([x_val, image['height'] - y_val]) - - annotation = { - "id": len(coco['annotations']) + 1, - "image_id": image['id'], - "category_id": category_id, - "segmentation": [segmentation], - "area": polygon.area, # float - "bbox": [polygon.bounds[0], polygon.bounds[1], - polygon.bounds[2] - polygon.bounds[0], - polygon.bounds[3] - polygon.bounds[1]], - "iscrowd": 0 - } - - coco['annotations'].append(annotation) + _append_polygons_as_annotations(coco, image, category_id, polygons) + + +def _append_polygons_as_annotations(coco, image, category_id, polygons): + "Adds `polygons` as annotations in the `coco` export" + for polygon in polygons: + segmentation = [] + for x_val, y_val in polygon.exterior.coords: + segmentation.extend([x_val, image['height'] - y_val]) + + annotation = { + "id": len(coco['annotations']) + 1, + "image_id": image['id'], + "category_id": category_id, + "segmentation": [segmentation], + "area": polygon.area, # float + "bbox": [polygon.bounds[0], polygon.bounds[1], + polygon.bounds[2] - polygon.bounds[0], + polygon.bounds[3] - polygon.bounds[1]], + "iscrowd": 0 + } + + coco['annotations'].append(annotation) def _get_polygons(label_format, label_data): diff --git a/labelbox/exporters/voc_exporter.py b/labelbox/exporters/voc_exporter.py index 9d6be5cf8..694360410 100644 --- a/labelbox/exporters/voc_exporter.py +++ b/labelbox/exporters/voc_exporter.py @@ -5,13 +5,13 @@ import json import logging import os -from PIL import Image import requests from shapely import wkt from typing import Any, Sequence from labelbox.exceptions import UnknownFormatError from labelbox.exporters.pascal_voc_writer import Writer as PascalWriter +from PIL import Image def from_json(labeled_data, annotations_output_dir, images_output_dir, @@ -70,9 +70,10 @@ def write_label( label_id: ID for the instance to write image_url: URL to download image file from labels: Labelbox formatted labels to use for generating annotation - label_format: Format of the labeled data. Valid options are: "WKT" and "XY", default is "WKT". + label_format: Format of the labeled data. Valid options are: "WKT" and + "XY", default is "WKT". annotations_output_dir: File path of directory to write Pascal VOC - annotation files. + annotation files. images_output_dir: File path of directory to write images. """ # Download image and save it From 9d5a748d66a722c73ac634415b677ac317defcf0 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Thu, 13 Sep 2018 18:39:01 -0700 Subject: [PATCH 4/6] Fix linter --- labelbox/exporters/voc_exporter.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/labelbox/exporters/voc_exporter.py b/labelbox/exporters/voc_exporter.py index 694360410..3989e24d1 100644 --- a/labelbox/exporters/voc_exporter.py +++ b/labelbox/exporters/voc_exporter.py @@ -5,13 +5,14 @@ import json import logging import os +from typing import Any, Sequence + +from PIL import Image import requests from shapely import wkt -from typing import Any, Sequence from labelbox.exceptions import UnknownFormatError from labelbox.exporters.pascal_voc_writer import Writer as PascalWriter -from PIL import Image def from_json(labeled_data, annotations_output_dir, images_output_dir, @@ -61,7 +62,7 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir, continue -def write_label( +def write_label( # pylint: disable-msg=too-many-arguments label_id: str, image_url: str, labels: Sequence[Any], label_format: str, images_output_dir: str, annotations_output_dir: str): """Writes a single Pascal VOC formatted image and label pair to disk. @@ -80,8 +81,9 @@ def write_label( response = requests.get(image_url, stream=True) response.raw.decode_content = True image = Image.open(response.raw) - image_name = ('{img_id}.{ext}'.format(img_id=label_id, ext=image.format.lower())) - image_fqn = os.path.join(images_output_dir, image_name) + image_fqn = os.path.join( + images_output_dir, + '{img_id}.{ext}'.format(img_id=label_id, ext=image.format.lower())) image.save(image_fqn, format=image.format) # generate image annotation in Pascal VOC From d8a608d110ed43ad7aceeb07a2ea4fc0154a41d0 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Thu, 13 Sep 2018 18:40:27 -0700 Subject: [PATCH 5/6] Import order --- labelbox/exporters/coco_exporter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/labelbox/exporters/coco_exporter.py b/labelbox/exporters/coco_exporter.py index b5ba88d05..6cd68e726 100644 --- a/labelbox/exporters/coco_exporter.py +++ b/labelbox/exporters/coco_exporter.py @@ -5,13 +5,13 @@ import datetime as dt import json import logging -import requests -from shapely import wkt -from shapely.geometry import Polygon from typing import Any, Dict, Sequence from labelbox.exceptions import UnknownFormatError from PIL import Image +import requests +from shapely import wkt +from shapely.geometry import Polygon def from_json(labeled_data, coco_output, label_format='WKT'): From 902d1a7b1565e972bff400af00507dd80d15772e Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Thu, 13 Sep 2018 18:43:43 -0700 Subject: [PATCH 6/6] Mypy fixes --- labelbox/exporters/coco_exporter.py | 29 +++++++++++++---------------- labelbox/exporters/voc_exporter.py | 4 ++-- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/labelbox/exporters/coco_exporter.py b/labelbox/exporters/coco_exporter.py index 6cd68e726..8abc9e6b5 100644 --- a/labelbox/exporters/coco_exporter.py +++ b/labelbox/exporters/coco_exporter.py @@ -5,14 +5,15 @@ import datetime as dt import json import logging -from typing import Any, Dict, Sequence +from typing import Any, Dict -from labelbox.exceptions import UnknownFormatError from PIL import Image import requests from shapely import wkt from shapely.geometry import Polygon +from labelbox.exceptions import UnknownFormatError + def from_json(labeled_data, coco_output, label_format='WKT'): "Writes labelbox JSON export into MS COCO format." @@ -48,29 +49,25 @@ def make_coco_metadata(project_name: str, created_by: str) -> Dict[str, Any]: Returns: The COCO export represented as a dictionary. """ - coco = { - 'info': None, + return { + 'info': { + 'year': dt.datetime.now(dt.timezone.utc).year, + 'version': None, + 'description': project_name, + 'contributor': created_by, + 'url': 'labelbox.com', + 'date_created': dt.datetime.now(dt.timezone.utc).isoformat() + }, 'images': [], 'annotations': [], 'licenses': [], 'categories': [] } - coco['info'] = { - 'year': dt.datetime.now(dt.timezone.utc).year, - 'version': None, - 'description': project_name, - 'contributor': created_by, - 'url': 'labelbox.com', - 'date_created': dt.datetime.now(dt.timezone.utc).isoformat() - } - - return coco - def add_label( coco: Dict[str, Any], label_id: str, image_url: str, - labels: Sequence[Any], label_format: str): + labels: Dict[str, Any], label_format: str): """Incrementally updates COCO export data structure with a new label. Args: diff --git a/labelbox/exporters/voc_exporter.py b/labelbox/exporters/voc_exporter.py index 3989e24d1..f1696ad1e 100644 --- a/labelbox/exporters/voc_exporter.py +++ b/labelbox/exporters/voc_exporter.py @@ -5,7 +5,7 @@ import json import logging import os -from typing import Any, Sequence +from typing import Any, Dict from PIL import Image import requests @@ -63,7 +63,7 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir, def write_label( # pylint: disable-msg=too-many-arguments - label_id: str, image_url: str, labels: Sequence[Any], label_format: str, + label_id: str, image_url: str, labels: Dict[str, Any], label_format: str, images_output_dir: str, annotations_output_dir: str): """Writes a single Pascal VOC formatted image and label pair to disk.