diff --git a/labelbox/__init__.py b/labelbox/__init__.py index 704a5efcc..6f28194a6 100644 --- a/labelbox/__init__.py +++ b/labelbox/__init__.py @@ -1,3 +1,3 @@ "The Labelbox python package." -__version__ = '0.0.3' +__version__ = '0.0.4' diff --git a/labelbox/exporters/coco_exporter.py b/labelbox/exporters/coco_exporter.py index 79635b4a5..8abc9e6b5 100644 --- a/labelbox/exporters/coco_exporter.py +++ b/labelbox/exporters/coco_exporter.py @@ -2,13 +2,15 @@ Module for converting labelbox.com JSON exports to MS COCO format. """ -import json import datetime as dt +import json import logging +from typing import Any, Dict + +from PIL import Image +import requests from shapely import wkt from shapely.geometry import Polygon -import requests -from PIL import Image from labelbox.exceptions import UnknownFormatError @@ -25,15 +27,7 @@ def from_json(labeled_data, coco_output, label_format='WKT'): for data in label_data: # Download and get image name try: - image = { - "id": data['ID'], - "file_name": data['Labeled Data'], - "license": None, - "flickr_url": data['Labeled Data'], - "coco_url": data['Labeled Data'], - "date_captured": None, - } - _add_label(coco, image, data['Label'], label_format) + add_label(coco, data['ID'], data['Labeled Data'], data['Label'], label_format) except requests.exceptions.MissingSchema as exc: logging.exception(exc) continue @@ -45,31 +39,57 @@ def from_json(labeled_data, coco_output, label_format='WKT'): file_handle.write(json.dumps(coco)) -def make_coco_metadata(project_name, created_by): - "Initializes COCO export data structure." - coco = { - 'info': None, +def make_coco_metadata(project_name: str, created_by: str) -> Dict[str, Any]: + """Initializes COCO export data structure. + + Args: + project_name: name of the project + created_by: email of the project creator + + Returns: + The COCO export represented as a dictionary. 
+ """ + return { + 'info': { + 'year': dt.datetime.now(dt.timezone.utc).year, + 'version': None, + 'description': project_name, + 'contributor': created_by, + 'url': 'labelbox.com', + 'date_created': dt.datetime.now(dt.timezone.utc).isoformat() + }, 'images': [], 'annotations': [], 'licenses': [], 'categories': [] } - coco['info'] = { - 'year': dt.datetime.now(dt.timezone.utc).year, - 'version': None, - 'description': project_name, - 'contributor': created_by, - 'url': 'labelbox.com', - 'date_created': dt.datetime.now(dt.timezone.utc).isoformat() - } - - return coco - -def _add_label(coco, image, labels, label_format): - "Incrementally updates COCO export data structure with a new label." - response = requests.get(image['coco_url'], stream=True) +def add_label( + coco: Dict[str, Any], label_id: str, image_url: str, + labels: Dict[str, Any], label_format: str): + """Incrementally updates COCO export data structure with a new label. + + Args: + coco: The current COCO export, will be incrementally updated by this method. + label_id: ID for the instance to write + image_url: URL to download image file from + labels: Labelbox formatted labels to use for generating annotation + label_format: Format of the labeled data. Valid options are: "WKT" and + "XY". + + Returns: + None; the `coco` dictionary passed in is updated in place. 
+ """ + image = { + "id": label_id, + "file_name": image_url, + "license": None, + "flickr_url": image_url, + "coco_url": image_url, + "date_captured": None, + } + response = requests.get(image_url, stream=True) response.raw.decode_content = True image['width'], image['height'] = Image.open(response.raw).size @@ -96,25 +116,29 @@ def _add_label(coco, image, labels, label_format): coco['categories'].append(category) polygons = _get_polygons(label_format, label_data) - - for polygon in polygons: - segmentation = [] - for x_val, y_val in polygon.exterior.coords: - segmentation.extend([x_val, image['height'] - y_val]) - - annotation = { - "id": len(coco['annotations']) + 1, - "image_id": image['id'], - "category_id": category_id, - "segmentation": [segmentation], - "area": polygon.area, # float - "bbox": [polygon.bounds[0], polygon.bounds[1], - polygon.bounds[2] - polygon.bounds[0], - polygon.bounds[3] - polygon.bounds[1]], - "iscrowd": 0 - } - - coco['annotations'].append(annotation) + _append_polygons_as_annotations(coco, image, category_id, polygons) + + +def _append_polygons_as_annotations(coco, image, category_id, polygons): + "Adds `polygons` as annotations in the `coco` export" + for polygon in polygons: + segmentation = [] + for x_val, y_val in polygon.exterior.coords: + segmentation.extend([x_val, image['height'] - y_val]) + + annotation = { + "id": len(coco['annotations']) + 1, + "image_id": image['id'], + "category_id": category_id, + "segmentation": [segmentation], + "area": polygon.area, # float + "bbox": [polygon.bounds[0], polygon.bounds[1], + polygon.bounds[2] - polygon.bounds[0], + polygon.bounds[3] - polygon.bounds[1]], + "iscrowd": 0 + } + + coco['annotations'].append(annotation) def _get_polygons(label_format, label_data): diff --git a/labelbox/exporters/voc_exporter.py b/labelbox/exporters/voc_exporter.py index 7807ece32..f1696ad1e 100644 --- a/labelbox/exporters/voc_exporter.py +++ b/labelbox/exporters/voc_exporter.py @@ -2,12 +2,14 @@ Module for 
converting labelbox.com JSON exports to Pascal VOC 2012 format. """ -import os import json import logging -from shapely import wkt -import requests +import os +from typing import Any, Dict + from PIL import Image +import requests +from shapely import wkt from labelbox.exceptions import UnknownFormatError from labelbox.exporters.pascal_voc_writer import Writer as PascalWriter @@ -44,7 +46,13 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir, for data in label_data: try: - _write_label(data, label_format, images_output_dir, annotations_output_dir) + write_label( + data['ID'], + data['Labeled Data'], + data['Label'], + label_format, + images_output_dir, + annotations_output_dir) except requests.exceptions.MissingSchema as exc: logging.exception(exc) @@ -54,15 +62,28 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir, continue -def _write_label( - data, label_format, images_output_dir, annotations_output_dir): - "Writes a Pascal VOC formatted image and label pair to disk." +def write_label( # pylint: disable-msg=too-many-arguments + label_id: str, image_url: str, labels: Dict[str, Any], label_format: str, + images_output_dir: str, annotations_output_dir: str): + """Writes a single Pascal VOC formatted image and label pair to disk. + + Args: + label_id: ID for the instance to write + image_url: URL to download image file from + labels: Labelbox formatted labels to use for generating annotation + label_format: Format of the labeled data. Valid options are: "WKT" and + "XY". + annotations_output_dir: File path of directory to write Pascal VOC + annotation files. + images_output_dir: File path of directory to write images. 
+ """ # Download image and save it - response = requests.get(data['Labeled Data'], stream=True) + response = requests.get(image_url, stream=True) response.raw.decode_content = True image = Image.open(response.raw) - image_name = ('{img_id}.{ext}'.format(img_id=data['ID'], ext=image.format.lower())) - image_fqn = os.path.join(images_output_dir, image_name) + image_fqn = os.path.join( + images_output_dir, + '{img_id}.{ext}'.format(img_id=label_id, ext=image.format.lower())) image.save(image_fqn, format=image.format) # generate image annotation in Pascal VOC @@ -70,19 +91,19 @@ def _write_label( xml_writer = PascalWriter(image_fqn, width, height) # remove classification labels (Skip, etc...) - if not callable(getattr(data['Label'], 'keys', None)): + if not callable(getattr(labels, 'keys', None)): # skip if no categories (e.g. "Skip") return # convert label to Pascal VOC format - for category_name, wkt_data in data['Label'].items(): + for category_name, paths in labels.items(): if label_format == 'WKT': xml_writer = _add_pascal_object_from_wkt( - xml_writer, img_height=height, wkt_data=wkt_data, + xml_writer, img_height=height, wkt_data=paths, label=category_name) elif label_format == 'XY': xml_writer = _add_pascal_object_from_xy( - xml_writer, img_height=height, polygons=wkt_data, + xml_writer, img_height=height, polygons=paths, label=category_name) else: exc = UnknownFormatError(label_format=label_format) @@ -90,7 +111,7 @@ def _write_label( raise exc # write Pascal VOC xml annotation for image - xml_writer.save(os.path.join(annotations_output_dir, '{}.xml'.format(data['ID']))) + xml_writer.save(os.path.join(annotations_output_dir, '{}.xml'.format(label_id))) def _add_pascal_object_from_wkt(xml_writer, img_height, wkt_data, label): @@ -112,10 +133,16 @@ def _add_pascal_object_from_wkt(xml_writer, img_height, wkt_data, label): def _add_pascal_object_from_xy(xml_writer, img_height, polygons, label): + if not isinstance(polygons, list): + # polygons is not [{'geometry': 
[xy]}] nor [[xy]] + return xml_writer for polygon in polygons: if 'geometry' in polygon: # V3 polygon = polygon['geometry'] - assert isinstance(polygon, list) # V2 and V3 + if not isinstance(polygon, list) \ + or not all(map(lambda p: 'x' in p and 'y' in p, polygon)): + # couldn't make a list of points, give up + return xml_writer xy_coords = [] for point in polygon: diff --git a/setup.cfg b/setup.cfg index 3b4dfa5b8..7e5afeb6e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,7 +25,8 @@ classifiers = [options] zip_safe = False -packages = find_namespace: +packages = + labelbox include_package_data = True install_requires = jinja2