Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion labelbox/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"The Labelbox python package."

__version__ = '0.0.3'
__version__ = '0.0.4'
122 changes: 73 additions & 49 deletions labelbox/exporters/coco_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
Module for converting labelbox.com JSON exports to MS COCO format.
"""

import json
import datetime as dt
import json
import logging
from typing import Any, Dict

from PIL import Image
import requests
from shapely import wkt
from shapely.geometry import Polygon
import requests
from PIL import Image

from labelbox.exceptions import UnknownFormatError

Expand All @@ -25,15 +27,7 @@ def from_json(labeled_data, coco_output, label_format='WKT'):
for data in label_data:
# Download and get image name
try:
image = {
"id": data['ID'],
"file_name": data['Labeled Data'],
"license": None,
"flickr_url": data['Labeled Data'],
"coco_url": data['Labeled Data'],
"date_captured": None,
}
_add_label(coco, image, data['Label'], label_format)
add_label(coco, data['ID'], data['Labeled Data'], data['Label'], label_format)
except requests.exceptions.MissingSchema as exc:
logging.exception(exc)
continue
Expand All @@ -45,31 +39,57 @@ def from_json(labeled_data, coco_output, label_format='WKT'):
file_handle.write(json.dumps(coco))


def make_coco_metadata(project_name: str, created_by: str) -> Dict[str, Any]:
    """Initializes COCO export data structure.

    Args:
        project_name: name of the project, stored as the export description
        created_by: email of the project creator, stored as the contributor

    Returns:
        The COCO export represented as a dictionary, with a populated
        'info' section and empty 'images', 'annotations', 'licenses' and
        'categories' lists.
    """
    # Read the clock once so 'year' and 'date_created' always describe the
    # same instant; two separate reads could straddle a year boundary.
    now = dt.datetime.now(dt.timezone.utc)

    return {
        'info': {
            'year': now.year,
            'version': None,
            'description': project_name,
            'contributor': created_by,
            'url': 'labelbox.com',
            'date_created': now.isoformat(),
        },
        'images': [],
        'annotations': [],
        'licenses': [],
        'categories': [],
    }


def _add_label(coco, image, labels, label_format):
"Incrementally updates COCO export data structure with a new label."
response = requests.get(image['coco_url'], stream=True)
def add_label(
coco: Dict[str, Any], label_id: str, image_url: str,
labels: Dict[str, Any], label_format: str):
"""Incrementally updates COCO export data structure with a new label.

Args:
coco: The current COCO export, will be incrementally updated by this method.
label_id: ID for the instance to write
image_url: URL to download image file from
labels: Labelbox formatted labels to use for generating annotation
label_format: Format of the labeled data. Valid options are: "WKT" and
"XY", default is "WKT".

Returns:
The updated COCO export represented as a dictionary.
"""
image = {
"id": label_id,
"file_name": image_url,
"license": None,
"flickr_url": image_url,
"coco_url": image_url,
"date_captured": None,
}
response = requests.get(image_url, stream=True)
response.raw.decode_content = True
image['width'], image['height'] = Image.open(response.raw).size

Expand All @@ -96,25 +116,29 @@ def _add_label(coco, image, labels, label_format):
coco['categories'].append(category)

polygons = _get_polygons(label_format, label_data)

for polygon in polygons:
segmentation = []
for x_val, y_val in polygon.exterior.coords:
segmentation.extend([x_val, image['height'] - y_val])

annotation = {
"id": len(coco['annotations']) + 1,
"image_id": image['id'],
"category_id": category_id,
"segmentation": [segmentation],
"area": polygon.area, # float
"bbox": [polygon.bounds[0], polygon.bounds[1],
polygon.bounds[2] - polygon.bounds[0],
polygon.bounds[3] - polygon.bounds[1]],
"iscrowd": 0
}

coco['annotations'].append(annotation)
_append_polygons_as_annotations(coco, image, category_id, polygons)


def _append_polygons_as_annotations(
        coco: Dict[str, Any], image: Dict[str, Any], category_id, polygons):
    """Adds `polygons` as annotations in the `coco` export.

    Args:
        coco: The COCO export; one entry per polygon is appended to
            ``coco['annotations']`` in place.
        image: COCO image entry; its 'id' and 'height' keys are read.
        category_id: Value stored as the 'category_id' of every annotation.
        polygons: Iterable of polygon objects exposing ``exterior.coords``,
            ``area`` and ``bounds`` (min_x, min_y, max_x, max_y).
    """
    img_height = image['height']

    for polygon in polygons:
        # Vertices are written with y mirrored against the image height
        # (presumably because the incoming geometry uses a bottom-left
        # origin -- confirm against the export format).
        segmentation = []
        for x_val, y_val in polygon.exterior.coords:
            segmentation.extend([x_val, img_height - y_val])

        # The bounding box has to live in the same mirrored frame as the
        # segmentation, otherwise it does not enclose the polygon it
        # describes: after mirroring, the box's top edge is the polygon's
        # max_y.  Width, height and area are unaffected by the mirroring.
        min_x, min_y, max_x, max_y = polygon.bounds

        annotation = {
            "id": len(coco['annotations']) + 1,
            "image_id": image['id'],
            "category_id": category_id,
            "segmentation": [segmentation],
            "area": polygon.area,  # float
            "bbox": [min_x, img_height - max_y,
                     max_x - min_x,
                     max_y - min_y],
            "iscrowd": 0
        }

        coco['annotations'].append(annotation)


def _get_polygons(label_format, label_data):
Expand Down
59 changes: 43 additions & 16 deletions labelbox/exporters/voc_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
Module for converting labelbox.com JSON exports to Pascal VOC 2012 format.
"""

import os
import json
import logging
from shapely import wkt
import requests
import os
from typing import Any, Dict

from PIL import Image
import requests
from shapely import wkt

from labelbox.exceptions import UnknownFormatError
from labelbox.exporters.pascal_voc_writer import Writer as PascalWriter
Expand Down Expand Up @@ -44,7 +46,13 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir,

for data in label_data:
try:
_write_label(data, label_format, images_output_dir, annotations_output_dir)
write_label(
data['ID'],
data['Labeled Data'],
data['Label'],
label_format,
images_output_dir,
annotations_output_dir)

except requests.exceptions.MissingSchema as exc:
logging.exception(exc)
Expand All @@ -54,43 +62,56 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir,
continue


def write_label(  # pylint: disable=too-many-arguments
        label_id: str, image_url: str, labels: Dict[str, Any], label_format: str,
        images_output_dir: str, annotations_output_dir: str):
    """Writes a single Pascal VOC formatted image and label pair to disk.

    The image is downloaded and saved before the labels are inspected, so
    an image file is written even when no annotation file is produced.

    Args:
        label_id: ID for the instance to write; used as the base name of
            both the image file and the annotation file.
        image_url: URL to download image file from
        labels: Labelbox formatted labels to use for generating annotation.
            If this has no callable ``keys`` (e.g. a "Skip" label), no
            annotation is written.
        label_format: Format of the labeled data. Valid options are: "WKT"
            and "XY".
        images_output_dir: File path of directory to write images.
        annotations_output_dir: File path of directory to write Pascal VOC
            annotation files.

    Raises:
        UnknownFormatError: if `label_format` is neither "WKT" nor "XY".
    """
    # Download image and save it
    response = requests.get(image_url, stream=True)
    response.raw.decode_content = True
    image = Image.open(response.raw)
    image_fqn = os.path.join(
        images_output_dir,
        '{img_id}.{ext}'.format(img_id=label_id, ext=image.format.lower()))
    image.save(image_fqn, format=image.format)

    # generate image annotation in Pascal VOC
    width, height = image.size
    xml_writer = PascalWriter(image_fqn, width, height)

    # remove classification labels (Skip, etc...)
    if not callable(getattr(labels, 'keys', None)):
        # skip if no categories (e.g. "Skip")
        return

    # convert label to Pascal VOC format
    for category_name, paths in labels.items():
        if label_format == 'WKT':
            xml_writer = _add_pascal_object_from_wkt(
                xml_writer, img_height=height, wkt_data=paths,
                label=category_name)
        elif label_format == 'XY':
            xml_writer = _add_pascal_object_from_xy(
                xml_writer, img_height=height, polygons=paths,
                label=category_name)
        else:
            exc = UnknownFormatError(label_format=label_format)
            # No exception is being handled at this point, so
            # logging.exception would attach an empty "NoneType: None"
            # traceback; log a plain error instead.
            logging.error(exc.message)
            raise exc

    # write Pascal VOC xml annotation for image
    xml_writer.save(os.path.join(annotations_output_dir, '{}.xml'.format(label_id)))


def _add_pascal_object_from_wkt(xml_writer, img_height, wkt_data, label):
Expand All @@ -112,10 +133,16 @@ def _add_pascal_object_from_wkt(xml_writer, img_height, wkt_data, label):


def _add_pascal_object_from_xy(xml_writer, img_height, polygons, label):
if not isinstance(polygons, list):
# polygons is not [{'geometry': [xy]}] nor [[xy]]
return xml_writer
for polygon in polygons:
if 'geometry' in polygon: # V3
polygon = polygon['geometry']
assert isinstance(polygon, list) # V2 and V3
if not isinstance(polygon, list) \
or not all(map(lambda p: 'x' in p and 'y' in p, polygon)):
# couldn't make a list of points, give up
return xml_writer

xy_coords = []
for point in polygon:
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ classifiers =

[options]
zip_safe = False
packages = find_namespace:
packages =
labelbox
include_package_data = True
install_requires =
jinja2
Expand Down