Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions labelbox/data/annotation_types/data/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
from io import BytesIO
from typing import Callable, Optional, Union
from typing_extensions import Literal
import numpy as np
import requests

from PIL import Image
from google.api_core import retry
from pydantic import BaseModel
from pydantic import root_validator
from requests.exceptions import ConnectTimeout
import requests
import numpy as np

from labelbox.exceptions import InternalServerError
from .base_data import BaseData
from ..types import TypedArray

Expand Down Expand Up @@ -113,7 +116,9 @@ def value(self) -> np.ndarray:
def set_fetch_fn(self, fn):
    """
    Install a custom fetcher on this instance.

    After this call, `self.fetch_remote()` takes no arguments and returns
    `fn(self)`.
    """

    def _fetch_with_instance():
        return fn(self)

    # NOTE(review): object.__setattr__ is presumably used to bypass the
    # model's own attribute assignment handling - confirm against the class.
    object.__setattr__(self, 'fetch_remote', _fetch_with_instance)

@retry.Retry(deadline=60.)
@retry.Retry(deadline=15.,
predicate=retry.if_exception_type(ConnectTimeout,
InternalServerError))
def fetch_remote(self) -> bytes:
    """
    Download the raw bytes found at `self.url` with an HTTP GET.

    A 500, 502, 503 or 504 response raises InternalServerError carrying the
    response text; any other error status is raised by
    `raise_for_status()`.

    If the url needs a different access pattern, simply override this
    function.
    """
    resp = requests.get(self.url)
    server_error_codes = (500, 502, 503, 504)
    # Raised as InternalServerError so the retry predicate declared on this
    # method can match these statuses.
    if resp.status_code in server_error_codes:
        raise InternalServerError(resp.text)
    resp.raise_for_status()
    return resp.content

Expand Down
8 changes: 7 additions & 1 deletion labelbox/data/annotation_types/data/text.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from typing import Callable, Optional

import requests
from requests.exceptions import ConnectTimeout
from google.api_core import retry
from pydantic import root_validator

from labelbox.exceptions import InternalServerError
from .base_data import BaseData


Expand Down Expand Up @@ -47,7 +49,9 @@ def value(self) -> str:
def set_fetch_fn(self, fn):
    """
    Swap this instance's `fetch_remote` for a caller-supplied fetcher.

    The stored callable takes no arguments; calling it returns `fn(self)`.
    """

    def _bound_fetch():
        return fn(self)

    # NOTE(review): object.__setattr__ is presumably used to bypass the
    # model's own attribute assignment handling - confirm against the class.
    object.__setattr__(self, 'fetch_remote', _bound_fetch)

@retry.Retry(deadline=15.)
@retry.Retry(deadline=15.,
predicate=retry.if_exception_type(ConnectTimeout,
InternalServerError))
def fetch_remote(self) -> str:
    """
    Download the text found at `self.url` with an HTTP GET.

    A 500, 502, 503 or 504 response raises InternalServerError carrying the
    response text; any other error status is raised by
    `raise_for_status()`.

    If the url needs a different access pattern, simply override this
    function.
    """
    response = requests.get(self.url)
    if response.status_code in (500, 502, 503, 504):
        # Use the directly imported InternalServerError: the bare name
        # `labelbox` is never bound in this module, so the dotted form
        # `labelbox.exceptions.InternalServerError` would raise NameError
        # instead of the exception the retry predicate is looking for.
        raise InternalServerError(response.text)
    response.raise_for_status()
    return response.text

Expand Down
4 changes: 3 additions & 1 deletion labelbox/data/serialization/coco/categories.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
from hashlib import md5

from pydantic import BaseModel

Expand All @@ -11,4 +12,5 @@ class Categories(BaseModel):


def hash_category_name(name: str) -> int:
    """
    Derive a stable integer id for a category name.

    The id is the md5 hex digest of the utf-8 encoded name, with the
    digest's ASCII bytes read as a little-endian integer.

    The builtin `hash` is deliberately not used: string hashes are salted
    per interpreter process, so the same name would map to different ids
    across runs and across worker processes.

    Args:
        name: category name to hash.
    Returns:
        A non-negative integer that depends only on `name`.
    """
    hex_digest = md5(name.encode('utf-8')).hexdigest()
    return int.from_bytes(hex_digest.encode('utf-8'), 'little')
50 changes: 31 additions & 19 deletions labelbox/data/serialization/coco/instance_dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# https://cocodataset.org/#format-data

from concurrent.futures import ProcessPoolExecutor, as_completed
from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List, Tuple, Optional
from pathlib import Path

import numpy as np
Expand All @@ -15,14 +15,16 @@
from .image import CocoImage, get_image, get_image_id


def mask_to_coco_object_annotation(annotation: ObjectAnnotation, annot_idx: int,
image_id: int,
category_id: int) -> COCOObjectAnnotation:
def mask_to_coco_object_annotation(
annotation: ObjectAnnotation, annot_idx: int, image_id: int,
category_id: int) -> Optional[COCOObjectAnnotation]:
# This is going to fill any holes into the multipolygon
# If you need to support holes use the panoptic data format
shapely = annotation.value.shapely.simplify(1).buffer(0)

if shapely.is_empty:
shapely = annotation.value.shapely.simplify(1).buffer(0.01)
return

xmin, ymin, xmax, ymax = shapely.bounds
# Iterate over polygon once or multiple polygon for each item
area = shapely.area
Expand Down Expand Up @@ -89,6 +91,19 @@ def segmentations_to_common(class_annotations: COCOObjectAnnotation,
return annotations


def object_annotation_to_coco(
        annotation: ObjectAnnotation, annot_idx: int, image_id: int,
        category_id: int) -> Optional[COCOObjectAnnotation]:
    """
    Convert one object annotation to its COCO form, dispatching on the type
    of `annotation.value`.

    Mask values go through `mask_to_coco_object_annotation`; Polygon and
    Rectangle values go through `vector_to_coco_object_annotation`. Any other
    value type yields None. The result of the chosen converter is returned
    unchanged.
    """
    value = annotation.value
    if isinstance(value, Mask):
        converter = mask_to_coco_object_annotation
    elif isinstance(value, (Polygon, Rectangle)):
        converter = vector_to_coco_object_annotation
    else:
        # Unsupported value type: nothing to export for this annotation.
        return None
    return converter(annotation, annot_idx, image_id, category_id)


def process_label(
label: Label,
idx: int,
Expand All @@ -103,20 +118,16 @@ def process_label(
categories = {}
for class_name in annotation_lookup:
for annotation in annotation_lookup[class_name]:
if annotation.name not in categories:
categories[annotation.name] = hash_category_name(
annotation.name)
if isinstance(annotation.value, Mask):
coco_annotations.append(
mask_to_coco_object_annotation(annotation, annot_idx,
image_id,
categories[annotation.name]))
elif isinstance(annotation.value, (Polygon, Rectangle)):
coco_annotations.append(
vector_to_coco_object_annotation(
annotation, annot_idx, image_id,
categories[annotation.name]))
annot_idx += 1
category_id = categories.get(annotation.name) or hash_category_name(
annotation.name)
coco_annotation = object_annotation_to_coco(annotation, annot_idx,
image_id, category_id)
if coco_annotation is not None:
coco_annotations.append(coco_annotation)
if annotation.name not in categories:
categories[annotation.name] = category_id
annot_idx += 1

return image, coco_annotations, categories


Expand Down Expand Up @@ -147,6 +158,7 @@ def from_common(cls,
future.result() for future in tqdm(as_completed(futures))
]
else:

results = [
process_label(label, idx, image_root)
for idx, label in enumerate(labels)
Expand Down