diff --git a/labelbox/data/annotation_types/data/raster.py b/labelbox/data/annotation_types/data/raster.py
index 14497e33b..c500d1f3b 100644
--- a/labelbox/data/annotation_types/data/raster.py
+++ b/labelbox/data/annotation_types/data/raster.py
@@ -2,13 +2,16 @@
 from io import BytesIO
 from typing import Callable, Optional, Union
 from typing_extensions import Literal
-import numpy as np
-import requests
+
 from PIL import Image
 from google.api_core import retry
 from pydantic import BaseModel
 from pydantic import root_validator
+from requests.exceptions import ConnectTimeout
+import requests
+import numpy as np
 
+from labelbox.exceptions import InternalServerError
 from .base_data import BaseData
 from ..types import TypedArray
 
@@ -113,7 +116,9 @@ def value(self) -> np.ndarray:
     def set_fetch_fn(self, fn):
         object.__setattr__(self, 'fetch_remote', lambda: fn(self))
 
-    @retry.Retry(deadline=60.)
+    @retry.Retry(deadline=15.,
+                 predicate=retry.if_exception_type(ConnectTimeout,
+                                                   InternalServerError))
     def fetch_remote(self) -> bytes:
         """
         Method for accessing url.
@@ -122,6 +127,9 @@ def fetch_remote(self) -> bytes:
         simply override this function
         """
         response = requests.get(self.url)
+        # Transient 5xx: raise the type the Retry predicate matches.
+        if response.status_code in [500, 502, 503, 504]:
+            raise InternalServerError(response.text)
         response.raise_for_status()
         return response.content
 
diff --git a/labelbox/data/annotation_types/data/text.py b/labelbox/data/annotation_types/data/text.py
index 096c7906b..816f39c2e 100644
--- a/labelbox/data/annotation_types/data/text.py
+++ b/labelbox/data/annotation_types/data/text.py
@@ -1,9 +1,11 @@
 from typing import Callable, Optional
 
 import requests
+from requests.exceptions import ConnectTimeout
 from google.api_core import retry
 from pydantic import root_validator
 
+from labelbox.exceptions import InternalServerError
 from .base_data import BaseData
 
 
@@ -47,7 +49,9 @@ def value(self) -> str:
     def set_fetch_fn(self, fn):
         object.__setattr__(self, 'fetch_remote', lambda: fn(self))
 
-    @retry.Retry(deadline=15.)
+    @retry.Retry(deadline=15.,
+                 predicate=retry.if_exception_type(ConnectTimeout,
+                                                   InternalServerError))
     def fetch_remote(self) -> str:
         """
         Method for accessing url.
@@ -56,6 +60,9 @@ def fetch_remote(self) -> str:
         simply override this function
         """
         response = requests.get(self.url)
+        # Transient 5xx: raise the type the Retry predicate matches.
+        if response.status_code in [500, 502, 503, 504]:
+            raise InternalServerError(response.text)
         response.raise_for_status()
         return response.text
 
diff --git a/labelbox/data/serialization/coco/categories.py b/labelbox/data/serialization/coco/categories.py
index 31f757361..07ecacb03 100644
--- a/labelbox/data/serialization/coco/categories.py
+++ b/labelbox/data/serialization/coco/categories.py
@@ -1,4 +1,5 @@
 import sys
+from hashlib import md5
 
 from pydantic import BaseModel
 
@@ -11,4 +12,6 @@ class Categories(BaseModel):
 
 
 def hash_category_name(name: str) -> int:
-    return hash(name) + sys.maxsize
+    # md5 keeps ids stable across runs; builtin hash() of str is salted.
+    return int.from_bytes(
+        md5(name.encode('utf-8')).hexdigest().encode('utf-8'), 'little')
diff --git a/labelbox/data/serialization/coco/instance_dataset.py b/labelbox/data/serialization/coco/instance_dataset.py
index fd1084ce3..24353dfb4 100644
--- a/labelbox/data/serialization/coco/instance_dataset.py
+++ b/labelbox/data/serialization/coco/instance_dataset.py
@@ -1,7 +1,7 @@
 # https://cocodataset.org/#format-data
 
 from concurrent.futures import ProcessPoolExecutor, as_completed
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Tuple, Optional
 from pathlib import Path
 
 import numpy as np
@@ -15,14 +15,17 @@
 from .image import CocoImage, get_image, get_image_id
 
 
-def mask_to_coco_object_annotation(annotation: ObjectAnnotation, annot_idx: int,
-                                   image_id: int,
-                                   category_id: int) -> COCOObjectAnnotation:
+def mask_to_coco_object_annotation(
+        annotation: ObjectAnnotation, annot_idx: int, image_id: int,
+        category_id: int) -> Optional[COCOObjectAnnotation]:
     # This is going to fill any holes into the multipolygon
     # If you need to support holes use the panoptic data format
     shapely = annotation.value.shapely.simplify(1).buffer(0)
+
     if shapely.is_empty:
-        shapely = annotation.value.shapely.simplify(1).buffer(0.01)
+        # Empty geometry has nothing to export; None tells callers to skip it.
+        return
+
     xmin, ymin, xmax, ymax = shapely.bounds
     # Iterate over polygon once or multiple polygon for each item
     area = shapely.area
@@ -89,6 +92,26 @@ def segmentations_to_common(class_annotations: COCOObjectAnnotation,
     return annotations
 
 
+def object_annotation_to_coco(
+        annotation: ObjectAnnotation, annot_idx: int, image_id: int,
+        category_id: int) -> Optional[COCOObjectAnnotation]:
+    """
+    Converts a single object annotation into its COCO representation.
+
+    Returns None when the annotation value is not a Mask, Polygon, or
+    Rectangle. A None result may also come back from the mask conversion
+    (it returns early on empty geometry).
+    """
+    if isinstance(annotation.value, Mask):
+        return mask_to_coco_object_annotation(annotation, annot_idx, image_id,
+                                              category_id)
+    elif isinstance(annotation.value, (Polygon, Rectangle)):
+        return vector_to_coco_object_annotation(annotation, annot_idx, image_id,
+                                                category_id)
+    else:
+        return None
+
+
 def process_label(
         label: Label,
         idx: int,
@@ -103,20 +126,17 @@ def process_label(
     categories = {}
     for class_name in annotation_lookup:
         for annotation in annotation_lookup[class_name]:
-            if annotation.name not in categories:
-                categories[annotation.name] = hash_category_name(
-                    annotation.name)
-            if isinstance(annotation.value, Mask):
-                coco_annotations.append(
-                    mask_to_coco_object_annotation(annotation, annot_idx,
-                                                   image_id,
-                                                   categories[annotation.name]))
-            elif isinstance(annotation.value, (Polygon, Rectangle)):
-                coco_annotations.append(
-                    vector_to_coco_object_annotation(
-                        annotation, annot_idx, image_id,
-                        categories[annotation.name]))
-            annot_idx += 1
+            category_id = categories.get(annotation.name) or hash_category_name(
+                annotation.name)
+            coco_annotation = object_annotation_to_coco(annotation, annot_idx,
+                                                        image_id, category_id)
+            if coco_annotation is not None:
+                # Only exported annotations claim a category and an index.
+                coco_annotations.append(coco_annotation)
+                if annotation.name not in categories:
+                    categories[annotation.name] = category_id
+                annot_idx += 1
+
     return image, coco_annotations, categories
 
 
@@ -147,6 +167,7 @@ def from_common(cls,
                     future.result() for future in tqdm(as_completed(futures))
                 ]
         else:
+
             results = [
                 process_label(label, idx, image_root)
                 for idx, label in enumerate(labels)