Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ The package `rasterio` installed by `labelbox[data]` relies on GDAL which could
You may see the following error message:

```
INFO:root:Building on Windows requires extra options to setup.py to locate needed GDAL files. More information is available in the README.
INFO:root:Building on Windows requires extra options to setup.py to locate needed GDAL files. More information is available in the README.

ERROR: A GDAL API version must be specified. Provide a path to gdal-config using a GDAL_CONFIG environment variable or use a GDAL_VERSION environment variable.
ERROR: A GDAL API version must be specified. Provide a path to gdal-config using a GDAL_CONFIG environment variable or use a GDAL_VERSION environment variable.
```

As a workaround:
Expand All @@ -72,7 +72,7 @@ As a workaround:

Note: You need to download the right files for your Python version. In the files above `cp38` means CPython 3.8.

2. After downloading the files, please run the following commands, in this particular order.
2. After downloading the files, please run the following commands, in this particular order.

```
pip install GDAL-3.3.2-cp38-cp38-win_amd64.whl
Expand Down
18 changes: 15 additions & 3 deletions labelbox/data/annotation_types/classification/classification.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List
from typing import Any, Dict, List, Union, Optional

try:
from typing import Literal
Expand All @@ -24,13 +24,25 @@ class ClassificationAnswer(FeatureSchema):
- Represents a classification option.
- Because it inherits from FeatureSchema
the option can be represented with either the name or feature_schema_id

- The key frame arg only applies to video classifications.
Each answer can have a key frame independent of the others.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indepdent -> independent

So unlike object annotations, classification annotations
track key frames at a classification answer level.
"""
extra: Dict[str, Any] = {}
keyframe: Optional[bool] = None

def dict(self, *args, **kwargs):
res = super().dict(*args, **kwargs)
if res['keyframe'] is None:
res.pop('keyframe')
return res


class Radio(BaseModel):
""" A classification with only one selected option allowed

>>> Radio(answer = ClassificationAnswer(name = "dog"))

"""
Expand All @@ -50,7 +62,7 @@ class Checklist(_TempName):
class Text(BaseModel):
""" Free form text

>>> Text(answer = "some text answer")
>>> Text(answer = "some text answer")

"""
answer: str
Expand Down
64 changes: 25 additions & 39 deletions labelbox/data/serialization/labelbox_v1/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,56 +9,53 @@


class LBV1ClassificationAnswer(LBV1Feature):
...

def to_common(self) -> ClassificationAnswer:
return ClassificationAnswer(feature_schema_id=self.schema_id,
name=self.title,
keyframe=self.keyframe,
extra={
'feature_id': self.feature_id,
'value': self.value
})

@classmethod
def from_common(
cls,
answer: ClassificationAnnotation) -> "LBV1ClassificationAnswer":
return cls(schema_id=answer.feature_schema_id,
title=answer.name,
value=answer.extra.get('value'),
feature_id=answer.extra.get('feature_id'),
keyframe=answer.keyframe)


class LBV1Radio(LBV1Feature):
answer: LBV1ClassificationAnswer

def to_common(self) -> Radio:
return Radio(answer=ClassificationAnswer(
feature_schema_id=self.answer.schema_id,
name=self.answer.title,
extra={
'feature_id': self.answer.feature_id,
'value': self.answer.value
}))
return Radio(answer=self.answer.to_common())

@classmethod
def from_common(cls, radio: Radio, feature_schema_id: Cuid,
**extra) -> "LBV1Radio":
return cls(schema_id=feature_schema_id,
answer=LBV1ClassificationAnswer(
schema_id=radio.answer.feature_schema_id,
title=radio.answer.name,
value=radio.answer.extra.get('value'),
feature_id=radio.answer.extra.get('feature_id')),
answer=LBV1ClassificationAnswer.from_common(radio.answer),
**extra)


class LBV1Checklist(LBV1Feature):
answers: List[LBV1ClassificationAnswer]

def to_common(self) -> Checklist:
return Checklist(answer=[
ClassificationAnswer(feature_schema_id=answer.schema_id,
name=answer.title,
extra={
'feature_id': answer.feature_id,
'value': answer.value
}) for answer in self.answers
])
return Checklist(answer=[answer.to_common() for answer in self.answers])

@classmethod
def from_common(cls, checklist: Checklist, feature_schema_id: Cuid,
**extra) -> "LBV1Checklist":
return cls(schema_id=feature_schema_id,
answers=[
LBV1ClassificationAnswer(
schema_id=answer.feature_schema_id,
title=answer.name,
value=answer.extra.get('value'),
feature_id=answer.extra.get('feature_id'))
LBV1ClassificationAnswer.from_common(answer)
for answer in checklist.answer
],
**extra)
Expand All @@ -68,25 +65,14 @@ class LBV1Dropdown(LBV1Feature):
answer: List[LBV1ClassificationAnswer]

def to_common(self) -> Dropdown:
return Dropdown(answer=[
ClassificationAnswer(feature_schema_id=answer.schema_id,
name=answer.title,
extra={
'feature_id': answer.feature_id,
'value': answer.value
}) for answer in self.answer
])
return Dropdown(answer=[answer.to_common() for answer in self.answer])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably not related to this pr... but Dropdown only support 1 answer similar to Radio answers rather than Checklist is there a reason why we have a list of answers for Dropdown?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is a list because that is how we export it


@classmethod
def from_common(cls, dropdown: Dropdown, feature_schema_id: Cuid,
**extra) -> "LBV1Dropdown":
return cls(schema_id=feature_schema_id,
answer=[
LBV1ClassificationAnswer(
schema_id=answer.feature_schema_id,
title=answer.name,
value=answer.extra.get('value'),
feature_id=answer.extra.get('feature_id'))
LBV1ClassificationAnswer.from_common(answer)
for answer in dropdown.answer
],
**extra)
Expand Down
66 changes: 34 additions & 32 deletions labelbox/data/serialization/labelbox_v1/label.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,20 +137,17 @@ class LBV1Label(BaseModel):
label_url: Optional[str] = Extra('View Label')
has_open_issues: Optional[float] = Extra('Has Open Issues')
skipped: Optional[bool] = Extra('Skipped')
media_type: Optional[str] = Extra('media_type')

def to_common(self) -> Label:
if isinstance(self.label, list):
annotations = []
for lbl in self.label:
annotations.extend(lbl.to_common())
data = VideoData(url=self.row_data,
external_id=self.external_id,
uid=self.data_row_id)
else:
annotations = self.label.to_common()
data = self._infer_media_type()

return Label(data=data,
return Label(data=self._data_row_to_common(),
uid=self.id,
annotations=annotations,
extra={
Expand All @@ -174,44 +171,49 @@ def from_common(cls, label: Label):
external_id=label.data.external_id,
**label.extra)

def _infer_media_type(self):
# Video annotations are formatted differently from text and images
# So we only need to differentiate those two
def _data_row_to_common(self) -> Union[ImageData, TextData, VideoData]:
# Use data row information to construct the appropriate annotation type
data_row_info = {
'url' if self._is_url() else 'text': self.row_data,
'external_id': self.external_id,
'uid': self.data_row_id
}

self.media_type = self.media_type or self._infer_media_type()
media_mapping = {
'text': TextData,
'image': ImageData,
'video': VideoData
}
if self.media_type not in media_mapping:
raise ValueError(
f"Annotation types are only supported for {list(media_mapping)} media types."
f" Found {self.media_type}.")
return media_mapping[self.media_type](**data_row_info)

def _infer_media_type(self) -> str:
# Determines the data row type based on the label content
if isinstance(self.label, list):
return 'video'
if self._has_text_annotations():
# If it has text annotations then it must be text
if self._is_url():
return TextData(url=self.row_data, **data_row_info)
else:
return TextData(text=self.row_data, **data_row_info)
return 'text'
elif self._has_object_annotations():
# If it has object annotations and none are text annotations then it must be an image
if self._is_url():
return ImageData(url=self.row_data, **data_row_info)
else:
return ImageData(text=self.row_data, **data_row_info)
return 'image'
else:
# no annotations to infer data type from.
# Use information from the row_data format if possible.
if self._row_contains((".jpg", ".png", ".jpeg")) and self._is_url():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this be an or statement with line 200? elif self._has_object_annotations():

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could but I think this is more readable

return ImageData(url=self.row_data, **data_row_info)
elif self._row_contains(
(".txt", ".text", ".html")) and self._is_url():
return TextData(url=self.row_data, **data_row_info)
elif not self._is_url():
return TextData(text=self.row_data, **data_row_info)
return 'image'
elif (self._row_contains((".txt", ".text", ".html")) and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this be an or statement with line 198? if self._has_text_annotations():

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would also need to check if there are not object annotations. This also would make the condition on 200 harder to read.

self._is_url()) or not self._is_url():
return 'text'
else:
# This is going to be urls that do not contain any file extensions
# This will only occur on skipped images.
# To use this converter on data with this url format
# filter out empty examples from the payload before deserializing.
# This condition will occur when a data row url does not contain a file extension
# and the label does not contain object annotations that indicate the media type.
# As a temporary workaround you can explicitly set the media_type
# in each label json payload before converting.
# We will eventually provide the media type in the export.
raise TypeError(
"Can't infer data type from row data. Remove empty examples before trying again. "
f"row_data: {self.row_data[:200]}")
f"Can't infer data type from row data. row_data: {self.row_data[:200]}"
)

def _has_object_annotations(self):
return len(self.label.objects) > 0
Expand Down
4 changes: 2 additions & 2 deletions labelbox/schema/model_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def model_run_data_rows(self):
['annotationGroups', 'pageInfo', 'endCursor'])

def annotation_groups(self):
""" `ModelRun.annotation_groups is being deprecated after version 3.9
""" `ModelRun.annotation_groups is being deprecated after version 3.9
in favor of ModelRun.model_run_data_rows`
"""
warnings.warn(
Expand Down Expand Up @@ -184,7 +184,7 @@ def delete_model_run_data_rows(self, data_row_ids):
})

def delete_annotation_groups(self, data_row_ids):
""" `ModelRun.delete_annotation_groups is being deprecated after version 3.9
""" `ModelRun.delete_annotation_groups is being deprecated after version 3.9
in favor of ModelRun.delete_model_run_data_rows`
"""
warnings.warn(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"ID": "ckw3ce1mc78b50zc30dqf0qhj", "DataRow ID": "ckw3cctc41uqg0zo5023e59hn", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F8821d3e2-9059-b616-9d4a-9723da3ea073-im1?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=FPOQz-alx3gHMK30ib1iPqJj0W0", "Label": {"objects": [{"featureId": "ckw3ce58u00003e66w9rh0onm", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 99.405, "y": 56.15}, {"x": 111.421, "y": 99.129}, {"x": 146.082, "y": 80.413}, {"x": 118.815, "y": 47.369}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3ce58u00003e66w9rh0onm?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:56.000Z", "Updated At": "2021-11-17T09:48:56.305Z", "Seconds to Label": 2.239, "External ID": "im1", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1mc78b50zc30dqf0qhj", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cea3f7b9t0zbn2tgp2y83", "DataRow ID": "ckw3cctc41uqo0zo5gpma1mr2", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F1bc65970-9880-78b4-d298-7a7ef7f8f3fc-im3?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=GZUsyQqYYlQPWBYv7GApFYlHXAc", "Label": {"objects": [], "classifications": [{"featureId": "ckw3ced5e00023e66236meh70", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ced5e00013e6652355ejd", "schemaId": "ckw3cdy207b6w0zbn2hgp3321", "title": "op1", "value": 
"op_1"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:02.000Z", "Updated At": "2021-11-17T09:49:02.220Z", "Seconds to Label": 5.373, "External ID": "im3", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cea3f7b9t0zbn2tgp2y83", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cec4v78ex0zc3aodwdekw", "DataRow ID": "ckw3cctc41uqs0zo52cy6eus1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2Fdb42c0e8-e005-3305-ed35-b021f109b6a7-im4?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=Hms9mqcIyWNDzoJUdvMa6_hRKY4", "Label": {"objects": [{"featureId": "ckw3cefl900033e66k41q6zpc", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 69.58, "y": 42.292}, {"x": 64.932, "y": 74.128}, {"x": 91.888, "y": 64.601}, {"x": 86.775, "y": 41.828}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3cefl900033e66k41q6zpc?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [{"featureId": "ckw3ceijf00053e669zaplftd", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ceijf00043e665ex22xkp", "schemaId": "ckw3cdy207b6y0zbn77201rux", "title": "op2", "value": "op_2"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:15.000Z", "Updated At": "2021-11-17T09:49:15.785Z", "Seconds to Label": 5, "External ID": "im4", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], 
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cec4v78ex0zc3aodwdekw", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3ce1s34c1i0zbp32067q4v", "DataRow ID": "ckw3cctc41uqk0zo52n31egs1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F402cbd62-9127-5b50-57d6-d77aaf89f643-im2?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=QIwHFUXN1mjBn8K4ZLWVQGQekmE", "Label": {}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:59.000Z", "Updated At": "2021-11-17T09:49:02.000Z", "Seconds to Label": 3.524, "External ID": "im2", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1s34c1i0zbp32067q4v", "Has Open Issues": 0, "Skipped": true}]
4 changes: 4 additions & 0 deletions tests/data/serialization/labelbox_v1/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ def test_image(file_path):
collection = LBV1Converter.deserialize([payload])
serialized = next(LBV1Converter.serialize(collection))

# We are storing the media types now.
payload['media_type'] = 'image'

assert serialized.keys() == payload.keys()

for key in serialized:
if key != 'Label':
assert serialized[key] == payload[key]
Expand Down
2 changes: 2 additions & 0 deletions tests/data/serialization/labelbox_v1/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ def test_text():
collection = LBV1Converter.deserialize([payload])
serialized = next(LBV1Converter.serialize(collection))

payload['media_type'] = 'text'

assert serialized.keys() == payload.keys()
for key in serialized:
if key != 'Label':
Expand Down
44 changes: 44 additions & 0 deletions tests/data/serialization/labelbox_v1/test_unknown_media.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import json

import pytest

from labelbox.data.serialization.labelbox_v1.converter import LBV1Converter


def test_image():
    """Round-trip an export whose rows carry no media type.

    First checks that deserializing the raw payload does not produce one
    label per row. Then sets ``media_type`` to ``'image'`` on every row,
    deserializes and serializes again, and compares each serialized label
    with the payload row at the same index.
    """
    export_path = 'tests/data/assets/labelbox_v1/unkown_media_type_export.json'
    with open(export_path, 'r') as handle:
        payload = json.load(handle)

    # One of the data rows is broken, so the counts are expected to differ.
    deserialized = list(LBV1Converter.deserialize(payload))
    assert len(deserialized) != len(payload)

    # Explicitly tag every row as an image before the second pass.
    for entry in payload:
        entry['media_type'] = 'image'

    round_trip = LBV1Converter.serialize(LBV1Converter.deserialize(payload))
    for idx, serialized in enumerate(round_trip):
        expected = payload[idx]
        assert serialized.keys() == expected.keys()

        for key in serialized:
            if key != 'Label':
                assert serialized[key] == expected[key]
                continue

            # Only the objects of the label are compared, pairwise.
            object_pairs = zip(serialized[key]['objects'],
                               expected[key]['objects'])
            for actual_obj, expected_obj in object_pairs:
                if len(actual_obj['classifications']) == 0:
                    # We don't add a classification key to the payload
                    # if there are no classifications.
                    actual_obj.pop('classifications')

                nested = expected_obj.get('classifications')
                if (isinstance(nested, list) and len(nested) and
                        isinstance(nested[0], list)):
                    # Unwrap a list-of-lists down to its first inner list.
                    expected_obj['classifications'] = nested[0]

                assert actual_obj == expected_obj


# TODO: Afterwards, check the nd serializer against this payload. It should work for almost everything (with some known exceptions).
Loading