-
Notifications
You must be signed in to change notification settings - Fork 68
video classification keyframe #345
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,56 +9,53 @@ | |
|
||
|
||
class LBV1ClassificationAnswer(LBV1Feature): | ||
... | ||
|
||
def to_common(self) -> ClassificationAnswer: | ||
return ClassificationAnswer(feature_schema_id=self.schema_id, | ||
name=self.title, | ||
keyframe=self.keyframe, | ||
extra={ | ||
'feature_id': self.feature_id, | ||
'value': self.value | ||
}) | ||
|
||
@classmethod | ||
def from_common( | ||
cls, | ||
answer: ClassificationAnnotation) -> "LBV1ClassificationAnswer": | ||
return cls(schema_id=answer.feature_schema_id, | ||
title=answer.name, | ||
value=answer.extra.get('value'), | ||
feature_id=answer.extra.get('feature_id'), | ||
keyframe=answer.keyframe) | ||
|
||
|
||
class LBV1Radio(LBV1Feature): | ||
answer: LBV1ClassificationAnswer | ||
|
||
def to_common(self) -> Radio: | ||
return Radio(answer=ClassificationAnswer( | ||
feature_schema_id=self.answer.schema_id, | ||
name=self.answer.title, | ||
extra={ | ||
'feature_id': self.answer.feature_id, | ||
'value': self.answer.value | ||
})) | ||
return Radio(answer=self.answer.to_common()) | ||
|
||
@classmethod | ||
def from_common(cls, radio: Radio, feature_schema_id: Cuid, | ||
**extra) -> "LBV1Radio": | ||
return cls(schema_id=feature_schema_id, | ||
answer=LBV1ClassificationAnswer( | ||
schema_id=radio.answer.feature_schema_id, | ||
title=radio.answer.name, | ||
value=radio.answer.extra.get('value'), | ||
feature_id=radio.answer.extra.get('feature_id')), | ||
answer=LBV1ClassificationAnswer.from_common(radio.answer), | ||
**extra) | ||
|
||
|
||
class LBV1Checklist(LBV1Feature): | ||
answers: List[LBV1ClassificationAnswer] | ||
|
||
def to_common(self) -> Checklist: | ||
return Checklist(answer=[ | ||
ClassificationAnswer(feature_schema_id=answer.schema_id, | ||
name=answer.title, | ||
extra={ | ||
'feature_id': answer.feature_id, | ||
'value': answer.value | ||
}) for answer in self.answers | ||
]) | ||
return Checklist(answer=[answer.to_common() for answer in self.answers]) | ||
|
||
@classmethod | ||
def from_common(cls, checklist: Checklist, feature_schema_id: Cuid, | ||
**extra) -> "LBV1Checklist": | ||
return cls(schema_id=feature_schema_id, | ||
answers=[ | ||
LBV1ClassificationAnswer( | ||
schema_id=answer.feature_schema_id, | ||
title=answer.name, | ||
value=answer.extra.get('value'), | ||
feature_id=answer.extra.get('feature_id')) | ||
LBV1ClassificationAnswer.from_common(answer) | ||
for answer in checklist.answer | ||
], | ||
**extra) | ||
|
@@ -68,25 +65,14 @@ class LBV1Dropdown(LBV1Feature): | |
answer: List[LBV1ClassificationAnswer] | ||
|
||
def to_common(self) -> Dropdown: | ||
return Dropdown(answer=[ | ||
ClassificationAnswer(feature_schema_id=answer.schema_id, | ||
name=answer.title, | ||
extra={ | ||
'feature_id': answer.feature_id, | ||
'value': answer.value | ||
}) for answer in self.answer | ||
]) | ||
return Dropdown(answer=[answer.to_common() for answer in self.answer]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. probably not related to this pr... but Dropdown only support 1 answer similar to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is a list because that is how we export it |
||
|
||
@classmethod | ||
def from_common(cls, dropdown: Dropdown, feature_schema_id: Cuid, | ||
**extra) -> "LBV1Dropdown": | ||
return cls(schema_id=feature_schema_id, | ||
answer=[ | ||
LBV1ClassificationAnswer( | ||
schema_id=answer.feature_schema_id, | ||
title=answer.name, | ||
value=answer.extra.get('value'), | ||
feature_id=answer.extra.get('feature_id')) | ||
LBV1ClassificationAnswer.from_common(answer) | ||
for answer in dropdown.answer | ||
], | ||
**extra) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -137,20 +137,17 @@ class LBV1Label(BaseModel): | |
label_url: Optional[str] = Extra('View Label') | ||
has_open_issues: Optional[float] = Extra('Has Open Issues') | ||
skipped: Optional[bool] = Extra('Skipped') | ||
media_type: Optional[str] = Extra('media_type') | ||
|
||
def to_common(self) -> Label: | ||
if isinstance(self.label, list): | ||
annotations = [] | ||
for lbl in self.label: | ||
annotations.extend(lbl.to_common()) | ||
data = VideoData(url=self.row_data, | ||
external_id=self.external_id, | ||
uid=self.data_row_id) | ||
else: | ||
annotations = self.label.to_common() | ||
data = self._infer_media_type() | ||
|
||
return Label(data=data, | ||
return Label(data=self._data_row_to_common(), | ||
uid=self.id, | ||
annotations=annotations, | ||
extra={ | ||
|
@@ -174,44 +171,49 @@ def from_common(cls, label: Label): | |
external_id=label.data.external_id, | ||
**label.extra) | ||
|
||
def _infer_media_type(self): | ||
# Video annotations are formatted differently from text and images | ||
# So we only need to differentiate those two | ||
def _data_row_to_common(self) -> Union[ImageData, TextData, VideoData]: | ||
        # Use data row information to construct the appropriate annotation type | ||
data_row_info = { | ||
'url' if self._is_url() else 'text': self.row_data, | ||
'external_id': self.external_id, | ||
'uid': self.data_row_id | ||
} | ||
|
||
self.media_type = self.media_type or self._infer_media_type() | ||
media_mapping = { | ||
'text': TextData, | ||
'image': ImageData, | ||
'video': VideoData | ||
} | ||
if self.media_type not in media_mapping: | ||
raise ValueError( | ||
f"Annotation types are only supported for {list(media_mapping)} media types." | ||
f" Found {self.media_type}.") | ||
return media_mapping[self.media_type](**data_row_info) | ||
|
||
def _infer_media_type(self) -> str: | ||
# Determines the data row type based on the label content | ||
if isinstance(self.label, list): | ||
return 'video' | ||
if self._has_text_annotations(): | ||
# If it has text annotations then it must be text | ||
if self._is_url(): | ||
return TextData(url=self.row_data, **data_row_info) | ||
else: | ||
return TextData(text=self.row_data, **data_row_info) | ||
return 'text' | ||
elif self._has_object_annotations(): | ||
# If it has object annotations and none are text annotations then it must be an image | ||
if self._is_url(): | ||
return ImageData(url=self.row_data, **data_row_info) | ||
else: | ||
return ImageData(text=self.row_data, **data_row_info) | ||
return 'image' | ||
else: | ||
# no annotations to infer data type from. | ||
# Use information from the row_data format if possible. | ||
if self._row_contains((".jpg", ".png", ".jpeg")) and self._is_url(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can this be an or statement with line 200? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could but I think this is more readable |
||
return ImageData(url=self.row_data, **data_row_info) | ||
elif self._row_contains( | ||
(".txt", ".text", ".html")) and self._is_url(): | ||
return TextData(url=self.row_data, **data_row_info) | ||
elif not self._is_url(): | ||
return TextData(text=self.row_data, **data_row_info) | ||
return 'image' | ||
elif (self._row_contains((".txt", ".text", ".html")) and | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can this be an or statement with line 198? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We would also need to check that there are no object annotations. This would also make the condition on 200 harder to read. |
||
self._is_url()) or not self._is_url(): | ||
return 'text' | ||
else: | ||
# This is going to be urls that do not contain any file extensions | ||
# This will only occur on skipped images. | ||
# To use this converter on data with this url format | ||
# filter out empty examples from the payload before deserializing. | ||
# This condition will occur when a data row url does not contain a file extension | ||
# and the label does not contain object annotations that indicate the media type. | ||
# As a temporary workaround you can explicitly set the media_type | ||
# in each label json payload before converting. | ||
# We will eventually provide the media type in the export. | ||
raise TypeError( | ||
"Can't infer data type from row data. Remove empty examples before trying again. " | ||
f"row_data: {self.row_data[:200]}") | ||
f"Can't infer data type from row data. row_data: {self.row_data[:200]}" | ||
) | ||
|
||
def _has_object_annotations(self): | ||
return len(self.label.objects) > 0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[{"ID": "ckw3ce1mc78b50zc30dqf0qhj", "DataRow ID": "ckw3cctc41uqg0zo5023e59hn", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F8821d3e2-9059-b616-9d4a-9723da3ea073-im1?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=FPOQz-alx3gHMK30ib1iPqJj0W0", "Label": {"objects": [{"featureId": "ckw3ce58u00003e66w9rh0onm", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 99.405, "y": 56.15}, {"x": 111.421, "y": 99.129}, {"x": 146.082, "y": 80.413}, {"x": 118.815, "y": 47.369}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3ce58u00003e66w9rh0onm?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:56.000Z", "Updated At": "2021-11-17T09:48:56.305Z", "Seconds to Label": 2.239, "External ID": "im1", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1mc78b50zc30dqf0qhj", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cea3f7b9t0zbn2tgp2y83", "DataRow ID": "ckw3cctc41uqo0zo5gpma1mr2", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F1bc65970-9880-78b4-d298-7a7ef7f8f3fc-im3?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=GZUsyQqYYlQPWBYv7GApFYlHXAc", "Label": {"objects": [], "classifications": [{"featureId": "ckw3ced5e00023e66236meh70", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ced5e00013e6652355ejd", "schemaId": "ckw3cdy207b6w0zbn2hgp3321", "title": "op1", "value": 
"op_1"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:02.000Z", "Updated At": "2021-11-17T09:49:02.220Z", "Seconds to Label": 5.373, "External ID": "im3", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cea3f7b9t0zbn2tgp2y83", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3cec4v78ex0zc3aodwdekw", "DataRow ID": "ckw3cctc41uqs0zo52cy6eus1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2Fdb42c0e8-e005-3305-ed35-b021f109b6a7-im4?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=Hms9mqcIyWNDzoJUdvMa6_hRKY4", "Label": {"objects": [{"featureId": "ckw3cefl900033e66k41q6zpc", "schemaId": "ckw3cdy207b6t0zbn3sh52xoh", "color": "#1CE6FF", "title": "obj", "value": "obj", "polygon": [{"x": 69.58, "y": 42.292}, {"x": 64.932, "y": 74.128}, {"x": 91.888, "y": 64.601}, {"x": 86.775, "y": 41.828}], "instanceURI": "https://api.labelbox.com/masks/feature/ckw3cefl900033e66k41q6zpc?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2s0cTF2Z3djMHZwMDcwNHhoeDdtNHZrIiwib3JnYW5pemF0aW9uSWQiOiJja2s0cTF2Z2Fwc2F1MDczMjRhd25zanEyIiwiaWF0IjoxNjM3MTU3NDI5LCJleHAiOjE2Mzk3NDk0Mjl9.L4PvjcpSIWV_9R5_M7c_24sj79wtserE_2hkx3ZeCMU"}], "classifications": [{"featureId": "ckw3ceijf00053e669zaplftd", "schemaId": "ckw3cdy207b6v0zbn11gp0zz4", "title": "classification", "value": "classification", "answer": {"featureId": "ckw3ceijf00043e665ex22xkp", "schemaId": "ckw3cdy207b6y0zbn77201rux", "title": "op2", "value": "op_2"}}], "relationships": []}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:49:15.000Z", "Updated At": "2021-11-17T09:49:15.785Z", "Seconds to Label": 5, "External ID": "im4", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], 
"View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3cec4v78ex0zc3aodwdekw", "Has Open Issues": 0, "Skipped": false}, {"ID": "ckw3ce1s34c1i0zbp32067q4v", "DataRow ID": "ckw3cctc41uqk0zo52n31egs1", "Labeled Data": "https://storage.labelbox.com/ckk4q1vgapsau07324awnsjq2%2F402cbd62-9127-5b50-57d6-d77aaf89f643-im2?Expires=1638367029433&KeyName=labelbox-assets-key-3&Signature=QIwHFUXN1mjBn8K4ZLWVQGQekmE", "Label": {}, "Created By": "msokoloff@labelbox.com", "Project Name": "test", "Created At": "2021-11-17T09:48:59.000Z", "Updated At": "2021-11-17T09:49:02.000Z", "Seconds to Label": 3.524, "External ID": "im2", "Agreement": -1, "Benchmark Agreement": -1, "Benchmark ID": null, "Dataset Name": "no-name", "Reviews": [], "View Label": "https://editor.labelbox.com?project=ckw3cd90b38br0zd4dh4n1xou&label=ckw3ce1s34c1i0zbp32067q4v", "Has Open Issues": 0, "Skipped": true}] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import json | ||
|
||
import pytest | ||
|
||
from labelbox.data.serialization.labelbox_v1.converter import LBV1Converter | ||
|
||
|
||
def test_image(): | ||
file_path = 'tests/data/assets/labelbox_v1/unkown_media_type_export.json' | ||
with open(file_path, 'r') as file: | ||
payload = json.load(file) | ||
|
||
collection = list(LBV1Converter.deserialize(payload)) | ||
# One of the data rows is broken. | ||
assert len(collection) != len(payload) | ||
|
||
for row in payload: | ||
row['media_type'] = 'image' | ||
|
||
collection = LBV1Converter.deserialize(payload) | ||
for idx, serialized in enumerate(LBV1Converter.serialize(collection)): | ||
assert serialized.keys() == payload[idx].keys() | ||
for key in serialized: | ||
if key != 'Label': | ||
assert serialized[key] == payload[idx][key] | ||
elif key == 'Label': | ||
for annotation_a, annotation_b in zip( | ||
serialized[key]['objects'], | ||
payload[idx][key]['objects']): | ||
if not len(annotation_a['classifications']): | ||
                        # We don't add a classification key to the payload if there are no classifications. | ||
annotation_a.pop('classifications') | ||
|
||
if isinstance(annotation_b.get('classifications'), | ||
list) and len( | ||
annotation_b['classifications']): | ||
if isinstance(annotation_b['classifications'][0], list): | ||
annotation_b['classifications'] = annotation_b[ | ||
'classifications'][0] | ||
|
||
assert annotation_a == annotation_b | ||
|
||
|
||
# Afterwards, check the nd serializer against this data as well. It should work for almost everything (except for the other problematic cases). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
indepdent -> independent