diff --git a/labelbox/data/annotation_types/annotation.py b/labelbox/data/annotation_types/annotation.py index d7fa8fb71..709960430 100644 --- a/labelbox/data/annotation_types/annotation.py +++ b/labelbox/data/annotation_types/annotation.py @@ -16,7 +16,7 @@ class BaseAnnotation(FeatureSchema, abc.ABC): extra: Dict[str, Any] = {} -class ClassificationAnnotation(BaseAnnotation): +class ClassificationAnnotation(BaseAnnotation, ConfidenceMixin): """Classification annotations (non localized) >>> ClassificationAnnotation( diff --git a/labelbox/data/annotation_types/classification/classification.py b/labelbox/data/annotation_types/classification/classification.py index 5f094d9a3..adca57908 100644 --- a/labelbox/data/annotation_types/classification/classification.py +++ b/labelbox/data/annotation_types/classification/classification.py @@ -13,7 +13,7 @@ # TODO: Replace when pydantic adds support for unions that don't coerce types -class _TempName(BaseModel): +class _TempName(ConfidenceMixin, BaseModel): name: str def dict(self, *args, **kwargs): @@ -43,7 +43,7 @@ def dict(self, *args, **kwargs) -> Dict[str, str]: return res -class Radio(BaseModel): +class Radio(ConfidenceMixin, BaseModel): """ A classification with only one selected option allowed >>> Radio(answer = ClassificationAnswer(name = "dog")) @@ -62,7 +62,7 @@ class Checklist(_TempName): answer: List[ClassificationAnswer] -class Text(BaseModel): +class Text(ConfidenceMixin, BaseModel): """ Free form text >>> Text(answer = "some text answer") diff --git a/labelbox/data/serialization/ndjson/classification.py b/labelbox/data/serialization/ndjson/classification.py index 7b48c3a30..98def47b9 100644 --- a/labelbox/data/serialization/ndjson/classification.py +++ b/labelbox/data/serialization/ndjson/classification.py @@ -120,25 +120,33 @@ def from_common(cls, radio: Radio, name: str, class NDText(NDAnnotation, NDTextSubclass): @classmethod - def from_common(cls, text: Text, name: str, feature_schema_id: Cuid, - extra: Dict[str, Any], data: Union[TextData, - ImageData]) -> "NDText": + def from_common(cls, + text: Text, + name: str, + feature_schema_id: Cuid, + extra: Dict[str, Any], + data: Union[TextData, ImageData], + confidence: Optional[float] = None) -> "NDText": return cls( answer=text.answer, data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), + confidence=confidence, ) class NDChecklist(NDAnnotation, NDChecklistSubclass, VideoSupported): @classmethod - def from_common( - cls, checklist: Checklist, name: str, feature_schema_id: Cuid, - extra: Dict[str, Any], data: Union[VideoData, TextData, - ImageData]) -> "NDChecklist": + def from_common(cls, + checklist: Checklist, + name: str, + feature_schema_id: Cuid, + extra: Dict[str, Any], + data: Union[VideoData, TextData, ImageData], + confidence: Optional[float] = None) -> "NDChecklist": return cls(answer=[ NDFeature(name=answer.name, schema_id=answer.feature_schema_id, @@ -149,15 +157,20 @@ def from_common( name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), - frames=extra.get('frames')) + frames=extra.get('frames'), + confidence=confidence) class NDRadio(NDAnnotation, NDRadioSubclass, VideoSupported): @classmethod - def from_common(cls, radio: Radio, name: str, feature_schema_id: Cuid, - extra: Dict[str, Any], data: Union[VideoData, TextData, - ImageData]) -> "NDRadio": + def from_common(cls, + radio: Radio, + name: str, + feature_schema_id: Cuid, + extra: Dict[str, Any], + data: Union[VideoData, TextData, ImageData], + confidence: Optional[float] = None) -> "NDRadio": return cls(answer=NDFeature(name=radio.answer.name, schema_id=radio.answer.feature_schema_id, confidence=radio.answer.confidence), @@ -165,7 +178,8 @@ def from_common(cls, radio: Radio, name: str, feature_schema_id: Cuid, name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), - frames=extra.get('frames')) + frames=extra.get('frames'), + confidence=confidence) class NDSubclassification: @@ -212,7 +226,8 @@ def to_common( value=annotation.to_common(), name=annotation.name, feature_schema_id=annotation.schema_id, - extra={'uuid': annotation.uuid}) + extra={'uuid': annotation.uuid}, + confidence=annotation.confidence) if getattr(annotation, 'frames', None) is None: return [common] results = [] @@ -235,7 +250,8 @@ def from_common( ) return classify_obj.from_common(annotation.value, annotation.name, annotation.feature_schema_id, - annotation.extra, data) + annotation.extra, data, + annotation.confidence) @staticmethod def lookup_classification( diff --git a/tests/data/annotation_types/test_text.py b/tests/data/annotation_types/test_text.py new file mode 100644 index 000000000..1b07d514f --- /dev/null +++ b/tests/data/annotation_types/test_text.py @@ -0,0 +1,12 @@ +from labelbox.data.annotation_types.classification.classification import Text + + +def test_text(): + text_entity = Text(answer="good job") + assert text_entity.answer == "good job" + + +def test_text_confidence(): + text_entity = Text(answer="good job", confidence=0.5) + assert text_entity.answer == "good job" + assert text_entity.confidence == 0.5 diff --git a/tests/data/assets/ndjson/text_import.json b/tests/data/assets/ndjson/text_import.json deleted file mode 100644 index f4132eb06..000000000 --- a/tests/data/assets/ndjson/text_import.json +++ /dev/null @@ -1,25 +0,0 @@ -[ - { - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "confidence": 0.42 - }, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": { - "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" - }, - { - "answer": [ - { - "schemaId": "ckrb1sfl8099e0y919v260awv" - } - ], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" - } -] \ No newline at end of file diff --git a/tests/data/assets/ndjson/text_import_name_only.json b/tests/data/assets/ndjson/text_import_name_only.json deleted file mode 100644 index 798677ba7..000000000 --- a/tests/data/assets/ndjson/text_import_name_only.json +++ /dev/null @@ -1,15 +0,0 @@ -[ - { - "answer": { "name": "answer a" }, - "name": "question 1", - "dataRow": { "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" - }, - { - "answer": [{ "name": "answer b", "confidence": 1 }], - "name": "question 2", - "dataRow": { "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" - } - ] - \ No newline at end of file diff --git a/tests/data/serialization/ndjson/test_checklist.py b/tests/data/serialization/ndjson/test_checklist.py new file mode 100644 index 000000000..dbf29a1b3 --- /dev/null +++ b/tests/data/serialization/ndjson/test_checklist.py @@ -0,0 +1,43 @@ +import json +from labelbox.data.annotation_types.annotation import ClassificationAnnotation +from labelbox.data.annotation_types.classification.classification import Checklist, ClassificationAnswer, Radio +from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.label import Label + +from labelbox.data.serialization.ndjson.converter import NDJsonConverter + + +def test_serialization(): + label = Label(uid="ckj7z2q0b0000jx6x0q2q7q0d", + data=TextData( + uid="bkj7z2q0b0000jx6x0q2q7q0d", + text="This is a test", + ), + annotations=[ + ClassificationAnnotation( + name="checkbox_question_geo", + confidence=0.5, + value=Checklist(answer=[ + ClassificationAnswer(name="first_answer"), + ClassificationAnswer(name="second_answer") + ])) + ]) + + serialized = NDJsonConverter.serialize([label]) + + res = next(serialized) + assert res['confidence'] == 0.5 + assert res['name'] == "checkbox_question_geo" + assert res['answer'][0]['name'] == "first_answer" + assert res['answer'][1]['name'] == "second_answer" + assert res['dataRow']['id'] == "bkj7z2q0b0000jx6x0q2q7q0d" + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + assert annotation.confidence == 0.5 + + annotation_value = annotation.value + assert type(annotation_value) is Checklist + assert annotation_value.answer[0].name == "first_answer" + assert annotation_value.answer[1].name == "second_answer" diff --git a/tests/data/serialization/ndjson/test_radio.py b/tests/data/serialization/ndjson/test_radio.py new file mode 100644 index 000000000..0e56f117c --- /dev/null +++ b/tests/data/serialization/ndjson/test_radio.py @@ -0,0 +1,39 @@ +import json +from labelbox.data.annotation_types.annotation import ClassificationAnnotation +from labelbox.data.annotation_types.classification.classification import ClassificationAnswer, Radio +from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.label import Label + +from labelbox.data.serialization.ndjson.converter import NDJsonConverter + + +def test_serialization(): + label = Label(uid="ckj7z2q0b0000jx6x0q2q7q0d", + data=TextData( + uid="bkj7z2q0b0000jx6x0q2q7q0d", + text="This is a test", + ), + annotations=[ + ClassificationAnnotation( + name="radio_question_geo", + confidence=0.5, + value=Radio(answer=ClassificationAnswer( + confidence=0.6, name="first_radio_answer"))) + ]) + + serialized = NDJsonConverter.serialize([label]) + res = next(serialized) + assert res['confidence'] == 0.5 + assert res['name'] == "radio_question_geo" + assert res['answer']['name'] == "first_radio_answer" + assert res['answer']['confidence'] == 0.6 + assert res['dataRow']['id'] == "bkj7z2q0b0000jx6x0q2q7q0d" + + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + assert annotation.confidence == 0.5 + + annotation_value = annotation.value + assert type(annotation_value) is Radio + assert annotation_value.answer.name == "first_radio_answer" diff --git a/tests/data/serialization/ndjson/test_text.py b/tests/data/serialization/ndjson/test_text.py index b1df95f1c..9d1b51e2c 100644 --- a/tests/data/serialization/ndjson/test_text.py +++ b/tests/data/serialization/ndjson/test_text.py @@ -1,20 +1,37 @@ import json +from labelbox.data.annotation_types.annotation import ClassificationAnnotation +from labelbox.data.annotation_types.classification.classification import ClassificationAnswer, Radio, Text +from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter -def test_text(): - with open('tests/data/assets/ndjson/text_import.json', 'r') as file: - data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == data +def test_serialization(): + label = Label(uid="ckj7z2q0b0000jx6x0q2q7q0d", + data=TextData( + uid="bkj7z2q0b0000jx6x0q2q7q0d", + text="This is a test", + ), + annotations=[ + ClassificationAnnotation( + name="radio_question_geo", + confidence=0.5, + value=Text(answer="first_radio_answer")) + ]) + serialized = NDJsonConverter.serialize([label]) + res = next(serialized) + assert res['confidence'] == 0.5 + assert res['name'] == "radio_question_geo" + assert res['answer'] == "first_radio_answer" + assert res['dataRow']['id'] == "bkj7z2q0b0000jx6x0q2q7q0d" -def test_text_name_only(): - with open('tests/data/assets/ndjson/text_import_name_only.json', - 'r') as file: - data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == data + deserialized = NDJsonConverter.deserialize([res]) + res = next(deserialized) + annotation = res.annotations[0] + assert annotation.confidence == 0.5 + + annotation_value = annotation.value + assert type(annotation_value) is Text + assert annotation_value.answer == "first_radio_answer" \ No newline at end of file diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index f7147ba60..64280a76e 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -658,7 +658,7 @@ def model_run_with_model_run_data_rows(client, configured_project, labels = wait_for_label_processing(configured_project) label_ids = [label.uid for label in labels] model_run.upsert_labels(label_ids) - time.sleep(3) + time.sleep(300) yield model_run model_run.delete() # TODO: Delete resources when that is possible .. @@ -670,6 +670,11 @@ def model_run_with_all_project_labels(client, configured_project, wait_for_label_processing): configured_project.enable_model_assisted_labeling() + data_row_ids = configured_project.data_row_ids + + configured_project._wait_until_data_rows_are_processed( + data_row_ids=data_row_ids) + upload_task = LabelImport.create_from_objects( client, configured_project.uid, f"label-import-{uuid.uuid4()}", model_run_predictions) @@ -680,7 +685,6 @@ def model_run_with_all_project_labels(client, configured_project, ) == 0, f"Label Import {upload_task.name} failed with errors {upload_task.errors}" wait_for_label_processing(configured_project) model_run.upsert_labels(project_id=configured_project.uid) - time.sleep(3) yield model_run model_run.delete() # TODO: Delete resources when that is possible ..