From 5c077f9c4ec1454f91c805a39a6e9ec1417328b5 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Thu, 11 May 2023 10:59:06 -0700 Subject: [PATCH 1/9] Add import for ndjson parser Replace ndjson.loads with parser.loads --- labelbox/data/serialization/labelbox_v1/converter.py | 3 ++- labelbox/schema/annotation_import.py | 3 ++- labelbox/schema/batch.py | 1 + labelbox/schema/bulk_import_request.py | 5 +++-- labelbox/schema/dataset.py | 1 + labelbox/schema/model_run.py | 3 ++- labelbox/schema/project.py | 3 ++- labelbox/schema/task.py | 3 ++- tests/data/serialization/ndjson/test_video.py | 1 + tests/integration/annotation_import/conftest.py | 5 +++-- .../annotation_import/test_bulk_import_request.py | 1 + .../annotation_import/test_mea_prediction_import.py | 1 + .../integration/annotation_import/test_ndjson_validation.py | 1 + .../annotation_import/test_upsert_prediction_import.py | 1 + 14 files changed, 23 insertions(+), 9 deletions(-) diff --git a/labelbox/data/serialization/labelbox_v1/converter.py b/labelbox/data/serialization/labelbox_v1/converter.py index 946abdc1e..53c439cfd 100644 --- a/labelbox/data/serialization/labelbox_v1/converter.py +++ b/labelbox/data/serialization/labelbox_v1/converter.py @@ -3,6 +3,7 @@ import logging import ndjson +from labelbox.data.serialization.ndjson import parser import requests from copy import deepcopy from requests.exceptions import HTTPError @@ -91,7 +92,7 @@ def _process(self, value): value = deepcopy(value) if 'frames' in value['Label']: req = self._request(value) - value['Label'] = ndjson.loads(req) + value['Label'] = parser.loads(req) return value @retry.Retry(predicate=retry.if_exception_type(HTTPError)) diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py index a22525816..f4104ffae 100644 --- a/labelbox/schema/annotation_import.py +++ b/labelbox/schema/annotation_import.py @@ -7,6 +7,7 @@ import backoff import ndjson +from labelbox.data.serialization.ndjson import parser import requests from tqdm import tqdm # type: ignore @@ -132,7 +133,7 @@ def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: response = requests.get(url) response.raise_for_status() - return ndjson.loads(response.text) + return parser.loads(response.text) @classmethod def _create_from_bytes(cls, client, variables, query_str, file_name, diff --git a/labelbox/schema/batch.py b/labelbox/schema/batch.py index f45e7e919..a4230771a 100644 --- a/labelbox/schema/batch.py +++ b/labelbox/schema/batch.py @@ -5,6 +5,7 @@ from labelbox.exceptions import LabelboxError, ResourceNotFoundError from io import StringIO import ndjson +from labelbox.data.serialization.ndjson import parser import requests import logging import time diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py index 6e5585a91..c4cd7e161 100644 --- a/labelbox/schema/bulk_import_request.py +++ b/labelbox/schema/bulk_import_request.py @@ -8,6 +8,7 @@ import pydantic import backoff import ndjson +from labelbox.data.serialization.ndjson import parser import requests from pydantic import BaseModel, root_validator, validator from typing_extensions import Literal @@ -172,7 +173,7 @@ def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: """ response = requests.get(url) response.raise_for_status() - return ndjson.loads(response.text) + return parser.loads(response.text) def refresh(self) -> None: """Synchronizes values of all fields with the database. @@ -258,7 +259,7 @@ def create_from_url(cls, "Validation is turned on. The file will be downloaded locally and processed before uploading." ) res = requests.get(url) - data = ndjson.loads(res.text) + data = parser.loads(res.text) _validate_ndjson(data, client.get_project(project_id)) query_str = """mutation createBulkImportRequestPyApi( diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index 7a6df8b16..15025039a 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -5,6 +5,7 @@ from collections.abc import Iterable import time import ndjson +from labelbox.data.serialization.ndjson import parser from itertools import islice from concurrent.futures import ThreadPoolExecutor, as_completed diff --git a/labelbox/schema/model_run.py b/labelbox/schema/model_run.py index fed6cbb04..3d90a58ca 100644 --- a/labelbox/schema/model_run.py +++ b/labelbox/schema/model_run.py @@ -6,6 +6,7 @@ import logging import requests import ndjson +from labelbox.data.serialization.ndjson import parser from enum import Enum from labelbox.pagination import PaginatedCollection @@ -489,7 +490,7 @@ def export_labels( else: response = requests.get(url) response.raise_for_status() - return ndjson.loads(response.content) + return parser.loads(response.content) timeout_seconds -= sleep_time if timeout_seconds <= 0: diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 8b469d8a9..5e953013d 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -8,6 +8,7 @@ from urllib.parse import urlparse import ndjson +from labelbox.data.serialization.ndjson import parser import requests from labelbox import utils @@ -244,7 +245,7 @@ def export_queued_data_rows( download_url = res["downloadUrl"] response = requests.get(download_url) response.raise_for_status() - return ndjson.loads(response.text) + return parser.loads(response.text) elif res["status"] == "FAILED": raise LabelboxError("Data row export failed.") diff --git a/labelbox/schema/task.py b/labelbox/schema/task.py index ea4b83dfe..7b11f4415 100644 --- a/labelbox/schema/task.py +++ b/labelbox/schema/task.py @@ -4,6 +4,7 @@ import time from typing import TYPE_CHECKING, Callable, Optional, Dict, Any, List, Union import ndjson +from labelbox.data.serialization.ndjson import parser from labelbox.exceptions import ResourceNotFoundError from labelbox.orm.db_object import DbObject @@ -147,7 +148,7 @@ def download_result(remote_json_field: Optional[str], format: str): if format == 'json': return response.json() elif format == 'ndjson': - return ndjson.loads(response.text) + return parser.loads(response.text) else: raise ValueError( "Expected the result format to be either `ndjson` or `json`." diff --git a/tests/data/serialization/ndjson/test_video.py b/tests/data/serialization/ndjson/test_video.py index ae1adcbed..6c683f481 100644 --- a/tests/data/serialization/ndjson/test_video.py +++ b/tests/data/serialization/ndjson/test_video.py @@ -10,6 +10,7 @@ from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.video import VideoObjectAnnotation import ndjson +from labelbox.data.serialization.ndjson import parser from labelbox.data.serialization.ndjson.converter import NDJsonConverter from labelbox.schema.annotation_import import MALPredictionImport diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index a900f5669..f81a5b2ab 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -4,6 +4,7 @@ import time import requests import ndjson +from labelbox.data.serialization.ndjson import parser from typing import Type from labelbox.schema.labeling_frontend import LabelingFrontend @@ -884,7 +885,7 @@ class AnnotationImportTestHelpers: def assert_file_content(cls, url: str, predictions): response = requests.get(url) predictions = cls._convert_to_plain_object(predictions) - assert ndjson.loads(response.text) == predictions + assert parser.loads(response.text) == predictions @staticmethod def check_running_state(req, name, url=None): @@ -899,7 +900,7 @@ def check_running_state(req, name, url=None): def _convert_to_plain_object(obj): """Some Python objects e.g. tuples can't be compared with JSON serialized data, serialize to JSON and deserialize to get plain objects""" json_str = ndjson.dumps(obj) - return ndjson.loads(json_str) + return parser.loads(json_str) @pytest.fixture diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 382467951..ba5a45ff6 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -1,5 +1,6 @@ import uuid import ndjson +from labelbox.data.serialization.ndjson import parser import pytest import random from labelbox.data.annotation_types.annotation import ObjectAnnotation diff --git a/tests/integration/annotation_import/test_mea_prediction_import.py b/tests/integration/annotation_import/test_mea_prediction_import.py index fb7ff50ca..b3cb71a39 100644 --- a/tests/integration/annotation_import/test_mea_prediction_import.py +++ b/tests/integration/annotation_import/test_mea_prediction_import.py @@ -1,5 +1,6 @@ import uuid import ndjson +from labelbox.data.serialization.ndjson import parser import pytest from labelbox.schema.annotation_import import AnnotationImportState, MEAPredictionImport diff --git a/tests/integration/annotation_import/test_ndjson_validation.py b/tests/integration/annotation_import/test_ndjson_validation.py index 355c4909e..521a4c08b 100644 --- a/tests/integration/annotation_import/test_ndjson_validation.py +++ b/tests/integration/annotation_import/test_ndjson_validation.py @@ -1,5 +1,6 @@ import pytest import ndjson +from labelbox.data.serialization.ndjson import parser from pytest_cases import parametrize, fixture_ref from labelbox.exceptions import MALValidationError diff --git a/tests/integration/annotation_import/test_upsert_prediction_import.py b/tests/integration/annotation_import/test_upsert_prediction_import.py index eb1fa9d80..48764f2f3 100644 --- a/tests/integration/annotation_import/test_upsert_prediction_import.py +++ b/tests/integration/annotation_import/test_upsert_prediction_import.py @@ -1,5 +1,6 @@ import uuid import ndjson +from labelbox.data.serialization.ndjson import parser import pytest from labelbox.schema.annotation_import import AnnotationImportState, MEAPredictionImport From 142e3820d0593588986921d110a45bd9ab261613 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Thu, 11 May 2023 14:25:41 -0700 Subject: [PATCH 2/9] Add custom Decoder to be format json and delegate the rest to standard Python json decoder To be able to handle loading byte, bytearray etc. all other types as support by the json library itself --- labelbox/data/serialization/ndjson/parser.py | 28 ++++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/labelbox/data/serialization/ndjson/parser.py b/labelbox/data/serialization/ndjson/parser.py index 8f101936b..89bb1a9a8 100644 --- a/labelbox/data/serialization/ndjson/parser.py +++ b/labelbox/data/serialization/ndjson/parser.py @@ -1,20 +1,32 @@ +from collections.abc import Callable from io import FileIO, StringIO import json -from typing import Iterable, Union +from typing import Any, Iterable, Union -def loads(ndjson_string: str, **kwargs) -> list: - # NOTE: the consequence of this line would be conversion of 'literal' line breaks to commas - lines = ','.join(ndjson_string.splitlines()) - text = f"[{lines}]" # NOTE: this is a hack to make json.loads work for ndjson - return json.loads(text, **kwargs) +class NdjsonDecoder(json.JSONDecoder): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # self.parse_array = self._parse_array + # def _parse_array(self, *args, **kwargs): + # return list(self.scan_once(*args, **kwargs)) + + def decode(self, s: str, *args, **kwargs) -> Any: + lines = ','.join(s.splitlines()) + text = f"[{lines}]" # NOTE: this is a hack to make json.loads work for ndjson + return super().decode(text, *args, **kwargs) -def dumps(obj: list, **kwargs) -> str: + +def loads(ndjson_string, **kwargs) -> list: + kwargs.setdefault('cls', NdjsonDecoder) + return json.loads(ndjson_string, **kwargs) + +def dumps(obj, **kwargs) -> str: lines = map(lambda obj: json.dumps(obj, **kwargs), obj) return '\n'.join(lines) -def reader(io_handle: Union[StringIO, FileIO, Iterable], **kwargs): +def reader(io_handle, **kwargs): for line in io_handle: yield json.loads(line, **kwargs) From deb9cb913a62d270e4e41b58dbb25da82401d3f1 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Thu, 11 May 2023 16:49:20 -0700 Subject: [PATCH 3/9] Replaced ndjson.dumps --- labelbox/data/serialization/ndjson/parser.py | 9 ++++----- labelbox/schema/annotation_import.py | 2 +- labelbox/schema/bulk_import_request.py | 2 +- tests/integration/annotation_import/conftest.py | 2 +- .../annotation_import/test_bulk_import_request.py | 2 +- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/labelbox/data/serialization/ndjson/parser.py b/labelbox/data/serialization/ndjson/parser.py index 89bb1a9a8..ad41efdf9 100644 --- a/labelbox/data/serialization/ndjson/parser.py +++ b/labelbox/data/serialization/ndjson/parser.py @@ -1,18 +1,16 @@ -from collections.abc import Callable -from io import FileIO, StringIO import json -from typing import Any, Iterable, Union class NdjsonDecoder(json.JSONDecoder): + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # self.parse_array = self._parse_array # def _parse_array(self, *args, **kwargs): # return list(self.scan_once(*args, **kwargs)) - - def decode(self, s: str, *args, **kwargs) -> Any: + + def decode(self, s: str, *args, **kwargs): lines = ','.join(s.splitlines()) text = f"[{lines}]" # NOTE: this is a hack to make json.loads work for ndjson return super().decode(text, *args, **kwargs) @@ -22,6 +20,7 @@ def loads(ndjson_string, **kwargs) -> list: kwargs.setdefault('cls', NdjsonDecoder) return json.loads(ndjson_string, **kwargs) + def dumps(obj, **kwargs) -> str: lines = map(lambda obj: json.dumps(obj, **kwargs), obj) return '\n'.join(lines) diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py index f4104ffae..3f4ea473e 100644 --- a/labelbox/schema/annotation_import.py +++ b/labelbox/schema/annotation_import.py @@ -159,7 +159,7 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]], objects = serialize_labels(objects) cls._validate_data_rows(objects) - data_str = ndjson.dumps(objects) + data_str = parser.dumps(objects) if not data_str: raise ValueError(f"{object_name} cannot be empty") diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py index c4cd7e161..a43796496 100644 --- a/labelbox/schema/bulk_import_request.py +++ b/labelbox/schema/bulk_import_request.py @@ -323,7 +323,7 @@ def create_from_objects(cls, if validate: _validate_ndjson(ndjson_predictions, client.get_project(project_id)) - data_str = ndjson.dumps(ndjson_predictions) + data_str = parser.dumps(ndjson_predictions) if not data_str: raise ValueError('annotations cannot be empty') diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index f81a5b2ab..433318ddb 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -899,7 +899,7 @@ def check_running_state(req, name, url=None): @staticmethod def _convert_to_plain_object(obj): """Some Python objects e.g. tuples can't be compared with JSON serialized data, serialize to JSON and deserialize to get plain objects""" - json_str = ndjson.dumps(obj) + json_str = parser.dumps(obj) return parser.loads(json_str) diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index ba5a45ff6..559665498 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -160,7 +160,7 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): @pytest.mark.slow def test_wait_till_done(rectangle_inference, configured_project): name = str(uuid.uuid4()) - url = configured_project.client.upload_data(content=ndjson.dumps( + url = configured_project.client.upload_data(content=parser.dumps( [rectangle_inference]), sign=True) bulk_import_request = configured_project.upload_annotations(name=name, From 57ea354780ca1e115855500a5de0eaa986ea17be Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Thu, 11 May 2023 17:25:16 -0700 Subject: [PATCH 4/9] Replace ndjson.reader --- labelbox/schema/batch.py | 2 +- labelbox/schema/bulk_import_request.py | 2 +- labelbox/schema/dataset.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/labelbox/schema/batch.py b/labelbox/schema/batch.py index a4230771a..9d87031f4 100644 --- a/labelbox/schema/batch.py +++ b/labelbox/schema/batch.py @@ -119,7 +119,7 @@ def export_data_rows(self, download_url = res["downloadUrl"] response = requests.get(download_url) response.raise_for_status() - reader = ndjson.reader(StringIO(response.text)) + reader = parser.reader(StringIO(response.text)) return ( Entity.DataRow(self.client, result) for result in reader) elif res["status"] == "FAILED": diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py index a43796496..9c06759bc 100644 --- a/labelbox/schema/bulk_import_request.py +++ b/labelbox/schema/bulk_import_request.py @@ -367,7 +367,7 @@ def create_from_local_file(cls, with file.open('rb') as f: if validate_file: - reader = ndjson.reader(f) + reader = parser.reader(f) # ensure that the underlying json load call is valid # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53 # by iterating through the file so we only store diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index 15025039a..46ae1dff8 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -522,7 +522,7 @@ def export_data_rows(self, download_url = res["downloadUrl"] response = requests.get(download_url) response.raise_for_status() - reader = ndjson.reader(StringIO(response.text)) + reader = parser.reader(StringIO(response.text)) return ( Entity.DataRow(self.client, result) for result in reader) elif res["status"] == "FAILED": From abebd71571c673a65295d92d3c01cd854fc52af6 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Thu, 11 May 2023 17:33:01 -0700 Subject: [PATCH 5/9] Remove all 3d party ndjson library imports --- labelbox/data/serialization/labelbox_v1/converter.py | 1 - labelbox/schema/annotation_import.py | 1 - labelbox/schema/batch.py | 1 - labelbox/schema/bulk_import_request.py | 1 - labelbox/schema/dataset.py | 2 +- labelbox/schema/model_run.py | 1 - labelbox/schema/project.py | 1 - labelbox/schema/task.py | 1 - tests/data/serialization/ndjson/test_video.py | 1 - tests/integration/annotation_import/conftest.py | 2 +- tests/integration/annotation_import/test_bulk_import_request.py | 1 - .../integration/annotation_import/test_mea_prediction_import.py | 1 - tests/integration/annotation_import/test_ndjson_validation.py | 2 +- .../annotation_import/test_upsert_prediction_import.py | 1 - tests/unit/test_ndjson_parsing.py | 2 +- 15 files changed, 4 insertions(+), 15 deletions(-) diff --git a/labelbox/data/serialization/labelbox_v1/converter.py b/labelbox/data/serialization/labelbox_v1/converter.py index 53c439cfd..058dc1c1c 100644 --- a/labelbox/data/serialization/labelbox_v1/converter.py +++ b/labelbox/data/serialization/labelbox_v1/converter.py @@ -2,7 +2,6 @@ from typing import Any, Dict, Generator, Iterable, Union import logging -import ndjson from labelbox.data.serialization.ndjson import parser import requests from copy import deepcopy diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py index 3f4ea473e..4eb57aff2 100644 --- a/labelbox/schema/annotation_import.py +++ b/labelbox/schema/annotation_import.py @@ -6,7 +6,6 @@ from typing import Any, BinaryIO, Dict, List, Union, TYPE_CHECKING, cast import backoff -import ndjson from labelbox.data.serialization.ndjson import parser import requests from tqdm import tqdm # type: ignore diff --git a/labelbox/schema/batch.py b/labelbox/schema/batch.py index 9d87031f4..c0c26227b 100644 --- a/labelbox/schema/batch.py +++ b/labelbox/schema/batch.py @@ -4,7 +4,6 @@ from labelbox.orm.model import Entity, Field, Relationship from labelbox.exceptions import LabelboxError, ResourceNotFoundError from io import StringIO -import ndjson from labelbox.data.serialization.ndjson import parser import requests import logging diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py index 9c06759bc..036f86d8b 100644 --- a/labelbox/schema/bulk_import_request.py +++ b/labelbox/schema/bulk_import_request.py @@ -7,7 +7,6 @@ from pathlib import Path import pydantic import backoff -import ndjson from labelbox.data.serialization.ndjson import parser import requests from pydantic import BaseModel, root_validator, validator diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index 46ae1dff8..7e0564414 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -4,7 +4,7 @@ import logging from collections.abc import Iterable import time -import ndjson + from labelbox.data.serialization.ndjson import parser from itertools import islice diff --git a/labelbox/schema/model_run.py b/labelbox/schema/model_run.py index 3d90a58ca..1c8c0c0bf 100644 --- a/labelbox/schema/model_run.py +++ b/labelbox/schema/model_run.py @@ -5,7 +5,6 @@ import time import logging import requests -import ndjson from labelbox.data.serialization.ndjson import parser from enum import Enum diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 5e953013d..83e0b0b5a 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -7,7 +7,6 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union from urllib.parse import urlparse -import ndjson from labelbox.data.serialization.ndjson import parser import requests diff --git a/labelbox/schema/task.py b/labelbox/schema/task.py index 7b11f4415..d8d3686b2 100644 --- a/labelbox/schema/task.py +++ b/labelbox/schema/task.py @@ -3,7 +3,6 @@ import requests import time from typing import TYPE_CHECKING, Callable, Optional, Dict, Any, List, Union -import ndjson from labelbox.data.serialization.ndjson import parser from labelbox.exceptions import ResourceNotFoundError diff --git a/tests/data/serialization/ndjson/test_video.py b/tests/data/serialization/ndjson/test_video.py index 6c683f481..118044eb0 100644 --- a/tests/data/serialization/ndjson/test_video.py +++ b/tests/data/serialization/ndjson/test_video.py @@ -9,7 +9,6 @@ from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.video import VideoObjectAnnotation -import ndjson from labelbox.data.serialization.ndjson import parser from labelbox.data.serialization.ndjson.converter import NDJsonConverter diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 433318ddb..8d2eb1418 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -3,7 +3,7 @@ import pytest import time import requests -import ndjson + from labelbox.data.serialization.ndjson import parser from typing import Type diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 559665498..0d130c4d3 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -1,5 +1,4 @@ import uuid -import ndjson from labelbox.data.serialization.ndjson import parser import pytest import random diff --git a/tests/integration/annotation_import/test_mea_prediction_import.py b/tests/integration/annotation_import/test_mea_prediction_import.py index b3cb71a39..4137438b5 100644 --- a/tests/integration/annotation_import/test_mea_prediction_import.py +++ b/tests/integration/annotation_import/test_mea_prediction_import.py @@ -1,5 +1,4 @@ import uuid -import ndjson from labelbox.data.serialization.ndjson import parser import pytest diff --git a/tests/integration/annotation_import/test_ndjson_validation.py b/tests/integration/annotation_import/test_ndjson_validation.py index 521a4c08b..d106624f1 100644 --- a/tests/integration/annotation_import/test_ndjson_validation.py +++ b/tests/integration/annotation_import/test_ndjson_validation.py @@ -1,5 +1,5 @@ import pytest -import ndjson + from labelbox.data.serialization.ndjson import parser from pytest_cases import parametrize, fixture_ref diff --git a/tests/integration/annotation_import/test_upsert_prediction_import.py b/tests/integration/annotation_import/test_upsert_prediction_import.py index 48764f2f3..c1fea86b4 100644 --- a/tests/integration/annotation_import/test_upsert_prediction_import.py +++ b/tests/integration/annotation_import/test_upsert_prediction_import.py @@ -1,5 +1,4 @@ import uuid -import ndjson from labelbox.data.serialization.ndjson import parser import pytest diff --git a/tests/unit/test_ndjson_parsing.py b/tests/unit/test_ndjson_parsing.py index 53878d989..7e59257ef 100644 --- a/tests/unit/test_ndjson_parsing.py +++ b/tests/unit/test_ndjson_parsing.py @@ -2,7 +2,7 @@ import random import time from io import StringIO -import ndjson + from labelbox.data.serialization.ndjson import parser From d128c82bcdb357a0a0946a6338125aa05472eaac Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Thu, 11 May 2023 17:37:26 -0700 Subject: [PATCH 6/9] Removed ndjson library from sdk --- mypy.ini | 3 --- requirements.txt | 1 - setup.py | 3 +-- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/mypy.ini b/mypy.ini index d6054a852..4e5254fa6 100644 --- a/mypy.ini +++ b/mypy.ini @@ -3,9 +3,6 @@ [mypy-backoff.*] ignore_missing_imports = True -[mypy-ndjson.*] -ignore_missing_imports = True - [mypy-google.*] ignore_missing_imports = True diff --git a/requirements.txt b/requirements.txt index d8000e448..ddbf87fac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ requests==2.22.0 -ndjson==0.3.1 backoff==1.10.0 google-api-core>=1.22.1 pydantic>=1.8,<2.0 diff --git a/setup.py b/setup.py index 319e7f87e..151a4ba15 100644 --- a/setup.py +++ b/setup.py @@ -21,8 +21,7 @@ packages=setuptools.find_packages(), install_requires=[ "backoff==1.10.0", "requests>=2.22.0", "google-api-core>=1.22.1", - "pydantic>=1.8,<2.0", "tqdm", "ndjson", - "backports-datetime-fromisoformat~=2.0" + "pydantic>=1.8,<2.0", "tqdm", "backports-datetime-fromisoformat~=2.0" ], extras_require={ 'data': [ From 5eb3b2763478eb4291deb57b0ec14903e30703ce Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 12 May 2023 11:01:07 -0700 Subject: [PATCH 7/9] Remove str return from dumps() due to an issue with an outdated version of mypy we are using --- labelbox/data/serialization/ndjson/parser.py | 2 +- labelbox/schema/annotation_import.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/labelbox/data/serialization/ndjson/parser.py b/labelbox/data/serialization/ndjson/parser.py index ad41efdf9..6eeed67e7 100644 --- a/labelbox/data/serialization/ndjson/parser.py +++ b/labelbox/data/serialization/ndjson/parser.py @@ -21,7 +21,7 @@ def loads(ndjson_string, **kwargs) -> list: return json.loads(ndjson_string, **kwargs) -def dumps(obj, **kwargs) -> str: +def dumps(obj, **kwargs): lines = map(lambda obj: json.dumps(obj, **kwargs), obj) return '\n'.join(lines) diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py index 4eb57aff2..d84333e7c 100644 --- a/labelbox/schema/annotation_import.py +++ b/labelbox/schema/annotation_import.py @@ -162,7 +162,9 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]], if not data_str: raise ValueError(f"{object_name} cannot be empty") - return data_str.encode('utf-8') + return data_str.encode( + 'utf-8' + ) # NOTICE this method returns bytes, NOT BinaryIO... should have done io.BytesIO(...) but not going to change this at the moment since it works and fools mypy def refresh(self) -> None: """Synchronizes values of all fields with the database. From b50b48499593ea3c8de260f65327373ae3c0845b Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 12 May 2023 11:27:58 -0700 Subject: [PATCH 8/9] Add test to load bytes --- tests/unit/test_ndjson_parsing.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_ndjson_parsing.py b/tests/unit/test_ndjson_parsing.py index 7e59257ef..8162fa2da 100644 --- a/tests/unit/test_ndjson_parsing.py +++ b/tests/unit/test_ndjson_parsing.py @@ -1,6 +1,4 @@ import ast -import random -import time from io import StringIO from labelbox.data.serialization.ndjson import parser @@ -8,8 +6,18 @@ def test_loads(ndjson_content): expected_line, expected_objects = ndjson_content - parsed_line = parser.loads(expected_line) + + assert parsed_line == expected_objects + assert parser.dumps(parsed_line) == expected_line + + +def test_loads_bytes(ndjson_content): + expected_line, expected_objects = ndjson_content + + bytes_line = expected_line.encode('utf-8') + parsed_line = parser.loads(bytes_line) + assert parsed_line == expected_objects assert parser.dumps(parsed_line) == expected_line From c366c6b079b4b6ffde166cd5df87d5ea3db00186 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 12 May 2023 11:34:00 -0700 Subject: [PATCH 9/9] Replace ndjson in tests Also add dump() to the parser, used in tests only --- labelbox/data/serialization/ndjson/parser.py | 9 +++++---- .../annotation_import/test_bulk_import_request.py | 4 ++-- .../annotation_import/test_mea_prediction_import.py | 2 +- .../annotation_import/test_ndjson_validation.py | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/labelbox/data/serialization/ndjson/parser.py b/labelbox/data/serialization/ndjson/parser.py index 6eeed67e7..fab41bb81 100644 --- a/labelbox/data/serialization/ndjson/parser.py +++ b/labelbox/data/serialization/ndjson/parser.py @@ -5,10 +5,6 @@ class NdjsonDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - # self.parse_array = self._parse_array - - # def _parse_array(self, *args, **kwargs): - # return list(self.scan_once(*args, **kwargs)) def decode(self, s: str, *args, **kwargs): lines = ','.join(s.splitlines()) @@ -26,6 +22,11 @@ def dumps(obj, **kwargs): return '\n'.join(lines) +def dump(obj, io, **kwargs): + lines = dumps(obj, **kwargs) + io.write(lines) + + def reader(io_handle, **kwargs): for line in io_handle: yield json.loads(line, **kwargs) diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 0d130c4d3..580c8b57e 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -90,7 +90,7 @@ def test_create_from_local_file(tmp_path, predictions, configured_project, file_name = f"{name}.ndjson" file_path = tmp_path / file_name with file_path.open("w") as f: - ndjson.dump(predictions, f) + parser.dump(predictions, f) bulk_import_request = configured_project.upload_annotations( name=name, annotations=str(file_path), validate=False) @@ -143,7 +143,7 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): repeat_uuid[1]['uuid'] = uid with file_path.open("w") as f: - ndjson.dump(repeat_uuid, f) + parser.dump(repeat_uuid, f) with pytest.raises(UuidError): configured_project.upload_annotations(name="name", diff --git a/tests/integration/annotation_import/test_mea_prediction_import.py b/tests/integration/annotation_import/test_mea_prediction_import.py index 4137438b5..a2aaf27ff 100644 --- a/tests/integration/annotation_import/test_mea_prediction_import.py +++ b/tests/integration/annotation_import/test_mea_prediction_import.py @@ -99,7 +99,7 @@ def test_create_from_local_file(tmp_path, model_run_with_data_rows, file_name = f"{name}.ndjson" file_path = tmp_path / file_name with file_path.open("w") as f: - ndjson.dump(object_predictions, f) + parser.dump(object_predictions, f) annotation_import = model_run_with_data_rows.add_predictions( name=name, predictions=str(file_path)) diff --git a/tests/integration/annotation_import/test_ndjson_validation.py b/tests/integration/annotation_import/test_ndjson_validation.py index d106624f1..c1ba314eb 100644 --- a/tests/integration/annotation_import/test_ndjson_validation.py +++ b/tests/integration/annotation_import/test_ndjson_validation.py @@ -225,7 +225,7 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): repeat_uuid[1]['uuid'] = 'test_uuid' with file_path.open("w") as f: - ndjson.dump(repeat_uuid, f) + parser.dump(repeat_uuid, f) with pytest.raises(MALValidationError): configured_project.upload_annotations(name="name",