Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions labelbox/data/serialization/labelbox_v1/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Any, Dict, Generator, Iterable, Union
import logging

import ndjson
from labelbox.data.serialization.ndjson import parser
import requests
from copy import deepcopy
from requests.exceptions import HTTPError
Expand Down Expand Up @@ -91,7 +91,7 @@ def _process(self, value):
value = deepcopy(value)
if 'frames' in value['Label']:
req = self._request(value)
value['Label'] = ndjson.loads(req)
value['Label'] = parser.loads(req)
return value

@retry.Retry(predicate=retry.if_exception_type(HTTPError))
Expand Down
30 changes: 21 additions & 9 deletions labelbox/data/serialization/ndjson/parser.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,32 @@
from io import FileIO, StringIO
import json
from typing import Iterable, Union


class NdjsonDecoder(json.JSONDecoder):
    """JSON decoder for newline-delimited JSON (NDJSON).

    Each non-empty line of the input holds one JSON value. The lines are
    joined with commas and wrapped in ``[...]`` so a single ``json.loads``
    call yields a list of the per-line values. This is safe because a valid
    JSON value cannot contain a raw (unescaped) newline character.
    """

    def decode(self, s: str, *args, **kwargs) -> list:
        # Skip blank / whitespace-only lines (e.g. a trailing newline or a
        # blank separator line). Without this filter they would become
        # consecutive commas (",,") and make the wrapped document invalid.
        joined = ','.join(line for line in s.splitlines() if line.strip())
        return super().decode(f"[{joined}]", *args, **kwargs)


def loads(ndjson_string, **kwargs) -> list:
    """Parse an NDJSON document into a list of Python objects.

    Args:
        ndjson_string: NDJSON content as ``str`` or UTF-encoded ``bytes``
            (``json.loads`` transparently decodes bytes input).
        **kwargs: forwarded to ``json.loads``; ``cls`` defaults to
            :class:`NdjsonDecoder` but may be overridden by the caller.

    Returns:
        A list with one element per non-blank input line.
    """
    kwargs.setdefault('cls', NdjsonDecoder)
    return json.loads(ndjson_string, **kwargs)


def dumps(obj, **kwargs):
    """Serialize an iterable of objects into an NDJSON string.

    Every element of *obj* becomes one ``json.dumps``-encoded line; the
    lines are joined with ``'\\n'`` and no trailing newline is appended.
    Keyword arguments are forwarded to ``json.dumps``.
    """
    encoded_lines = (json.dumps(record, **kwargs) for record in obj)
    return '\n'.join(encoded_lines)


def dump(obj, io, **kwargs):
    """Serialize *obj* as NDJSON and write the result to the writable
    text handle *io* (keyword arguments are forwarded to ``json.dumps``)."""
    io.write(dumps(obj, **kwargs))
def reader(io_handle, **kwargs):
    """Lazily parse an NDJSON stream, yielding one object per line.

    Args:
        io_handle: any iterable of lines — an open text or binary file,
            ``StringIO``, or a plain list of strings.
        **kwargs: forwarded to ``json.loads``.

    Yields:
        One parsed object per non-blank line. Blank / whitespace-only
        lines (e.g. the empty tail after a trailing newline) are skipped
        instead of raising ``json.JSONDecodeError``.
    """
    for line in io_handle:
        if line.strip():
            yield json.loads(line, **kwargs)
10 changes: 6 additions & 4 deletions labelbox/schema/annotation_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, BinaryIO, Dict, List, Union, TYPE_CHECKING, cast

import backoff
import ndjson
from labelbox.data.serialization.ndjson import parser
import requests
from tqdm import tqdm # type: ignore

Expand Down Expand Up @@ -132,7 +132,7 @@ def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]:

response = requests.get(url)
response.raise_for_status()
return ndjson.loads(response.text)
return parser.loads(response.text)

@classmethod
def _create_from_bytes(cls, client, variables, query_str, file_name,
Expand All @@ -158,11 +158,13 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]],
objects = serialize_labels(objects)
cls._validate_data_rows(objects)

data_str = ndjson.dumps(objects)
data_str = parser.dumps(objects)
if not data_str:
raise ValueError(f"{object_name} cannot be empty")

return data_str.encode('utf-8')
return data_str.encode(
'utf-8'
) # NOTICE this method returns bytes, NOT BinaryIO... should have done io.BytesIO(...) but not going to change this at the moment since it works and fools mypy

def refresh(self) -> None:
"""Synchronizes values of all fields with the database.
Expand Down
4 changes: 2 additions & 2 deletions labelbox/schema/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from labelbox.orm.model import Entity, Field, Relationship
from labelbox.exceptions import LabelboxError, ResourceNotFoundError
from io import StringIO
import ndjson
from labelbox.data.serialization.ndjson import parser
import requests
import logging
import time
Expand Down Expand Up @@ -118,7 +118,7 @@ def export_data_rows(self,
download_url = res["downloadUrl"]
response = requests.get(download_url)
response.raise_for_status()
reader = ndjson.reader(StringIO(response.text))
reader = parser.reader(StringIO(response.text))
return (
Entity.DataRow(self.client, result) for result in reader)
elif res["status"] == "FAILED":
Expand Down
10 changes: 5 additions & 5 deletions labelbox/schema/bulk_import_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pathlib import Path
import pydantic
import backoff
import ndjson
from labelbox.data.serialization.ndjson import parser
import requests
from pydantic import BaseModel, root_validator, validator
from typing_extensions import Literal
Expand Down Expand Up @@ -172,7 +172,7 @@ def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]:
"""
response = requests.get(url)
response.raise_for_status()
return ndjson.loads(response.text)
return parser.loads(response.text)

def refresh(self) -> None:
"""Synchronizes values of all fields with the database.
Expand Down Expand Up @@ -258,7 +258,7 @@ def create_from_url(cls,
"Validation is turned on. The file will be downloaded locally and processed before uploading."
)
res = requests.get(url)
data = ndjson.loads(res.text)
data = parser.loads(res.text)
_validate_ndjson(data, client.get_project(project_id))

query_str = """mutation createBulkImportRequestPyApi(
Expand Down Expand Up @@ -322,7 +322,7 @@ def create_from_objects(cls,
if validate:
_validate_ndjson(ndjson_predictions, client.get_project(project_id))

data_str = ndjson.dumps(ndjson_predictions)
data_str = parser.dumps(ndjson_predictions)
if not data_str:
raise ValueError('annotations cannot be empty')

Expand Down Expand Up @@ -366,7 +366,7 @@ def create_from_local_file(cls,

with file.open('rb') as f:
if validate_file:
reader = ndjson.reader(f)
reader = parser.reader(f)
# ensure that the underlying json load call is valid
# https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
# by iterating through the file so we only store
Expand Down
5 changes: 3 additions & 2 deletions labelbox/schema/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import logging
from collections.abc import Iterable
import time
import ndjson

from labelbox.data.serialization.ndjson import parser
from itertools import islice

from concurrent.futures import ThreadPoolExecutor, as_completed
Expand Down Expand Up @@ -521,7 +522,7 @@ def export_data_rows(self,
download_url = res["downloadUrl"]
response = requests.get(download_url)
response.raise_for_status()
reader = ndjson.reader(StringIO(response.text))
reader = parser.reader(StringIO(response.text))
return (
Entity.DataRow(self.client, result) for result in reader)
elif res["status"] == "FAILED":
Expand Down
4 changes: 2 additions & 2 deletions labelbox/schema/model_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import time
import logging
import requests
import ndjson
from labelbox.data.serialization.ndjson import parser
from enum import Enum

from labelbox.pagination import PaginatedCollection
Expand Down Expand Up @@ -489,7 +489,7 @@ def export_labels(
else:
response = requests.get(url)
response.raise_for_status()
return ndjson.loads(response.content)
return parser.loads(response.content)

timeout_seconds -= sleep_time
if timeout_seconds <= 0:
Expand Down
4 changes: 2 additions & 2 deletions labelbox/schema/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
from urllib.parse import urlparse

import ndjson
from labelbox.data.serialization.ndjson import parser
import requests

from labelbox import utils
Expand Down Expand Up @@ -244,7 +244,7 @@ def export_queued_data_rows(
download_url = res["downloadUrl"]
response = requests.get(download_url)
response.raise_for_status()
return ndjson.loads(response.text)
return parser.loads(response.text)
elif res["status"] == "FAILED":
raise LabelboxError("Data row export failed.")

Expand Down
4 changes: 2 additions & 2 deletions labelbox/schema/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import requests
import time
from typing import TYPE_CHECKING, Callable, Optional, Dict, Any, List, Union
import ndjson
from labelbox.data.serialization.ndjson import parser

from labelbox.exceptions import ResourceNotFoundError
from labelbox.orm.db_object import DbObject
Expand Down Expand Up @@ -147,7 +147,7 @@ def download_result(remote_json_field: Optional[str], format: str):
if format == 'json':
return response.json()
elif format == 'ndjson':
return ndjson.loads(response.text)
return parser.loads(response.text)
else:
raise ValueError(
"Expected the result format to be either `ndjson` or `json`."
Expand Down
3 changes: 0 additions & 3 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
[mypy-backoff.*]
ignore_missing_imports = True

[mypy-ndjson.*]
ignore_missing_imports = True

[mypy-google.*]
ignore_missing_imports = True

Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
requests==2.22.0
ndjson==0.3.1
backoff==1.10.0
google-api-core>=1.22.1
pydantic>=1.8,<2.0
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
packages=setuptools.find_packages(),
install_requires=[
"backoff==1.10.0", "requests>=2.22.0", "google-api-core>=1.22.1",
"pydantic>=1.8,<2.0", "tqdm", "ndjson",
"backports-datetime-fromisoformat~=2.0"
"pydantic>=1.8,<2.0", "tqdm", "backports-datetime-fromisoformat~=2.0"
],
extras_require={
'data': [
Expand Down
2 changes: 1 addition & 1 deletion tests/data/serialization/ndjson/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from labelbox.data.annotation_types.label import Label
from labelbox.data.annotation_types.video import VideoObjectAnnotation
import ndjson
from labelbox.data.serialization.ndjson import parser

from labelbox.data.serialization.ndjson.converter import NDJsonConverter
from labelbox.schema.annotation_import import MALPredictionImport
Expand Down
9 changes: 5 additions & 4 deletions tests/integration/annotation_import/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import pytest
import time
import requests
import ndjson

from labelbox.data.serialization.ndjson import parser

from typing import Type
from labelbox.schema.labeling_frontend import LabelingFrontend
Expand Down Expand Up @@ -884,7 +885,7 @@ class AnnotationImportTestHelpers:
def assert_file_content(cls, url: str, predictions):
    """Download *url* and assert its NDJSON body equals *predictions*.

    *predictions* is first normalized via ``cls._convert_to_plain_object``
    so that JSON-incomparable Python types (e.g. tuples) compare equal to
    the parsed response.
    """
    # NOTE(review): no response.raise_for_status() here — a failed download
    # would surface as an assertion mismatch rather than an HTTP error.
    response = requests.get(url)
    predictions = cls._convert_to_plain_object(predictions)
    assert parser.loads(response.text) == predictions

@staticmethod
def check_running_state(req, name, url=None):
Expand All @@ -898,8 +899,8 @@ def check_running_state(req, name, url=None):
@staticmethod
def _convert_to_plain_object(obj):
    """Round-trip *obj* through NDJSON serialization to plain JSON types.

    Some Python objects (e.g. tuples) cannot be compared with JSON
    deserialized data directly; serializing to NDJSON and parsing back
    normalizes them (tuples become lists, etc.).
    """
    json_str = parser.dumps(obj)
    return parser.loads(json_str)


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import uuid
import ndjson
from labelbox.data.serialization.ndjson import parser
import pytest
import random
from labelbox.data.annotation_types.annotation import ObjectAnnotation
Expand Down Expand Up @@ -90,7 +90,7 @@ def test_create_from_local_file(tmp_path, predictions, configured_project,
file_name = f"{name}.ndjson"
file_path = tmp_path / file_name
with file_path.open("w") as f:
ndjson.dump(predictions, f)
parser.dump(predictions, f)

bulk_import_request = configured_project.upload_annotations(
name=name, annotations=str(file_path), validate=False)
Expand Down Expand Up @@ -143,7 +143,7 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
repeat_uuid[1]['uuid'] = uid

with file_path.open("w") as f:
ndjson.dump(repeat_uuid, f)
parser.dump(repeat_uuid, f)

with pytest.raises(UuidError):
configured_project.upload_annotations(name="name",
Expand All @@ -159,7 +159,7 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
@pytest.mark.slow
def test_wait_till_done(rectangle_inference, configured_project):
name = str(uuid.uuid4())
url = configured_project.client.upload_data(content=ndjson.dumps(
url = configured_project.client.upload_data(content=parser.dumps(
[rectangle_inference]),
sign=True)
bulk_import_request = configured_project.upload_annotations(name=name,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import uuid
import ndjson
from labelbox.data.serialization.ndjson import parser
import pytest

from labelbox.schema.annotation_import import AnnotationImportState, MEAPredictionImport
Expand Down Expand Up @@ -99,7 +99,7 @@ def test_create_from_local_file(tmp_path, model_run_with_data_rows,
file_name = f"{name}.ndjson"
file_path = tmp_path / file_name
with file_path.open("w") as f:
ndjson.dump(object_predictions, f)
parser.dump(object_predictions, f)

annotation_import = model_run_with_data_rows.add_predictions(
name=name, predictions=str(file_path))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
import ndjson

from labelbox.data.serialization.ndjson import parser
from pytest_cases import parametrize, fixture_ref

from labelbox.exceptions import MALValidationError
Expand Down Expand Up @@ -224,7 +225,7 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
repeat_uuid[1]['uuid'] = 'test_uuid'

with file_path.open("w") as f:
ndjson.dump(repeat_uuid, f)
parser.dump(repeat_uuid, f)

with pytest.raises(MALValidationError):
configured_project.upload_annotations(name="name",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import uuid
import ndjson
from labelbox.data.serialization.ndjson import parser
import pytest

from labelbox.schema.annotation_import import AnnotationImportState, MEAPredictionImport
Expand Down
16 changes: 12 additions & 4 deletions tests/unit/test_ndjson_parsing.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
import ast
import random
import time
from io import StringIO
import ndjson

from labelbox.data.serialization.ndjson import parser


def test_loads(ndjson_content):
    """Round-trip check: parser.loads turns an NDJSON string into the
    expected objects and parser.dumps reproduces the original string.
    """
    # The fixture supplies a (raw NDJSON string, parsed objects) pair,
    # as shown by the unpacking and the assertions below.
    expected_line, expected_objects = ndjson_content

    parsed_line = parser.loads(expected_line)

    assert parsed_line == expected_objects
    assert parser.dumps(parsed_line) == expected_line


def test_loads_bytes(ndjson_content):
    """parser.loads also accepts UTF-8 encoded bytes input; the parsed
    result matches the str path and round-trips through parser.dumps.
    """
    expected_line, expected_objects = ndjson_content

    bytes_line = expected_line.encode('utf-8')
    parsed_line = parser.loads(bytes_line)

    assert parsed_line == expected_objects
    assert parser.dumps(parsed_line) == expected_line

Expand Down