Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions labelbox/data/serialization/ndjson/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from io import FileIO, StringIO
import json
from typing import Iterable, Union


def loads(ndjson_string: str, **kwargs) -> list:
    """Parse an NDJSON document into a list of Python objects.

    Records are separated by LF, CRLF or a bare CR. Blank and
    whitespace-only lines are ignored, so a trailing newline or an empty
    line between records does not cause a decode error.

    Args:
        ndjson_string: Text holding one JSON value per line.
        **kwargs: Forwarded unchanged to ``json.loads``.

    Returns:
        The decoded records in input order; an empty list for empty input.

    Raises:
        json.JSONDecodeError: If the records do not form valid JSON.
    """
    # Only CR/LF terminate a record. ``str.splitlines`` also breaks on
    # characters such as U+2028, U+2029 or U+0085, which may legally appear
    # unescaped inside a JSON string and would be silently turned into commas.
    normalized = ndjson_string.replace('\r\n', '\n').replace('\r', '\n')
    records = [line for line in normalized.split('\n') if line.strip()]
    # HACK: wrapping the comma-joined records in brackets lets a single
    # json.loads call decode the whole document as one JSON array.
    return json.loads(f"[{','.join(records)}]", **kwargs)


def dumps(obj: list, **kwargs) -> str:
    """Serialize a sequence of objects to an NDJSON string.

    Each element is encoded with ``json.dumps`` and the encoded records are
    joined with a single newline. No trailing newline is appended.

    Args:
        obj: The records to serialize, one per output line.
        **kwargs: Forwarded unchanged to ``json.dumps`` for every record.

    Returns:
        The newline-separated JSON text; an empty string for an empty input.
    """
    encoded_records = [json.dumps(record, **kwargs) for record in obj]
    return '\n'.join(encoded_records)


def reader(io_handle: Union[StringIO, FileIO, Iterable], **kwargs):
    """Lazily decode NDJSON records from an iterable of lines.

    Args:
        io_handle: Any iterable yielding one line per iteration, such as an
            open file, a ``StringIO`` or a list of strings.
        **kwargs: Forwarded unchanged to ``json.loads`` for every line.

    Yields:
        The decoded object for each non-blank line, in input order.

    Raises:
        json.JSONDecodeError: When a non-blank line is not valid JSON; raised
            lazily, at the point the offending line is consumed.
    """
    for line in io_handle:
        # Blank lines (e.g. a doubled trailing newline) carry no record and
        # would make json.loads raise, so skip them.
        if not line.strip():
            continue
        yield json.loads(line, **kwargs)
99 changes: 99 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import requests

import pytest


@pytest.fixture
def ndjson_content():
    """Provide a two-record NDJSON document and the objects it decodes to.

    Returns:
        A ``(text, objects)`` tuple. ``text`` holds a bounding-box record and
        a polygon record separated by one newline, with no trailing newline;
        ``objects`` is the list of dictionaries those records represent.
    """
    bbox_record = '{"uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", "schemaId": "ckaeasyfk004y0y7wyye5epgu", "dataRow": {"id": "ck7kftpan8ir008910yf07r9c"}, "bbox": {"top": 48, "left": 58, "height": 865, "width": 1512}}'
    polygon_record = '{"uuid": "29b878f3-c2b4-4dbf-9f22-a795f0720125", "schemaId": "ckapgvrl7007q0y7ujkjkaaxt", "dataRow": {"id": "ck7kftpan8ir008910yf07r9c"}, "polygon": [{"x": 147.692, "y": 118.154}, {"x": 142.769, "y": 404.923}, {"x": 57.846, "y": 318.769}, {"x": 28.308, "y": 169.846}]}'
    ndjson_text = '\n'.join((bbox_record, polygon_record))

    bbox_annotation = {
        'uuid': '9fd9a92e-2560-4e77-81d4-b2e955800092',
        'schemaId': 'ckaeasyfk004y0y7wyye5epgu',
        'dataRow': {'id': 'ck7kftpan8ir008910yf07r9c'},
        'bbox': {'top': 48, 'left': 58, 'height': 865, 'width': 1512},
    }
    polygon_annotation = {
        'uuid': '29b878f3-c2b4-4dbf-9f22-a795f0720125',
        'schemaId': 'ckapgvrl7007q0y7ujkjkaaxt',
        'dataRow': {'id': 'ck7kftpan8ir008910yf07r9c'},
        'polygon': [
            {'x': 147.692, 'y': 118.154},
            {'x': 142.769, 'y': 404.923},
            {'x': 57.846, 'y': 318.769},
            {'x': 28.308, 'y': 169.846},
        ],
    }

    return ndjson_text, [bbox_annotation, polygon_annotation]


@pytest.fixture
def ndjson_content_with_nonascii_and_line_breaks():
    """Provide a single NDJSON record containing non-ASCII text.

    Returns:
        A ``(text, objects)`` tuple. ``text`` is one JSON object (a GitHub
        push event whose commit message contains Japanese characters);
        ``objects`` is a one-element list holding the decoded event.
    """
    raw_event = '{"id": "2489651127", "type": "PushEvent", "actor": {"id": 1459915, "login": "xtuaok", "gravatar_id": "", "url": "https://api.github.com/users/xtuaok", "avatar_url": "https://avatars.githubusercontent.com/u/1459915?"}, "repo": {"id": 6719841, "name": "xtuaok/twitter_track_following", "url": "https://api.github.com/repos/xtuaok/twitter_track_following"}, "payload": {"push_id": 536864008, "size": 1, "distinct_size": 1, "ref": "refs/heads/xtuaok", "head": "afb8afe306c7893d93d383a06e4d9df53b41bf47", "before": "4671b4868f1a060f2ed64d8268cd22d514a84e63", "commits": [{"sha": "afb8afe306c7893d93d383a06e4d9df53b41bf47", "author": {"email": "47cb89439b2d6961b59dff4298e837f67aa77389@gmail.com", "name": "Tomonori Tamagawa"}, "message": "Update ID 949438177,, - screen_name: chomado, - name: ちょまど@初詣おみくじ凶, - description: ( *゚▽゚* っ)З腐女子!絵描き!| H26新卒文系SE (入社して4ヶ月目の8月にSIer(適応障害になった)を辞職し開発者に転職) | H26秋応用情報合格!| 自作bot (in PHP) chomado_bot | プログラミングガチ初心者, - location:", "distinct": true, "url": "https://api.github.com/repos/xtuaok/twitter_track_following/commits/afb8afe306c7893d93d383a06e4d9df53b41bf47"}]}, "public": true, "created_at": "2015-01-01T15:00:10Z"}'

    actor = {
        'id': 1459915,
        'login': 'xtuaok',
        'gravatar_id': '',
        'url': 'https://api.github.com/users/xtuaok',
        'avatar_url': 'https://avatars.githubusercontent.com/u/1459915?',
    }
    repo = {
        'id': 6719841,
        'name': 'xtuaok/twitter_track_following',
        'url': 'https://api.github.com/repos/xtuaok/twitter_track_following',
    }
    commit = {
        'sha': 'afb8afe306c7893d93d383a06e4d9df53b41bf47',
        'author': {
            'email': '47cb89439b2d6961b59dff4298e837f67aa77389@gmail.com',
            'name': 'Tomonori Tamagawa',
        },
        'message':
            'Update ID 949438177,, - screen_name: chomado, - name: ちょまど@初詣おみくじ凶, - description: ( *゚▽゚* っ)З腐女子!絵描き!| H26新卒文系SE (入社して4ヶ月目の8月にSIer(適応障害になった)を辞職し開発者に転職) | H26秋応用情報合格!| 自作bot (in PHP) chomado_bot | プログラミングガチ初心者, - location:',
        'distinct': True,
        'url':
            'https://api.github.com/repos/xtuaok/twitter_track_following/commits/afb8afe306c7893d93d383a06e4d9df53b41bf47',
    }
    payload = {
        'push_id': 536864008,
        'size': 1,
        'distinct_size': 1,
        'ref': 'refs/heads/xtuaok',
        'head': 'afb8afe306c7893d93d383a06e4d9df53b41bf47',
        'before': '4671b4868f1a060f2ed64d8268cd22d514a84e63',
        'commits': [commit],
    }
    push_event = {
        'id': '2489651127',
        'type': 'PushEvent',
        'actor': actor,
        'repo': repo,
        'payload': payload,
        'public': True,
        'created_at': '2015-01-01T15:00:10Z',
    }

    return raw_event, [push_event]
36 changes: 36 additions & 0 deletions tests/unit/test_ndjson_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import ast
import random
import time
from io import StringIO
import ndjson
from labelbox.data.serialization.ndjson import parser


def test_loads(ndjson_content):
    """Check that ``loads`` decodes the fixture and ``dumps`` restores it."""
    ndjson_text, expected_objects = ndjson_content

    decoded = parser.loads(ndjson_text)

    assert decoded == expected_objects
    # Round trip: serializing the decoded objects must reproduce the input.
    assert parser.dumps(decoded) == ndjson_text


def test_reader_stringio(ndjson_content):
    """Check that ``reader`` yields every record of a ``StringIO`` source."""
    ndjson_text, expected_objects = ndjson_content

    # Drain the generator so all records are decoded before comparing.
    decoded = list(parser.reader(StringIO(ndjson_text)))

    assert decoded == expected_objects


def test_non_ascii_new_line(ndjson_content_with_nonascii_and_line_breaks):
    """Check that non-ASCII text survives a ``loads``/``dumps`` round trip."""
    line, expected_objects = ndjson_content_with_nonascii_and_line_breaks
    parsed = parser.loads(line)

    assert parsed == expected_objects

    # NOTE: json.dumps escapes non-ASCII characters as \uXXXX by default.
    # Passing ensure_ascii=False keeps the characters as they are, so the
    # output can be compared with the original text directly. This replaces
    # un-escaping via ast.literal_eval, which breaks on any single quote or
    # JSON-only escape sequence in the payload.
    assert parser.dumps(parsed, ensure_ascii=False) == line