Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import requests

import json
import pytest


Expand Down Expand Up @@ -97,3 +96,36 @@ def ndjson_content_with_nonascii_and_line_breaks():
'created_at': '2015-01-01T15:00:10Z'
}]
return line, expected_objects


@pytest.fixture
def generate_random_ndjson(rand_gen):
    """Provide a factory that builds NDJSON lines with generated ids.

    The returned callable accepts ``lines`` (default 10) and returns a list
    with that many JSON strings, each shaped ``{"data_row": {"id": ...}}``
    where the id is whatever ``rand_gen(str)`` returns.
    """
    # Reviewer note carried over from the pull-request page:
    # "why not use static json line for test predictability?"

    def _generate_random_ndjson(lines: int = 10):
        serialized = []
        for _ in range(lines):
            record = {"data_row": {"id": rand_gen(str)}}
            serialized.append(json.dumps(record))
        return serialized

    return _generate_random_ndjson


@pytest.fixture
def mock_response():
    """Provide a lightweight response double class for tests.

    The fixture returns the class itself (not an instance); tests build one
    with the body text and, optionally, an exception for
    ``raise_for_status`` to raise.
    """

    class MockResponse:
        """Response stand-in exposing ``text`` and ``raise_for_status``."""

        def __init__(self, text: str, exception: Exception = None) -> None:
            self._exception = exception
            self._text = text

        @property
        def text(self):
            """Return the text given at construction."""
            return self._text

        def raise_for_status(self):
            """Raise the configured exception, if a truthy one was given."""
            if not self._exception:
                return
            raise self._exception

    return MockResponse
76 changes: 76 additions & 0 deletions tests/unit/export_task/test_unit_file_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from unittest.mock import MagicMock

from labelbox.schema.export_task import (
Converter,
FileConverter,
Range,
StreamType,
_MetadataFileInfo,
_MetadataHeader,
_TaskContext,
)


class TestFileConverter:
    """Tests for ``FileConverter`` fed a single in-memory NDJSON chunk."""

    def _convert_and_verify(self, tmp_path, file_content, line_count):
        """Run the converter over ``file_content`` and check every output.

        The chunk is described as covering the whole content: offsets
        ``0..len-1`` and lines ``0..line_count-1``.
        """
        directory = tmp_path / "file-converter"
        directory.mkdir()

        content_size = len(file_content)
        header = _MetadataHeader(total_size=content_size,
                                 total_lines=line_count)
        context = _TaskContext(
            client=MagicMock(),
            task_id="task-id",
            stream_type=StreamType.RESULT,
            metadata_header=header,
        )
        chunk_info = _MetadataFileInfo(
            offsets=Range(start=0, end=content_size - 1),
            lines=Range(start=0, end=line_count - 1),
            file="file.ndjson",
        )
        input_args = Converter.ConverterInputArgs(
            ctx=context,
            file_info=chunk_info,
            raw_data=file_content,
        )

        path = directory / "output.ndjson"
        with FileConverter(file_path=path) as converter:
            for output in converter.convert(input_args):
                assert output.current_line == 0
                assert output.current_offset == 0
                assert output.file_path == path
                assert output.total_lines == line_count
                assert output.total_size == content_size
                assert output.bytes_written == content_size

    def test_with_correct_ndjson(self, tmp_path, generate_random_ndjson):
        """Content whose last line is terminated by a newline."""
        # Reviewer note carried over from the pull-request page:
        # "curious why can't we test the same with 2 lines"
        line_count = 10
        rows = generate_random_ndjson(line_count)
        self._convert_and_verify(tmp_path, "\n".join(rows) + "\n", line_count)

    def test_with_no_newline_at_end(self, tmp_path, generate_random_ndjson):
        """Content whose last line has no trailing newline."""
        line_count = 10
        rows = generate_random_ndjson(line_count)
        self._convert_and_verify(tmp_path, "\n".join(rows), line_count)
139 changes: 139 additions & 0 deletions tests/unit/export_task/test_unit_file_retriever_by_line.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from unittest.mock import MagicMock, patch
from labelbox.schema.export_task import (
FileRetrieverByLine,
_TaskContext,
_MetadataHeader,
StreamType,
)


class TestFileRetrieverByLine:
    """Tests for ``FileRetrieverByLine.get_next_chunk`` with a mocked client.

    Each test mocks ``client.execute`` to describe one file spanning the
    whole generated content and patches ``requests.get`` to return that
    content.
    """

    @staticmethod
    def _build_context(file_content, line_count):
        """Create a task context backed by a mocked client."""
        # Reviewer note carried over from the pull-request page:
        # "like this client mocking"
        file_entry = {
            "offsets": {
                "start": "0",
                "end": len(file_content) - 1
            },
            "lines": {
                "start": "0",
                "end": str(line_count - 1)
            },
            "file": "http://some-url.com/file.ndjson",
        }
        client = MagicMock()
        client.execute = MagicMock(
            return_value={"task": {
                "exportFileFromLine": file_entry
            }})
        header = _MetadataHeader(total_size=len(file_content),
                                 total_lines=line_count)
        return _TaskContext(
            client=client,
            task_id="task-id",
            stream_type=StreamType.RESULT,
            metadata_header=header,
        )

    def _verify_chunk_from_line(self, generate_random_ndjson, mock_response,
                                line_start):
        """Fetch the first chunk starting at ``line_start`` and check it."""
        line_count = 10
        ndjson = generate_random_ndjson(line_count)
        file_content = "\n".join(ndjson) + "\n"
        ctx = self._build_context(file_content, line_count)

        # Offset of the requested line within the content; this is 0 when
        # line_start is 0 because the content begins with the first line.
        current_offset = file_content.find(ndjson[line_start])

        with patch("requests.get", return_value=mock_response(file_content)):
            retriever = FileRetrieverByLine(ctx, line_start)
            info, content = retriever.get_next_chunk()
            assert info.offsets.start == current_offset
            assert info.offsets.end == len(file_content) - 1
            assert info.lines.start == line_start
            assert info.lines.end == line_count - 1
            assert info.file == "http://some-url.com/file.ndjson"
            assert content == file_content[current_offset:]

    def test_by_line_from_start(self, generate_random_ndjson, mock_response):
        """Start at line 0: the whole content is expected."""
        self._verify_chunk_from_line(generate_random_ndjson, mock_response, 0)

    def test_by_line_from_middle(self, generate_random_ndjson, mock_response):
        """Start at line 5: content from that line onward is expected."""
        self._verify_chunk_from_line(generate_random_ndjson, mock_response, 5)

    def test_by_line_from_last(self, generate_random_ndjson, mock_response):
        """Start at line 9, the last of the ten generated lines."""
        self._verify_chunk_from_line(generate_random_ndjson, mock_response, 9)
96 changes: 96 additions & 0 deletions tests/unit/export_task/test_unit_file_retriever_by_offset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from unittest.mock import MagicMock, patch
from labelbox.schema.export_task import (
FileRetrieverByOffset,
_TaskContext,
_MetadataHeader,
StreamType,
)


class TestFileRetrieverByOffset:
    """Tests for ``FileRetrieverByOffset.get_next_chunk`` with a mocked client.

    Each test mocks ``client.execute`` to describe one file spanning the
    whole generated content and patches ``requests.get`` to return that
    content.
    """

    @staticmethod
    def _build_context(file_content, line_count):
        """Create a task context backed by a mocked client."""
        file_entry = {
            "offsets": {
                "start": "0",
                "end": len(file_content) - 1
            },
            "lines": {
                "start": "0",
                "end": str(line_count - 1)
            },
            "file": "http://some-url.com/file.ndjson",
        }
        client = MagicMock()
        client.execute = MagicMock(
            return_value={"task": {
                "exportFileFromOffset": file_entry
            }})
        header = _MetadataHeader(total_size=len(file_content),
                                 total_lines=line_count)
        return _TaskContext(
            client=client,
            task_id="task-id",
            stream_type=StreamType.RESULT,
            metadata_header=header,
        )

    def _verify_chunk_from_offset(self, generate_random_ndjson, mock_response,
                                  line_start, skipped_bytes):
        """Fetch the first chunk from an offset inside ``line_start``.

        The requested offset is the position of line ``line_start`` in the
        content plus ``skipped_bytes``.
        """
        line_count = 10
        ndjson = generate_random_ndjson(line_count)
        file_content = "\n".join(ndjson) + "\n"
        ctx = self._build_context(file_content, line_count)

        current_offset = file_content.find(ndjson[line_start]) + skipped_bytes

        with patch("requests.get", return_value=mock_response(file_content)):
            retriever = FileRetrieverByOffset(ctx, current_offset)
            info, content = retriever.get_next_chunk()
            assert info.offsets.start == current_offset
            assert info.offsets.end == len(file_content) - 1
            assert info.lines.start == line_start
            assert info.lines.end == line_count - 1
            assert info.file == "http://some-url.com/file.ndjson"
            assert content == file_content[current_offset:]

    def test_by_offset_from_start(self, generate_random_ndjson, mock_response):
        """Offset 0: the whole content is expected, starting at line 0."""
        self._verify_chunk_from_offset(generate_random_ndjson,
                                       mock_response,
                                       line_start=0,
                                       skipped_bytes=0)

    def test_by_offset_from_middle(self, generate_random_ndjson, mock_response):
        """Offset 15 bytes into line 5: the chunk is reported as line 5."""
        self._verify_chunk_from_offset(generate_random_ndjson,
                                       mock_response,
                                       line_start=5,
                                       skipped_bytes=15)
Loading