19 changes: 16 additions & 3 deletions tests/integration/export_v2/test_export_data_rows.py
@@ -14,11 +14,24 @@ def test_export_data_rows(client, data_row, wait_for_data_row_processing):
"project_details": True,
"data_row_details": True
}

task = DataRow.export_v2(client=client, data_rows=[data_row])
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None
assert len(task.result) == 1
assert task.result[0]["data_row"]["id"] == data_row.uid
assert task.result[0]["data_row"]["external_id"] == data_row.external_id
assert task.result[0]["data_row"]["row_data"] == data_row.row_data
assert task.result[0]['data_row']['id'] == data_row.uid

task = DataRow.export_v2(client=client, data_rows=[data_row.uid])
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None
assert len(task.result) == 1
assert task.result[0]['data_row']['id'] == data_row.uid

task = DataRow.export_v2(client=client, global_keys=[data_row.global_key])
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None
assert len(task.result) == 1
assert task.result[0]['data_row']['id'] == data_row.uid
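
For reference, the updated test exercises three interchangeable ways to address the same data row through DataRow.export_v2: by DataRow object, by uid, and by global key. A minimal standalone sketch of that pattern, assuming a configured client and an existing data row (the API key and dataset id are placeholders):

    from labelbox import Client
    from labelbox.schema.data_row import DataRow

    client = Client(api_key="...")                # assumes a valid API key
    dataset = client.get_dataset("<dataset-id>")  # placeholder dataset id
    dr = next(iter(dataset.data_rows()))

    # The three identifier styles used in the test above, in order.
    for kwargs in ({"data_rows": [dr]},
                   {"data_rows": [dr.uid]},
                   {"global_keys": [dr.global_key]}):
        task = DataRow.export_v2(client=client, **kwargs)
        task.wait_till_done()
        assert task.status == "COMPLETE" and task.errors is None
        assert task.result[0]["data_row"]["id"] == dr.uid
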
23 changes: 18 additions & 5 deletions tests/integration/export_v2/test_export_dataset.py
@@ -1,7 +1,7 @@
import pytest


@pytest.mark.parametrize('data_rows', [2], indirect=True)
@pytest.mark.parametrize('data_rows', [3], indirect=True)
def test_dataset_export_v2(export_v2_test_helpers, dataset, data_rows):
data_row_ids = [dr.uid for dr in data_rows]
params = {"performance_details": False, "label_details": False}
@@ -11,10 +11,7 @@ def test_dataset_export_v2(export_v2_test_helpers, dataset, data_rows):
assert set([dr['data_row']['id'] for dr in task_results
]) == set(data_row_ids)


@pytest.mark.parametrize('data_rows', [3], indirect=True)
def test_dataset_export_v2_datarow_list(export_v2_test_helpers, dataset,
data_rows):
# testing with a datarow ids filter
datarow_filter_size = 2
data_row_ids = [dr.uid for dr in data_rows]

@@ -29,3 +26,19 @@ def test_dataset_export_v2_datarow_list(export_v2_test_helpers, dataset,
# only filtered datarows should be exported
assert set([dr['data_row']['id'] for dr in task_results
]) == set(data_row_ids[:datarow_filter_size])

# testing with a global key and a datarow id filter
datarow_filter_size = 2
global_keys = [dr.global_key for dr in data_rows]

params = {"performance_details": False, "label_details": False}
filters = {"global_keys": global_keys[:datarow_filter_size]}

task_results = export_v2_test_helpers.run_dataset_export_v2_task(
dataset, filters=filters, params=params)

# only 2 datarows should be exported
assert len(task_results) == datarow_filter_size
# only filtered datarows should be exported
assert set([dr['data_row']['global_key'] for dr in task_results
]) == set(global_keys[:datarow_filter_size])
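
The run_dataset_export_v2_task helper above is test scaffolding; under the assumption that it wraps Dataset.export_v2 and returns task.result, the global-key filter path it tests looks roughly like this (ids and keys are placeholders):

    from labelbox import Client

    client = Client(api_key="...")                # assumes a valid API key
    dataset = client.get_dataset("<dataset-id>")  # placeholder dataset id
    global_keys = ["gk-1", "gk-2"]                # placeholder global keys

    task = dataset.export_v2(
        params={"performance_details": False, "label_details": False},
        filters={"global_keys": global_keys})
    task.wait_till_done()
    assert task.errors is None

    # Only the rows matching the global-key filter come back.
    exported = {row["data_row"]["global_key"] for row in task.result}
    assert exported == set(global_keys)
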
73 changes: 70 additions & 3 deletions tests/integration/export_v2/test_export_project.py
@@ -1,8 +1,13 @@
from datetime import datetime, timezone, timedelta

import pytest
import uuid
from typing import Tuple

from labelbox.schema.media_type import MediaType
from labelbox import Project, Dataset
from labelbox.schema.data_row import DataRow
from labelbox.schema.label import Label

IMAGE_URL = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/potato.jpeg"

@@ -86,17 +91,25 @@ def test_project_export_v2_date_filters(client, export_v2_test_helpers,

filters = {
"last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
"label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
"label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
"task_queue_status": "InReview"
}

# TODO: Right now we don't have a way to test this
include_performance_details = True
params = {
"include_performance_details": include_performance_details,
"performance_details": include_performance_details,
"include_labels": True,
"project_details": True,
"media_type_override": MediaType.Image
}

task_queues = project.task_queues()

review_queue = next(
tq for tq in task_queues if tq.queue_type == "MANUAL_REVIEW_QUEUE")
project.move_data_rows_to_task_queue([data_row.uid], review_queue.uid)

task_results = export_v2_test_helpers.run_project_export_v2_task(
project, task_name=task_name, filters=filters, params=params)

@@ -105,6 +118,7 @@ def test_project_export_v2_date_filters(client, export_v2_test_helpers,
task_project_label_ids_set = set(
map(lambda prediction: prediction['id'], task_project['labels']))
assert label_id in task_project_label_ids_set
assert task_project['project_details']['workflow_status'] == 'IN_REVIEW'

# TODO: Add back in when we have a way to test this
# if include_performance_details:
@@ -156,7 +170,7 @@ def test_project_export_v2_with_iso_date_filters(client, export_v2_test_helpers,


@pytest.mark.parametrize("data_rows", [3], indirect=True)
def test_project_export_v2_datarow_filter(
def test_project_export_v2_datarows_filter(
export_v2_test_helpers,
configured_batch_project_with_multiple_datarows):
project, _, data_rows = configured_batch_project_with_multiple_datarows
@@ -178,3 +192,56 @@ def test_project_export_v2_datarow_filter(
# only filtered datarows should be exported
assert set([dr['data_row']['id'] for dr in task_results
]) == set(data_row_ids[:datarow_filter_size])

global_keys = [dr.global_key for dr in data_rows]
filters = {
"last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
"label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
"global_keys": global_keys[:datarow_filter_size]
}
params = {"data_row_details": True, "media_type_override": MediaType.Image}
task_results = export_v2_test_helpers.run_project_export_v2_task(
project, filters=filters, params=params)

# only 2 datarows should be exported
assert len(task_results) == datarow_filter_size
# only filtered datarows should be exported
assert set([dr['data_row']['global_key'] for dr in task_results
]) == set(global_keys[:datarow_filter_size])


def test_batch_project_export_v2(
configured_batch_project_with_label: Tuple[Project, Dataset, DataRow,
Label],
export_v2_test_helpers, dataset: Dataset, image_url: str):
project, dataset, *_ = configured_batch_project_with_label

batch = list(project.batches())[0]
filters = {
"last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
"label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
"batch_ids": [batch.uid],
}
params = {
"include_performance_details": True,
"include_labels": True,
"media_type_override": MediaType.Image
}
task_name = "test_batch_export_v2"
task = dataset.create_data_rows([
{
"row_data": image_url,
"external_id": "my-image"
},
] * 2)
task.wait_till_done()
data_rows = [dr.uid for dr in list(dataset.export_data_rows())]
batch_one = f'batch one {uuid.uuid4()}'

# This test creates two batches; only one batch should be exported
# Creating a second batch that will not be used in the export due to the batch_ids filter
project.create_batch(batch_one, data_rows)

task_results = export_v2_test_helpers.run_project_export_v2_task(
project, task_name=task_name, filters=filters, params=params)
assert (batch.size == len(task_results))
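
Assuming run_project_export_v2_task similarly wraps Project.export_v2, the batch_ids filter exercised by test_batch_project_export_v2 reduces to roughly the following sketch (the API key and project id are placeholders):

    from labelbox import Client
    from labelbox.schema.media_type import MediaType

    client = Client(api_key="...")                # assumes a valid API key
    project = client.get_project("<project-id>")  # placeholder project id

    batch = next(iter(project.batches()))         # export only this batch
    task = project.export_v2(
        filters={
            "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
            "batch_ids": [batch.uid],
        },
        params={"media_type_override": MediaType.Image})
    task.wait_till_done()
    assert task.errors is None

    # Rows from any other, unfiltered batch are excluded.
    assert len(task.result) == batch.size
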
2 changes: 1 addition & 1 deletion tests/integration/export_v2/test_legacy_export.py
@@ -220,7 +220,7 @@ def test_issues_export(project):


def test_dataset_export(dataset, image_url):
n_data_rows = 5
n_data_rows = 2
ids = set()
for _ in range(n_data_rows):
ids.add(dataset.create_data_row(row_data=image_url))
28 changes: 0 additions & 28 deletions tests/integration/test_data_rows.py
@@ -9,7 +9,6 @@

from labelbox import DataRow
from labelbox.exceptions import MalformedQueryException
from labelbox.schema.export_filters import DatarowExportFilters
from labelbox.schema.task import Task
from labelbox.schema.data_row_metadata import DataRowMetadataField, DataRowMetadataKind
import labelbox.exceptions
@@ -1037,30 +1036,3 @@ def test_create_data_row_with_media_type(dataset, image_url, is_adv_enabled):
exc.value)

dataset.create_data_row(row_data=image_url, media_type="IMAGE")


def test_export_data_rows(client, data_row, wait_for_data_row_processing):
# Ensure created data rows are indexed
data_row = wait_for_data_row_processing(client, data_row)
time.sleep(7) # temp fix for ES indexing delay

task = DataRow.export_v2(client=client, data_rows=[data_row])
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None
assert len(task.result) == 1
assert task.result[0]['data_row']['id'] == data_row.uid

task = DataRow.export_v2(client=client, data_rows=[data_row.uid])
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None
assert len(task.result) == 1
assert task.result[0]['data_row']['id'] == data_row.uid

task = DataRow.export_v2(client=client, global_keys=[data_row.global_key])
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None
assert len(task.result) == 1
assert task.result[0]['data_row']['id'] == data_row.uid
53 changes: 0 additions & 53 deletions tests/integration/test_dataset.py
@@ -144,59 +144,6 @@ def test_bulk_conversation(dataset, sample_bulk_conversation: list) -> None:
assert len(list(dataset.data_rows())) == len(sample_bulk_conversation)


def test_data_row_export(dataset, image_url):
n_data_rows = 2
ids = set()
for _ in range(n_data_rows):
ids.add(dataset.create_data_row(row_data=image_url))
result = list(dataset.export_data_rows())
assert len(result) == n_data_rows
assert set(result) == ids


@pytest.mark.parametrize('data_rows', [3], indirect=True)
def test_dataset_export_v2(export_v2_test_helpers, dataset, data_rows):
data_row_ids = [dr.uid for dr in data_rows]
params = {"performance_details": False, "label_details": False}
task_results = export_v2_test_helpers.run_dataset_export_v2_task(
dataset, params=params)
assert len(task_results) == 3
assert set([dr['data_row']['id'] for dr in task_results
]) == set(data_row_ids)

# testing with a datarow ids filter
datarow_filter_size = 2
data_row_ids = [dr.uid for dr in data_rows]

params = {"performance_details": False, "label_details": False}
filters = {"data_row_ids": data_row_ids[:datarow_filter_size]}

task_results = export_v2_test_helpers.run_dataset_export_v2_task(
dataset, filters=filters, params=params)

# only 2 datarows should be exported
assert len(task_results) == datarow_filter_size
# only filtered datarows should be exported
assert set([dr['data_row']['id'] for dr in task_results
]) == set(data_row_ids[:datarow_filter_size])

# testing with a global key and a datarow id filter
datarow_filter_size = 2
global_keys = [dr.global_key for dr in data_rows]

params = {"performance_details": False, "label_details": False}
filters = {"global_keys": global_keys[:datarow_filter_size]}

task_results = export_v2_test_helpers.run_dataset_export_v2_task(
dataset, filters=filters, params=params)

# only 2 datarows should be exported
assert len(task_results) == datarow_filter_size
# only filtered datarows should be exported
assert set([dr['data_row']['global_key'] for dr in task_results
]) == set(global_keys[:datarow_filter_size])


def test_create_descriptor_file(dataset):
import unittest.mock as mock
with mock.patch.object(dataset.client,