From b0dcdb5eadc59e7cdfca5a7befc6fced4583cd58 Mon Sep 17 00:00:00 2001
From: Michal Noszczak
Date: Mon, 19 Feb 2024 15:17:33 +0000
Subject: [PATCH 1/2] Add additional AWS validation

---
 labelbox/schema/dataset.py          |  8 ++++++++
 tests/integration/test_data_rows.py | 12 ++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index 9f7cba6ed..23f9151d3 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -153,6 +153,11 @@ def convert_field_keys(items):
                 "DataRow.row_data missing when creating DataRow.")
 
         row_data = args[DataRow.row_data.name]
+
+        if row_data.startswith("s3:/"):
+            raise InvalidQueryError(
+                "row_data: s3 assets must start with 'https'.")
+
         if not isinstance(row_data, str):
             # If the row data is an object, upload as a string
             args[DataRow.row_data.name] = json.dumps(row_data)
@@ -425,6 +430,9 @@ def validate_keys(item):
                 raise InvalidQueryError(
                     "`row_data` missing when creating DataRow.")
 
+            if item.get('row_data').startswith("s3:/"):
+                raise InvalidQueryError(
+                    "row_data: s3 assets must start with 'https'.")
             invalid_keys = set(item) - {
                 *{f.name for f in DataRow.fields()}, 'attachments', 'media_type'
             }
diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index 326a93c27..a0e524d11 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -127,6 +127,18 @@ def test_get_data_row(data_row, client):
     assert client.get_data_row(data_row.uid)
 
 
+def test_create_invalid_aws_data_row(dataset, client):
+    with pytest.raises(labelbox.exceptions.InvalidQueryError) as exc:
+        dataset.create_data_row(row_data="s3://labelbox-public-data/invalid")
+    assert "s3" in exc.value.message
+
+    with pytest.raises(labelbox.exceptions.InvalidQueryError) as exc:
+        dataset.create_data_rows([{
+            "row_data": "s3://labelbox-public-data/invalid"
+        }])
+    assert "s3" in exc.value.message
+
+
 def test_lookup_data_rows(client, dataset):
     uid = str(uuid.uuid4())
     # 1 external id : 1 uid

From 596f3d4c3b7c4aaa71de5d9f9f68065d8c6c4e95 Mon Sep 17 00:00:00 2001
From: Michal Noszczak
Date: Tue, 20 Feb 2024 07:57:12 +0000
Subject: [PATCH 2/2] Safe code

---
 labelbox/schema/dataset.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index 23f9151d3..ffed0534f 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -154,7 +154,7 @@ def convert_field_keys(items):
 
         row_data = args[DataRow.row_data.name]
 
-        if row_data.startswith("s3:/"):
+        if isinstance(row_data, str) and row_data.startswith("s3:/"):
             raise InvalidQueryError(
                 "row_data: s3 assets must start with 'https'.")
 
@@ -430,7 +430,8 @@ def validate_keys(item):
                 raise InvalidQueryError(
                     "`row_data` missing when creating DataRow.")
 
-            if item.get('row_data').startswith("s3:/"):
+            if isinstance(item.get('row_data'),
+                          str) and item.get('row_data').startswith("s3:/"):
                 raise InvalidQueryError(
                     "row_data: s3 assets must start with 'https'.")
             invalid_keys = set(item) - {