From 8e5c65d8529f5b52d36fa460edf119278e4e03ed Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Thu, 9 Mar 2023 00:02:21 +0100 Subject: [PATCH 1/8] Add last_activity_at / label_created_at export v2 support --- labelbox/schema/export_filters.py | 12 +++ labelbox/schema/project.py | 139 ++++++++++++++++++++++++++---- tests/integration/test_project.py | 17 ++++ 3 files changed, 153 insertions(+), 15 deletions(-) create mode 100644 labelbox/schema/export_filters.py diff --git a/labelbox/schema/export_filters.py b/labelbox/schema/export_filters.py new file mode 100644 index 000000000..995a2447a --- /dev/null +++ b/labelbox/schema/export_filters.py @@ -0,0 +1,12 @@ +import sys + +from typing import Optional +if sys.version_info >= (3, 8): + from typing import TypedDict, Tuple +else: + from typing_extensions import TypedDict + + +class ProjectExportFilters(TypedDict): + label_created_at: Optional[Tuple[str, str]] + last_activity_at: Optional[Tuple[str, str]] diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index c260a4ada..68a7b2ce1 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -20,6 +20,7 @@ from labelbox.pagination import PaginatedCollection from labelbox.schema.consensus_settings import ConsensusSettings from labelbox.schema.data_row import DataRow +from labelbox.schema.export_filters import ProjectExportFilters from labelbox.schema.export_params import ProjectExportParams from labelbox.schema.media_type import MediaType from labelbox.schema.queue_mode import QueueMode @@ -46,6 +47,20 @@ logger = logging.getLogger(__name__) +def _validate_datetime(string_date: str) -> bool: + """helper function validate that datetime is as follows: YYYY-MM-DD for the export""" + if string_date: + for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"): + try: + datetime.strptime(string_date, fmt) + return True + except ValueError: + pass + raise ValueError(f"""Incorrect format for: {string_date}. + Format must be \"YYYY-MM-DD\" or \"YYYY-MM-DD hh:mm:ss\"""") + return True + + class Project(DbObject, Updateable, Deletable): """ A Project is a container that includes a labeling frontend, an ontology, datasets and labels. @@ -337,19 +352,6 @@ def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str: if dictionary.get(c) ]) - def _validate_datetime(string_date: str) -> bool: - """helper function validate that datetime is as follows: YYYY-MM-DD for the export""" - if string_date: - for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"): - try: - datetime.strptime(string_date, fmt) - return True - except ValueError: - pass - raise ValueError(f"""Incorrect format for: {string_date}. - Format must be \"YYYY-MM-DD\" or \"YYYY-MM-DD hh:mm:ss\"""") - return True - sleep_time = 2 id_param = "projectId" filter_param = "" @@ -403,12 +405,24 @@ def _validate_datetime(string_date: str) -> bool: """ Creates a project run export task with the given params and returns the task. - >>> export_task = export_v2("my_export_task", filter={"media_attributes": True}) + >>> task = project.export_v2( + >>> filters={ + >>> "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], + >>> "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] + >>> }, + >>> params={ + >>> "include_performance_details": False, + >>> "include_labels": True + >>> }) + >>> task.wait_till_done() + >>> task.result + """ def export_v2(self, task_name: Optional[str] = None, + filters: ProjectExportFilters = None, params: Optional[ProjectExportParams] = None) -> Task: _params = params or ProjectExportParams({ @@ -420,6 +434,15 @@ def export_v2(self, "label_details": False }) + _filters = filters or ProjectExportFilters() + + def _get_timezone() -> str: + timezone_query_str = """query CurrentUserPyApi { user { timezone } }""" + tz_res = self.client.execute(timezone_query_str) + return tz_res["user"]["timezone"] or "UTC" + + timezone: Optional[str] = None + mutation_name = "exportDataRowsInProject" create_task_query_str = """mutation exportDataRowsInProjectPyApi($input: ExportDataRowsInProjectInput!){ %s(input: $input) {taskId} } @@ -428,7 +451,11 @@ def export_v2(self, "input": { "taskName": task_name, "filters": { - "projectId": self.uid + "projectId": self.uid, + "searchQuery": { + "scope": None, + "query": [] + } }, "params": { "includeAttachments": @@ -446,6 +473,88 @@ def export_v2(self, }, } } + + if _filters.get('last_activity_at') is not None: + if timezone is None: + timezone = _get_timezone() + values = _filters['last_activity_at'] + start, end = values + if (start is not None and end is not None): + [_validate_datetime(date) for date in values] + query_params["input"]["filters"]['searchQuery']['query'].append( + { + "type": "data_row_last_activity_at", + "value": { + "operator": "BETWEEN", + "timezone": timezone, + "value": { + "min": start, + "max": end + } + } + }) + elif (start is not None): + _validate_datetime(start) + query_params["input"]["filters"]['searchQuery']['query'].append( + { + "type": "data_row_last_activity_at", + "value": { + "operator": "GREATER_THAN_OR_EQUAL", + "timezone": timezone, + "value": start + } + }) + elif (end is not None): + _validate_datetime(end) + query_params["input"]["filters"]['searchQuery']['query'].append( + { + "type": "data_row_last_activity_at", + "value": { + "operator": "LESS_THAN_OR_EQUAL", + "timezone": timezone, + "value": end + } + }) + + if _filters.get('label_created_at') is not None: + if timezone is None: + timezone = _get_timezone() + values = _filters['label_created_at'] + start, end = values + if (start is not None and end is not None): + [_validate_datetime(date) for date in values] + query_params["input"]["filters"]['searchQuery']['query'].append( + { + "type": "labeled_at", + "value": { + "operator": "BETWEEN", + "value": { + "min": start, + "max": end + } + } + }) + elif (start is not None): + _validate_datetime(start) + query_params["input"]["filters"]['searchQuery']['query'].append( + { + "type": "labeled_at", + "value": { + "operator": "GREATER_THAN_OR_EQUAL", + "value": start + } + }) + elif (end is not None): + _validate_datetime(end) + query_params["input"]["filters"]['searchQuery']['query'].append( + { + "type": "labeled_at", + "value": { + "operator": "LESS_THAN_OR_EQUAL", + "value": end + } + }) + res = self.client.execute( create_task_query_str, query_params, diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index f16b44e13..4257f0a74 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ -53,15 +53,32 @@ def test_project_export_v2(configured_project_with_label): include_performance_details = True task = project.export_v2( task_name, + filters={ + "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], + "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] + }, params={ "include_performance_details": include_performance_details, "include_labels": True }) + + task_to = project.export_v2( + filters={"last_activity_at": [None, "2050-01-01 00:00:00"]}) + + task_from = project.export_v2( + filters={"last_activity_at": ["2000-01-01 00:00:00", None]}) + assert task.name == task_name task.wait_till_done() assert task.status == "COMPLETE" assert task.errors is None + task_to.wait_till_done() + assert task_to.status == "COMPLETE" + + task_from.wait_till_done() + assert task_from.status == "COMPLETE" + for task_result in task.result: task_project = task_result['projects'][project.uid] task_project_label_ids_set = set( From d8042eab38e5ab9d824282e351e5cd0f676a64e5 Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Thu, 9 Mar 2023 00:04:05 +0100 Subject: [PATCH 2/8] Make filters optional --- labelbox/schema/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 68a7b2ce1..239bfa9e2 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -422,7 +422,7 @@ def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str: def export_v2(self, task_name: Optional[str] = None, - filters: ProjectExportFilters = None, + filters: Optional[ProjectExportFilters] = None, params: Optional[ProjectExportParams] = None) -> Task: _params = params or ProjectExportParams({ From be1ee2ed6c11af9e56bf12acfb878a4932605d65 Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Thu, 9 Mar 2023 00:07:35 +0100 Subject: [PATCH 3/8] Mypy typing fixes --- labelbox/schema/project.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 239bfa9e2..e4b6d7366 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -434,7 +434,10 @@ def export_v2(self, "label_details": False }) - _filters = filters or ProjectExportFilters() + _filters = filters or ProjectExportFilters({ + "last_activity_at": None, + "label_created_at": None + }) def _get_timezone() -> str: timezone_query_str = """query CurrentUserPyApi { user { timezone } }""" From 089f0a71f2d9d6ced7b0982716b22a58dff8d537 Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Thu, 9 Mar 2023 00:30:18 +0100 Subject: [PATCH 4/8] Docs, mypy fixes --- labelbox/schema/project.py | 56 +++++++++++++++++-------------- tests/integration/test_project.py | 2 +- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index e4b6d7366..35e30175b 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -402,28 +402,29 @@ def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str: self.uid) time.sleep(sleep_time) - """ - Creates a project run export task with the given params and returns the task. - - >>> task = project.export_v2( - >>> filters={ - >>> "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - >>> "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] - >>> }, - >>> params={ - >>> "include_performance_details": False, - >>> "include_labels": True - >>> }) - >>> task.wait_till_done() - >>> task.result - - - """ + def export_v2(self, task_name: Optional[str] = None, filters: Optional[ProjectExportFilters] = None, params: Optional[ProjectExportParams] = None) -> Task: + """ + Creates a project run export task with the given params and returns the task. + + For more information visit: https://docs.labelbox.com/docs/exports-v2#export-from-a-project-python-sdk + + >>> task = project.export_v2( + >>> filters={ + >>> "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], + >>> "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] + >>> }, + >>> params={ + >>> "include_performance_details": False, + >>> "include_labels": True + >>> }) + >>> task.wait_till_done() + >>> task.result + """ _params = params or ProjectExportParams({ "attachments": False, @@ -450,6 +451,8 @@ def _get_timezone() -> str: create_task_query_str = """mutation exportDataRowsInProjectPyApi($input: ExportDataRowsInProjectInput!){ %s(input: $input) {taskId} } """ % (mutation_name) + + search_query = [] query_params = { "input": { "taskName": task_name, @@ -457,7 +460,7 @@ def _get_timezone() -> str: "projectId": self.uid, "searchQuery": { "scope": None, - "query": [] + "query": search_query } }, "params": { @@ -477,14 +480,15 @@ def _get_timezone() -> str: } } - if _filters.get('last_activity_at') is not None: + + if "last_activity_at" in _filters and _filters['last_activity_at'] is not None: if timezone is None: timezone = _get_timezone() values = _filters['last_activity_at'] start, end = values if (start is not None and end is not None): [_validate_datetime(date) for date in values] - query_params["input"]["filters"]['searchQuery']['query'].append( + search_query.append( { "type": "data_row_last_activity_at", "value": { @@ -498,7 +502,7 @@ def _get_timezone() -> str: }) elif (start is not None): _validate_datetime(start) - query_params["input"]["filters"]['searchQuery']['query'].append( + search_query.append( { "type": "data_row_last_activity_at", "value": { @@ -509,7 +513,7 @@ def _get_timezone() -> str: }) elif (end is not None): _validate_datetime(end) - query_params["input"]["filters"]['searchQuery']['query'].append( + search_query.append( { "type": "data_row_last_activity_at", "value": { @@ -519,14 +523,14 @@ def _get_timezone() -> str: } }) - if _filters.get('label_created_at') is not None: + if "label_created_at" in _filters and _filters["label_created_at"] is not None: if timezone is None: timezone = _get_timezone() values = _filters['label_created_at'] start, end = values if (start is not None and end is not None): [_validate_datetime(date) for date in values] - query_params["input"]["filters"]['searchQuery']['query'].append( + search_query.append( { "type": "labeled_at", "value": { @@ -539,7 +543,7 @@ def _get_timezone() -> str: }) elif (start is not None): _validate_datetime(start) - query_params["input"]["filters"]['searchQuery']['query'].append( + search_query.append( { "type": "labeled_at", "value": { @@ -549,7 +553,7 @@ def _get_timezone() -> str: }) elif (end is not None): _validate_datetime(end) - query_params["input"]["filters"]['searchQuery']['query'].append( + search_query.append( { "type": "labeled_at", "value": { diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index 4257f0a74..bd3495af3 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ -66,7 +66,7 @@ def test_project_export_v2(configured_project_with_label): filters={"last_activity_at": [None, "2050-01-01 00:00:00"]}) task_from = project.export_v2( - filters={"last_activity_at": ["2000-01-01 00:00:00", None]}) + filters={"label_created_at": ["2000-01-01 00:00:00", None]}) assert task.name == task_name task.wait_till_done() From 0f77dd5be18d497df173decde9f33d57861d673a Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Thu, 9 Mar 2023 00:32:11 +0100 Subject: [PATCH 5/8] Docstring fixes --- labelbox/schema/project.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 35e30175b..57d9b0c40 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -414,14 +414,14 @@ def export_v2(self, For more information visit: https://docs.labelbox.com/docs/exports-v2#export-from-a-project-python-sdk >>> task = project.export_v2( - >>> filters={ - >>> "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], - >>> "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] - >>> }, - >>> params={ - >>> "include_performance_details": False, - >>> "include_labels": True - >>> }) + >>> filters={ + >>> "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], + >>> "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] + >>> }, + >>> params={ + >>> "include_performance_details": False, + >>> "include_labels": True + >>> }) >>> task.wait_till_done() >>> task.result """ From ef1df397e55d534e247ece869ec18c5037207ffc Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Thu, 9 Mar 2023 00:34:02 +0100 Subject: [PATCH 6/8] yapf --- labelbox/schema/project.py | 111 +++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 59 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 57d9b0c40..f7654c306 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -402,8 +402,6 @@ def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str: self.uid) time.sleep(sleep_time) - - def export_v2(self, task_name: Optional[str] = None, filters: Optional[ProjectExportFilters] = None, @@ -451,7 +449,7 @@ def _get_timezone() -> str: create_task_query_str = """mutation exportDataRowsInProjectPyApi($input: ExportDataRowsInProjectInput!){ %s(input: $input) {taskId} } """ % (mutation_name) - + search_query = [] query_params = { "input": { @@ -480,87 +478,82 @@ def _get_timezone() -> str: } } - - if "last_activity_at" in _filters and _filters['last_activity_at'] is not None: + if "last_activity_at" in _filters and _filters[ + 'last_activity_at'] is not None: if timezone is None: timezone = _get_timezone() values = _filters['last_activity_at'] start, end = values if (start is not None and end is not None): [_validate_datetime(date) for date in values] - search_query.append( - { - "type": "data_row_last_activity_at", + search_query.append({ + "type": "data_row_last_activity_at", + "value": { + "operator": "BETWEEN", + "timezone": timezone, "value": { - "operator": "BETWEEN", - "timezone": timezone, - "value": { - "min": start, - "max": end - } + "min": start, + "max": end } - }) + } + }) elif (start is not None): _validate_datetime(start) - search_query.append( - { - "type": "data_row_last_activity_at", - "value": { - "operator": "GREATER_THAN_OR_EQUAL", - "timezone": timezone, - "value": start - } - }) + search_query.append({ + "type": "data_row_last_activity_at", + "value": { + "operator": "GREATER_THAN_OR_EQUAL", + "timezone": timezone, + "value": start + } + }) elif (end is not None): _validate_datetime(end) - search_query.append( - { - "type": "data_row_last_activity_at", - "value": { - "operator": "LESS_THAN_OR_EQUAL", - "timezone": timezone, - "value": end - } - }) + search_query.append({ + "type": "data_row_last_activity_at", + "value": { + "operator": "LESS_THAN_OR_EQUAL", + "timezone": timezone, + "value": end + } + }) - if "label_created_at" in _filters and _filters["label_created_at"] is not None: + if "label_created_at" in _filters and _filters[ + "label_created_at"] is not None: if timezone is None: timezone = _get_timezone() values = _filters['label_created_at'] start, end = values if (start is not None and end is not None): [_validate_datetime(date) for date in values] - search_query.append( - { - "type": "labeled_at", + search_query.append({ + "type": "labeled_at", + "value": { + "operator": "BETWEEN", "value": { - "operator": "BETWEEN", - "value": { - "min": start, - "max": end - } + "min": start, + "max": end } - }) + } + }) elif (start is not None): _validate_datetime(start) - search_query.append( - { - "type": "labeled_at", - "value": { - "operator": "GREATER_THAN_OR_EQUAL", - "value": start - } - }) + search_query.append({ + "type": "labeled_at", + "value": { + "operator": "GREATER_THAN_OR_EQUAL", + "value": start + } + }) elif (end is not None): _validate_datetime(end) - search_query.append( - { - "type": "labeled_at", - "value": { - "operator": "LESS_THAN_OR_EQUAL", - "value": end - } - }) + search_query.append({ + "type": "labeled_at", + "value": { + "operator": "LESS_THAN_OR_EQUAL", + "value": end + } + }) res = self.client.execute( create_task_query_str, From 78e052bf511fdd941faae4c89c2293341bdfa950 Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Thu, 9 Mar 2023 00:49:04 +0100 Subject: [PATCH 7/8] yapf&mypy fixes --- labelbox/schema/export_filters.py | 4 +++- labelbox/schema/project.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/labelbox/schema/export_filters.py b/labelbox/schema/export_filters.py index 995a2447a..1942994d6 100644 --- a/labelbox/schema/export_filters.py +++ b/labelbox/schema/export_filters.py @@ -2,10 +2,12 @@ from typing import Optional if sys.version_info >= (3, 8): - from typing import TypedDict, Tuple + from typing import TypedDict else: from typing_extensions import TypedDict +from typing import Tuple + class ProjectExportFilters(TypedDict): label_created_at: Optional[Tuple[str, str]] diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index f7654c306..99196fcf2 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -4,7 +4,7 @@ from collections import namedtuple from datetime import datetime, timezone from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union +from typing import TYPE_CHECKING, Any, Collection, Dict, Iterable, List, Optional, Union from urllib.parse import urlparse import ndjson @@ -450,7 +450,7 @@ def _get_timezone() -> str: %s(input: $input) {taskId} } """ % (mutation_name) - search_query = [] + search_query: List[Dict[str, Collection[str]]] = [] query_params = { "input": { "taskName": task_name, From c1f8af0b686644e6cdd5c41a2cf507e0449c8c67 Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Thu, 9 Mar 2023 16:47:20 +0100 Subject: [PATCH 8/8] Add docs to filters --- labelbox/schema/export_filters.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/labelbox/schema/export_filters.py b/labelbox/schema/export_filters.py index 1942994d6..928b3dcba 100644 --- a/labelbox/schema/export_filters.py +++ b/labelbox/schema/export_filters.py @@ -11,4 +11,18 @@ class ProjectExportFilters(TypedDict): label_created_at: Optional[Tuple[str, str]] + """ Date range for labels created at + Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss" + Examples: + >>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] + >>> [None, "2050-01-01 00:00:00"] + >>> ["2000-01-01 00:00:00", None] + """ last_activity_at: Optional[Tuple[str, str]] + """ Date range for last activity at + Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss" + Examples: + >>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] + >>> [None, "2050-01-01 00:00:00"] + >>> ["2000-01-01 00:00:00", None] + """