import sys

from typing import Optional
if sys.version_info >= (3, 8):
    from typing import TypedDict
else:
    from typing_extensions import TypedDict

from typing import Tuple


class ProjectExportFilters(TypedDict, total=False):
    """Date-range filters accepted by a project export.

    Every key is optional (``total=False``): a filter that is omitted, or
    set to ``None``, is not applied. Each filter is a ``(start, end)`` pair
    in which either bound may be ``None`` to leave that side of the range
    open. The consumer unpacks the value with ``start, end = value``, so a
    two-item list works at runtime as well as a tuple.
    """

    label_created_at: Optional[Tuple[Optional[str], Optional[str]]]
    """ Date range for labels created at
    Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
    Examples:
    >>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
    >>> [None, "2050-01-01 00:00:00"]
    >>> ["2000-01-01 00:00:00", None]
    """
    last_activity_at: Optional[Tuple[Optional[str], Optional[str]]]
    """ Date range for last activity at
    Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
    Examples:
    >>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
    >>> [None, "2050-01-01 00:00:00"]
    >>> ["2000-01-01 00:00:00", None]
    """
def _validate_datetime(string_date: Optional[str]) -> bool:
    """Check that an export date-filter bound uses a supported format.

    Supported formats are "YYYY-MM-DD" and "YYYY-MM-DD hh:mm:ss".

    Args:
        string_date: The bound to check. A falsy value (``None`` or ``""``)
            is accepted without parsing.

    Returns:
        True when the value is falsy or parses with one of the supported
        formats.

    Raises:
        ValueError: If a non-empty value matches neither supported format.
    """
    # Guard clause: nothing to parse for a missing bound.
    if not string_date:
        return True

    for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"):
        try:
            datetime.strptime(string_date, fmt)
        except ValueError:
            # Not this format; try the next one before giving up.
            continue
        return True

    # Single-line message: the previous triple-quoted literal leaked a raw
    # newline and the source indentation into the exception text.
    raise ValueError(
        f"Incorrect format for: {string_date}. "
        'Format must be "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"')
+ + For more information visit: https://docs.labelbox.com/docs/exports-v2#export-from-a-project-python-sdk + + >>> task = project.export_v2( + >>> filters={ + >>> "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], + >>> "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] + >>> }, + >>> params={ + >>> "include_performance_details": False, + >>> "include_labels": True + >>> }) + >>> task.wait_till_done() + >>> task.result + """ _params = params or ProjectExportParams({ "attachments": False, @@ -420,15 +433,33 @@ def export_v2(self, "label_details": False }) + _filters = filters or ProjectExportFilters({ + "last_activity_at": None, + "label_created_at": None + }) + + def _get_timezone() -> str: + timezone_query_str = """query CurrentUserPyApi { user { timezone } }""" + tz_res = self.client.execute(timezone_query_str) + return tz_res["user"]["timezone"] or "UTC" + + timezone: Optional[str] = None + mutation_name = "exportDataRowsInProject" create_task_query_str = """mutation exportDataRowsInProjectPyApi($input: ExportDataRowsInProjectInput!){ %s(input: $input) {taskId} } """ % (mutation_name) + + search_query: List[Dict[str, Collection[str]]] = [] query_params = { "input": { "taskName": task_name, "filters": { - "projectId": self.uid + "projectId": self.uid, + "searchQuery": { + "scope": None, + "query": search_query + } }, "params": { "includeAttachments": @@ -446,6 +477,84 @@ def export_v2(self, }, } } + + if "last_activity_at" in _filters and _filters[ + 'last_activity_at'] is not None: + if timezone is None: + timezone = _get_timezone() + values = _filters['last_activity_at'] + start, end = values + if (start is not None and end is not None): + [_validate_datetime(date) for date in values] + search_query.append({ + "type": "data_row_last_activity_at", + "value": { + "operator": "BETWEEN", + "timezone": timezone, + "value": { + "min": start, + "max": end + } + } + }) + elif (start is not None): + _validate_datetime(start) + 
search_query.append({ + "type": "data_row_last_activity_at", + "value": { + "operator": "GREATER_THAN_OR_EQUAL", + "timezone": timezone, + "value": start + } + }) + elif (end is not None): + _validate_datetime(end) + search_query.append({ + "type": "data_row_last_activity_at", + "value": { + "operator": "LESS_THAN_OR_EQUAL", + "timezone": timezone, + "value": end + } + }) + + if "label_created_at" in _filters and _filters[ + "label_created_at"] is not None: + if timezone is None: + timezone = _get_timezone() + values = _filters['label_created_at'] + start, end = values + if (start is not None and end is not None): + [_validate_datetime(date) for date in values] + search_query.append({ + "type": "labeled_at", + "value": { + "operator": "BETWEEN", + "value": { + "min": start, + "max": end + } + } + }) + elif (start is not None): + _validate_datetime(start) + search_query.append({ + "type": "labeled_at", + "value": { + "operator": "GREATER_THAN_OR_EQUAL", + "value": start + } + }) + elif (end is not None): + _validate_datetime(end) + search_query.append({ + "type": "labeled_at", + "value": { + "operator": "LESS_THAN_OR_EQUAL", + "value": end + } + }) + res = self.client.execute( create_task_query_str, query_params, diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index f16b44e13..bd3495af3 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ -53,15 +53,32 @@ def test_project_export_v2(configured_project_with_label): include_performance_details = True task = project.export_v2( task_name, + filters={ + "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], + "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] + }, params={ "include_performance_details": include_performance_details, "include_labels": True }) + + task_to = project.export_v2( + filters={"last_activity_at": [None, "2050-01-01 00:00:00"]}) + + task_from = project.export_v2( + filters={"label_created_at": 
["2000-01-01 00:00:00", None]}) + assert task.name == task_name task.wait_till_done() assert task.status == "COMPLETE" assert task.errors is None + task_to.wait_till_done() + assert task_to.status == "COMPLETE" + + task_from.wait_till_done() + assert task_from.status == "COMPLETE" + for task_result in task.result: task_project = task_result['projects'][project.uid] task_project_label_ids_set = set(