-
Notifications
You must be signed in to change notification settings - Fork 68
[AL-5192] Add last_activity_at / label_created_at export v2 support #976
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8e5c65d
d8042ea
be1ee2e
089f0a7
0f77dd5
ef1df39
78e052b
c1f8af0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| import sys | ||
|
|
||
| from typing import Optional | ||
| if sys.version_info >= (3, 8): | ||
| from typing import TypedDict | ||
| else: | ||
| from typing_extensions import TypedDict | ||
|
|
||
| from typing import Tuple | ||
|
|
||
|
|
||
class ProjectExportFilters(TypedDict):
    """Filters that narrow down which data rows a project export returns.

    Each filter is either ``None`` (the filter is not applied) or a
    ``(start, end)`` pair of date strings. Either bound of the pair may be
    ``None`` to leave that side of the range open.
    """

    # The tuple elements are Optional because open-ended ranges are expressed
    # by passing None for the missing bound (see the examples below).
    label_created_at: Optional[Tuple[Optional[str], Optional[str]]]
    """ Date range for labels created at
    Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
    Examples:
    >>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
    >>> [None, "2050-01-01 00:00:00"]
    >>> ["2000-01-01 00:00:00", None]
    """
    last_activity_at: Optional[Tuple[Optional[str], Optional[str]]]
    """ Date range for last activity at
    Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
    Examples:
    >>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
    >>> [None, "2050-01-01 00:00:00"]
    >>> ["2000-01-01 00:00:00", None]
    """
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |||||
| from collections import namedtuple | ||||||
| from datetime import datetime, timezone | ||||||
| from pathlib import Path | ||||||
| from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union | ||||||
| from typing import TYPE_CHECKING, Any, Collection, Dict, Iterable, List, Optional, Union | ||||||
| from urllib.parse import urlparse | ||||||
|
|
||||||
| import ndjson | ||||||
|
|
@@ -20,6 +20,7 @@ | |||||
| from labelbox.pagination import PaginatedCollection | ||||||
| from labelbox.schema.consensus_settings import ConsensusSettings | ||||||
| from labelbox.schema.data_row import DataRow | ||||||
| from labelbox.schema.export_filters import ProjectExportFilters | ||||||
| from labelbox.schema.export_params import ProjectExportParams | ||||||
| from labelbox.schema.media_type import MediaType | ||||||
| from labelbox.schema.queue_mode import QueueMode | ||||||
|
|
@@ -46,6 +47,20 @@ | |||||
| logger = logging.getLogger(__name__) | ||||||
|
|
||||||
|
|
||||||
| def _validate_datetime(string_date: str) -> bool: | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a utils etc package to move it to?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope - right now we're using that only in project file scope; I would move it into utils once there's a use case outside of that file |
||||||
| """helper function validate that datetime is as follows: YYYY-MM-DD for the export""" | ||||||
| if string_date: | ||||||
| for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"): | ||||||
| try: | ||||||
| datetime.strptime(string_date, fmt) | ||||||
| return True | ||||||
| except ValueError: | ||||||
| pass | ||||||
| raise ValueError(f"""Incorrect format for: {string_date}. | ||||||
| Format must be \"YYYY-MM-DD\" or \"YYYY-MM-DD hh:mm:ss\"""") | ||||||
| return True | ||||||
|
|
||||||
|
|
||||||
| class Project(DbObject, Updateable, Deletable): | ||||||
| """ A Project is a container that includes a labeling frontend, an ontology, | ||||||
| datasets and labels. | ||||||
|
|
@@ -337,19 +352,6 @@ def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str: | |||||
| if dictionary.get(c) | ||||||
| ]) | ||||||
|
|
||||||
| def _validate_datetime(string_date: str) -> bool: | ||||||
| """helper function validate that datetime is as follows: YYYY-MM-DD for the export""" | ||||||
| if string_date: | ||||||
| for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"): | ||||||
| try: | ||||||
| datetime.strptime(string_date, fmt) | ||||||
| return True | ||||||
| except ValueError: | ||||||
| pass | ||||||
| raise ValueError(f"""Incorrect format for: {string_date}. | ||||||
| Format must be \"YYYY-MM-DD\" or \"YYYY-MM-DD hh:mm:ss\"""") | ||||||
| return True | ||||||
|
|
||||||
| sleep_time = 2 | ||||||
| id_param = "projectId" | ||||||
| filter_param = "" | ||||||
|
|
@@ -400,16 +402,27 @@ def _validate_datetime(string_date: str) -> bool: | |||||
| self.uid) | ||||||
| time.sleep(sleep_time) | ||||||
|
|
||||||
| """ | ||||||
| Creates a project run export task with the given params and returns the task. | ||||||
|
|
||||||
| >>> export_task = export_v2("my_export_task", filter={"media_attributes": True}) | ||||||
|
|
||||||
| """ | ||||||
|
|
||||||
| def export_v2(self, | ||||||
| task_name: Optional[str] = None, | ||||||
| filters: Optional[ProjectExportFilters] = None, | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the intent of default
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the default value is not provided, a function call fails with: |
||||||
| params: Optional[ProjectExportParams] = None) -> Task: | ||||||
| """ | ||||||
| Creates a project run export task with the given params and returns the task. | ||||||
|
|
||||||
| For more information visit: https://docs.labelbox.com/docs/exports-v2#export-from-a-project-python-sdk | ||||||
|
|
||||||
| >>> task = project.export_v2( | ||||||
| >>> filters={ | ||||||
| >>> "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"], | ||||||
| >>> "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"] | ||||||
| >>> }, | ||||||
| >>> params={ | ||||||
| >>> "include_performance_details": False, | ||||||
| >>> "include_labels": True | ||||||
| >>> }) | ||||||
| >>> task.wait_till_done() | ||||||
| >>> task.result | ||||||
| """ | ||||||
|
|
||||||
| _params = params or ProjectExportParams({ | ||||||
| "attachments": False, | ||||||
|
|
@@ -420,15 +433,33 @@ def export_v2(self, | |||||
| "label_details": False | ||||||
| }) | ||||||
|
|
||||||
| _filters = filters or ProjectExportFilters({ | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. another nitpick, I like to wrap those in a default fun like |
||||||
| "last_activity_at": None, | ||||||
| "label_created_at": None | ||||||
| }) | ||||||
|
|
||||||
| def _get_timezone() -> str: | ||||||
| timezone_query_str = """query CurrentUserPyApi { user { timezone } }""" | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, this is probably more for my own education, but don't we have some sort of SDK-side authenticated user object that should already expose the time zone?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've looked into the codebase myself and asked @kkim-labelbox about that - looks like we haven't had a use case to fetch the timezone before. |
||||||
| tz_res = self.client.execute(timezone_query_str) | ||||||
| return tz_res["user"]["timezone"] or "UTC" | ||||||
|
|
||||||
| timezone: Optional[str] = None | ||||||
|
|
||||||
| mutation_name = "exportDataRowsInProject" | ||||||
| create_task_query_str = """mutation exportDataRowsInProjectPyApi($input: ExportDataRowsInProjectInput!){ | ||||||
| %s(input: $input) {taskId} } | ||||||
| """ % (mutation_name) | ||||||
|
|
||||||
| search_query: List[Dict[str, Collection[str]]] = [] | ||||||
| query_params = { | ||||||
| "input": { | ||||||
| "taskName": task_name, | ||||||
| "filters": { | ||||||
| "projectId": self.uid | ||||||
| "projectId": self.uid, | ||||||
| "searchQuery": { | ||||||
| "scope": None, | ||||||
| "query": search_query | ||||||
| } | ||||||
| }, | ||||||
| "params": { | ||||||
| "includeAttachments": | ||||||
|
|
@@ -446,6 +477,84 @@ def export_v2(self, | |||||
| }, | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if "last_activity_at" in _filters and _filters[ | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've started with that approach, but if I use unsafe getter, Perhaps |
||||||
| 'last_activity_at'] is not None: | ||||||
| if timezone is None: | ||||||
| timezone = _get_timezone() | ||||||
| values = _filters['last_activity_at'] | ||||||
| start, end = values | ||||||
| if (start is not None and end is not None): | ||||||
| [_validate_datetime(date) for date in values] | ||||||
| search_query.append({ | ||||||
| "type": "data_row_last_activity_at", | ||||||
| "value": { | ||||||
| "operator": "BETWEEN", | ||||||
| "timezone": timezone, | ||||||
| "value": { | ||||||
| "min": start, | ||||||
| "max": end | ||||||
| } | ||||||
| } | ||||||
| }) | ||||||
| elif (start is not None): | ||||||
| _validate_datetime(start) | ||||||
| search_query.append({ | ||||||
| "type": "data_row_last_activity_at", | ||||||
| "value": { | ||||||
| "operator": "GREATER_THAN_OR_EQUAL", | ||||||
| "timezone": timezone, | ||||||
| "value": start | ||||||
| } | ||||||
| }) | ||||||
| elif (end is not None): | ||||||
| _validate_datetime(end) | ||||||
| search_query.append({ | ||||||
| "type": "data_row_last_activity_at", | ||||||
| "value": { | ||||||
| "operator": "LESS_THAN_OR_EQUAL", | ||||||
| "timezone": timezone, | ||||||
| "value": end | ||||||
| } | ||||||
| }) | ||||||
|
|
||||||
| if "label_created_at" in _filters and _filters[ | ||||||
| "label_created_at"] is not None: | ||||||
| if timezone is None: | ||||||
| timezone = _get_timezone() | ||||||
| values = _filters['label_created_at'] | ||||||
| start, end = values | ||||||
| if (start is not None and end is not None): | ||||||
| [_validate_datetime(date) for date in values] | ||||||
| search_query.append({ | ||||||
| "type": "labeled_at", | ||||||
| "value": { | ||||||
| "operator": "BETWEEN", | ||||||
| "value": { | ||||||
| "min": start, | ||||||
| "max": end | ||||||
| } | ||||||
| } | ||||||
| }) | ||||||
| elif (start is not None): | ||||||
| _validate_datetime(start) | ||||||
| search_query.append({ | ||||||
| "type": "labeled_at", | ||||||
| "value": { | ||||||
| "operator": "GREATER_THAN_OR_EQUAL", | ||||||
| "value": start | ||||||
| } | ||||||
| }) | ||||||
| elif (end is not None): | ||||||
| _validate_datetime(end) | ||||||
| search_query.append({ | ||||||
| "type": "labeled_at", | ||||||
| "value": { | ||||||
| "operator": "LESS_THAN_OR_EQUAL", | ||||||
| "value": end | ||||||
| } | ||||||
| }) | ||||||
|
|
||||||
| res = self.client.execute( | ||||||
| create_task_query_str, | ||||||
| query_params, | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like a good place to add a docstring to describe each of these filter items in detail, and maybe put the behavior for how we fetch the timezones as well
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we could do a follow up PR with improved docs. At this point we're linking to the docs through URL in export_v2's docstring, which should give good guidance.
I don't want to block the release with the docs.