Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions labelbox/schema/export_filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import sys

from typing import Optional
if sys.version_info >= (3, 8):
from typing import TypedDict
else:
from typing_extensions import TypedDict

from typing import Tuple


class ProjectExportFilters(TypedDict):
label_created_at: Optional[Tuple[str, str]]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a good place to add a docstring that describes each of these filter items in detail, and maybe to document how we fetch the timezone as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could do a follow-up PR with improved docs. At this point we're linking to the docs through a URL in export_v2's docstring, which should give good guidance.

I don't want to block the release with the docs.

""" Date range for labels created at
Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
Examples:
>>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
>>> [None, "2050-01-01 00:00:00"]
>>> ["2000-01-01 00:00:00", None]
"""
last_activity_at: Optional[Tuple[str, str]]
""" Date range for last activity at
Formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
Examples:
>>> ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
>>> [None, "2050-01-01 00:00:00"]
>>> ["2000-01-01 00:00:00", None]
"""
153 changes: 131 additions & 22 deletions labelbox/schema/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import namedtuple
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
from typing import TYPE_CHECKING, Any, Collection, Dict, Iterable, List, Optional, Union
from urllib.parse import urlparse

import ndjson
Expand All @@ -20,6 +20,7 @@
from labelbox.pagination import PaginatedCollection
from labelbox.schema.consensus_settings import ConsensusSettings
from labelbox.schema.data_row import DataRow
from labelbox.schema.export_filters import ProjectExportFilters
from labelbox.schema.export_params import ProjectExportParams
from labelbox.schema.media_type import MediaType
from labelbox.schema.queue_mode import QueueMode
Expand All @@ -46,6 +47,20 @@
logger = logging.getLogger(__name__)


def _validate_datetime(string_date: str) -> bool:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a utils (or similar) package to move it to?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope - right now we're using it only within the project file's scope. I would move it into utils once there's a use case outside of that file.

"""helper function validate that datetime is as follows: YYYY-MM-DD for the export"""
if string_date:
for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"):
try:
datetime.strptime(string_date, fmt)
return True
except ValueError:
pass
raise ValueError(f"""Incorrect format for: {string_date}.
Format must be \"YYYY-MM-DD\" or \"YYYY-MM-DD hh:mm:ss\"""")
return True


class Project(DbObject, Updateable, Deletable):
""" A Project is a container that includes a labeling frontend, an ontology,
datasets and labels.
Expand Down Expand Up @@ -337,19 +352,6 @@ def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str:
if dictionary.get(c)
])

def _validate_datetime(string_date: str) -> bool:
"""helper function validate that datetime is as follows: YYYY-MM-DD for the export"""
if string_date:
for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"):
try:
datetime.strptime(string_date, fmt)
return True
except ValueError:
pass
raise ValueError(f"""Incorrect format for: {string_date}.
Format must be \"YYYY-MM-DD\" or \"YYYY-MM-DD hh:mm:ss\"""")
return True

sleep_time = 2
id_param = "projectId"
filter_param = ""
Expand Down Expand Up @@ -400,16 +402,27 @@ def _validate_datetime(string_date: str) -> bool:
self.uid)
time.sleep(sleep_time)

"""
Creates a project run export task with the given params and returns the task.

>>> export_task = export_v2("my_export_task", filter={"media_attributes": True})

"""

def export_v2(self,
task_name: Optional[str] = None,
filters: Optional[ProjectExportFilters] = None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the intent of default = None to allow passing the filters value as None, as opposed to just omitting the filters argument? If not, a default is not required. Also see https://docs.python.org/3.9/library/typing.html#typing.Optional
PS This seems to be the pattern we have been following, so just curious

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the default value is not provided, a function call fails with:

 export_v2() missing 2 required positional arguments: 'task_name' and 'params'

params: Optional[ProjectExportParams] = None) -> Task:
"""
Creates a project run export task with the given params and returns the task.

For more information visit: https://docs.labelbox.com/docs/exports-v2#export-from-a-project-python-sdk

>>> task = project.export_v2(
>>> filters={
>>> "last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
>>> "label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
>>> },
>>> params={
>>> "include_performance_details": False,
>>> "include_labels": True
>>> })
>>> task.wait_till_done()
>>> task.result
"""

_params = params or ProjectExportParams({
"attachments": False,
Expand All @@ -420,15 +433,33 @@ def export_v2(self,
"label_details": False
})

_filters = filters or ProjectExportFilters({
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another nitpick: I like to wrap those in a default function, like:

class ProjectExportFilters
  @classmethod
  def null_filter():
   self({...})

"last_activity_at": None,
"label_created_at": None
})

def _get_timezone() -> str:
timezone_query_str = """query CurrentUserPyApi { user { timezone } }"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, this is probably more for my own education, but don't we have some sort of SDK-side authenticated user object that should already expose the time zone?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've looked into the codebase myself and asked @kkim-labelbox about that - it looks like we haven't had a use case to fetch the timezone before.

tz_res = self.client.execute(timezone_query_str)
return tz_res["user"]["timezone"] or "UTC"

timezone: Optional[str] = None

mutation_name = "exportDataRowsInProject"
create_task_query_str = """mutation exportDataRowsInProjectPyApi($input: ExportDataRowsInProjectInput!){
%s(input: $input) {taskId} }
""" % (mutation_name)

search_query: List[Dict[str, Collection[str]]] = []
query_params = {
"input": {
"taskName": task_name,
"filters": {
"projectId": self.uid
"projectId": self.uid,
"searchQuery": {
"scope": None,
"query": search_query
}
},
"params": {
"includeAttachments":
Expand All @@ -446,6 +477,84 @@ def export_v2(self,
},
}
}

if "last_activity_at" in _filters and _filters[
Copy link
Contributor

@vbrodsky vbrodsky Mar 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if "last_activity_at" in _filters and _filters[
if _filters.get("last_activity_at", None) is not None:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started with that approach, but if I use the unsafe getter, mypy complains about this piece:

            values = _filters['last_activity_at']
            start, end = values

Perhaps mypy is not good enough with typings?

'last_activity_at'] is not None:
if timezone is None:
timezone = _get_timezone()
values = _filters['last_activity_at']
start, end = values
if (start is not None and end is not None):
[_validate_datetime(date) for date in values]
search_query.append({
"type": "data_row_last_activity_at",
"value": {
"operator": "BETWEEN",
"timezone": timezone,
"value": {
"min": start,
"max": end
}
}
})
elif (start is not None):
_validate_datetime(start)
search_query.append({
"type": "data_row_last_activity_at",
"value": {
"operator": "GREATER_THAN_OR_EQUAL",
"timezone": timezone,
"value": start
}
})
elif (end is not None):
_validate_datetime(end)
search_query.append({
"type": "data_row_last_activity_at",
"value": {
"operator": "LESS_THAN_OR_EQUAL",
"timezone": timezone,
"value": end
}
})

if "label_created_at" in _filters and _filters[
"label_created_at"] is not None:
if timezone is None:
timezone = _get_timezone()
values = _filters['label_created_at']
start, end = values
if (start is not None and end is not None):
[_validate_datetime(date) for date in values]
search_query.append({
"type": "labeled_at",
"value": {
"operator": "BETWEEN",
"value": {
"min": start,
"max": end
}
}
})
elif (start is not None):
_validate_datetime(start)
search_query.append({
"type": "labeled_at",
"value": {
"operator": "GREATER_THAN_OR_EQUAL",
"value": start
}
})
elif (end is not None):
_validate_datetime(end)
search_query.append({
"type": "labeled_at",
"value": {
"operator": "LESS_THAN_OR_EQUAL",
"value": end
}
})

res = self.client.execute(
create_task_query_str,
query_params,
Expand Down
17 changes: 17 additions & 0 deletions tests/integration/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,32 @@ def test_project_export_v2(configured_project_with_label):
include_performance_details = True
task = project.export_v2(
task_name,
filters={
"last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
"label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"]
},
params={
"include_performance_details": include_performance_details,
"include_labels": True
})

task_to = project.export_v2(
filters={"last_activity_at": [None, "2050-01-01 00:00:00"]})

task_from = project.export_v2(
filters={"label_created_at": ["2000-01-01 00:00:00", None]})

assert task.name == task_name
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None

task_to.wait_till_done()
assert task_to.status == "COMPLETE"

task_from.wait_till_done()
assert task_from.status == "COMPLETE"

for task_result in task.result:
task_project = task_result['projects'][project.uid]
task_project_label_ids_set = set(
Expand Down