From 028d92d4844da75a3be66f7cc01aa96922097d49 Mon Sep 17 00:00:00 2001
From: rllin
Date: Tue, 4 Aug 2020 20:59:38 -0700
Subject: [PATCH 01/32] wip

---
 labelbox/schema/project.py | 43 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index 5b7272924..b20d466d2 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -113,7 +113,7 @@ def export_labels(self, timeout_seconds=60):
         payload, and returns the URL to that payload. Will only generate
         a new URL at a max frequency of 30 min.
-        
+
         Args:
             timeout_seconds (float): Max waiting time, in seconds.
         Returns:
@@ -352,6 +352,47 @@ def create_prediction(self, label, data_row, prediction_model=None):
         res = self.client.execute(query_str, params)
         return Prediction(self.client, res["createPrediction"])
 
+    def upload_annotations(
+            self,
+            name: str,
+            url: str = None,
+            path: str = None,
+            annotations: Iterable[dict] = None
+    ) -> None:
+
+        exclusionary_args = iter((url, path, annotations))
+        # any will stop iteration when it finds a single truthy value
+        # not any will continue with where the iteration left off and
+        # check the rest are falsy
+        assert (any(exclusionary_args)
+                and not any(exclusionary_args)), (
+            'Only one of url, path, or annotations can be specified.'
+        )
+
+        shared_args = {
+            'client': self.client,
+            'project_id': self.uid,
+            'name': name
+        }
+
+        if url:
+            return BulkImportRequest.create_from_url(
+                **shared_args,
+                url=url,
+            )
+        if file:
+            return BulkImportRequest.create_from_local_file(
+                **shared_args,
+                file=file,
+                validate_file=True,
+            )
+        if annotations:
+            return BulkImportRequest.create_from_objects(
+                **shared_args,
+                predictions=annotations,
+            )
+
+
 class LabelingParameterOverride(DbObject):
     priority = Field.Int("priority")

From c23c2d6c2c08734c3c2e5125e5b135203d0743ec Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 10:39:04 -0700
Subject: [PATCH 02/32] replace BIR call with project call

---
 labelbox/schema/project.py                    | 79 ++++++++++++-------
 tests/integration/test_bulk_import_request.py | 32 ++++----
 2 files changed, 66 insertions(+), 45 deletions(-)

diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index b20d466d2..e8cb6d6e7 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -353,47 +353,68 @@ def create_prediction(self, label, data_row, prediction_model=None):
         return Prediction(self.client, res["createPrediction"])
 
     def upload_annotations(
-            self,
-            name: str,
-            url: str = None,
-            path: str = None,
-            annotations: Iterable[dict] = None
-    ) -> None:
-
-        exclusionary_args = iter((url, path, annotations))
-        # any will stop iteration when it finds a single truthy value
-        # not any will continue with where the iteration left off and
-        # check the rest are falsy
-        assert (any(exclusionary_args)
-                and not any(exclusionary_args)), (
-            'Only one of url, path, or annotations can be specified.'
-        )
+            self,
+            name: str,
+            annotations: Union[str, Iterable[dict]],
+            validate_file: bool = True,
+    ) -> 'BulkImportRequest':
+        """ Uploads annotations to a project.
+
+        Args:
+            name: name of the BulkImportRequest job
+            annotations:
+                url that is publically accessible by Labelbox containing an
+                ndjson file
+                OR local path to an ndjson file
+                OR iterable of annotation rows
+            validate_file: a flag indicating if validation should be performed
+                on the local ndjson file specified as `annotations`
+        Returns:
+            BulkImportRequest
+        """
 
         shared_args = {
             'client': self.client,
             'project_id': self.uid,
-            'name': name
+            'name': name,
         }
 
-        if url:
-            return BulkImportRequest.create_from_url(
-                **shared_args,
-                url=url,
-            )
-        if file:
-            return BulkImportRequest.create_from_local_file(
-                **shared_args,
-                file=file,
-                validate_file=True,
-            )
-        if annotations:
+        if isinstance(annotations, str):
+
+            def _is_url_valid(url: str) -> bool:
+                """ Verifies that the given string is a valid url.
+
+                Args:
+                    url: string to be checked
+                Returns:
+                    True if the given url is valid otherwise False
+
+                """
+                parsed = urlparse(url)
+                return parsed.http and parsed.netloc
+
+            if _is_url_valid(annotations):
+                return BulkImportRequest.create_from_url(
+                    **shared_args,
+                    url=annotations,
+                )
+            else:
+                if not os.path.exists(annotations):
+                    raise FileNotFoundError(
+                        f'{annotations} is not a valid url nor existing local file'
+                    )
+                return BulkImportRequest.create_from_local_file(
+                    **shared_args,
+                    file=annotations,
+                    validate_file=True,
+                )
+        else:
             return BulkImportRequest.create_from_objects(
                 **shared_args,
                 predictions=annotations,
             )
 
-
 class LabelingParameterOverride(DbObject):
     priority = Field.Int("priority")
     number_of_labels = Field.Int("number_of_labels")
diff --git a/tests/integration/test_bulk_import_request.py b/tests/integration/test_bulk_import_request.py
index 8a4fde629..1f35a06c3 100644
--- a/tests/integration/test_bulk_import_request.py
+++ b/tests/integration/test_bulk_import_request.py
@@ -43,12 +43,11 @@
 }]
 
 
-def test_create_from_url(client, project):
+def test_create_from_url(project):
     name = str(uuid.uuid4())
     url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
 
-    bulk_import_request = BulkImportRequest.create_from_url(
-        client, project.uid, name, url)
+    bulk_import_request = project.upload_annotations(name=name, annotations=url)
 
     assert bulk_import_request.project() == project
     assert bulk_import_request.name == name
@@ -58,11 +57,11 @@ def test_create_from_url(client, project):
     assert bulk_import_request.state == BulkImportRequestState.RUNNING
 
 
-def test_create_from_objects(client, project):
+def test_create_from_objects(project):
     name = str(uuid.uuid4())
 
-    bulk_import_request = BulkImportRequest.create_from_objects(
-        client, project.uid, name, PREDICTIONS)
+    bulk_import_request = project.upload_annotations(name=name,
+                                                     annotations=PREDICTIONS)
 
     assert bulk_import_request.project() == project
     assert bulk_import_request.name == name
@@ -72,15 +71,15 @@ def test_create_from_objects(client, project):
     __assert_file_content(bulk_import_request.input_file_url)
 
 
-def test_create_from_local_file(tmp_path, client, project):
+def test_create_from_local_file(tmp_path, project):
     name = str(uuid.uuid4())
     file_name = f"{name}.ndjson"
     file_path = tmp_path / file_name
     with file_path.open("w") as f:
         ndjson.dump(PREDICTIONS, f)
 
-    bulk_import_request = BulkImportRequest.create_from_local_file(
-        client, project.uid, name, file_path)
+    bulk_import_request = project.upload_annotations(name=name,
+                                                     annotations=file_path)
 
     assert bulk_import_request.project() == project
     assert bulk_import_request.name == name
@@ -93,7 +92,7 @@ def test_create_from_local_file(tmp_path, client, project):
 def test_get(client, project):
     name = str(uuid.uuid4())
     url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
-    BulkImportRequest.create_from_url(client, project.uid, name, url)
+    project.upload_annotations(name=name, annotations=url)
 
     bulk_import_request = BulkImportRequest.get(client, project.uid, name)
 
@@ -105,23 +104,24 @@ def test_get(client, project):
     assert bulk_import_request.state == BulkImportRequestState.RUNNING
 
 
-def test_validate_ndjson(tmp_path, client, project):
+def test_validate_ndjson(tmp_path, project):
     file_name = f"broken.ndjson"
     file_path = tmp_path / file_name
     with file_path.open("w") as f:
         f.write("test")
 
     with pytest.raises(ValueError):
-        BulkImportRequest.create_from_local_file(client, project.uid, "name",
-                                                 file_path)
+        project.upload_annotations(name="name", annotations=file_path)
 
 
 @pytest.mark.slow
-def test_wait_till_done(client, project):
+def test_wait_till_done(project):
     name = str(uuid.uuid4())
     url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
-    bulk_import_request = BulkImportRequest.create_from_url(
-        client, project.uid, name, url)
+    bulk_import_request = project.upload_annotations(
+        name=name,
+        annotations=url,
+    )
 
     bulk_import_request.wait_until_done()

From 369b78014ad96280902685b84b00a278e659daf6 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 10:46:21 -0700
Subject: [PATCH 03/32] fix mypy

---
 labelbox/schema/project.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index e8cb6d6e7..e322a6e9f 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -3,6 +3,7 @@
 import json
 import logging
 import time
+from typing import Union, Iterable
 
 from labelbox import utils
 from labelbox.exceptions import InvalidQueryError
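Taken together, the first three patches replace the `BulkImportRequest` classmethod entry points with a single `Project.upload_annotations` call that dispatches on the shape of its `annotations` argument. A usage sketch of that surface (illustrative only — the API key, project ID, and local file path are hypothetical placeholders; `wait_until_done` is the polling call used in the tests above):

```python
from labelbox import Client

client = Client(api_key="<API_KEY>")          # assumes a valid API key
project = client.get_project("<PROJECT_ID>")  # hypothetical project id

# 1. A publicly accessible URL to an ndjson file.
job = project.upload_annotations(
    name="import-from-url",
    annotations="https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson")

# 2. A local path to an ndjson file (validated before upload).
job = project.upload_annotations(name="import-from-file",
                                 annotations="./predictions.ndjson")

# 3. An iterable of annotation dicts, serialized to ndjson for you.
job = project.upload_annotations(
    name="import-from-objects",
    annotations=[{
        "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
        "schemaId": "ckappz7d700gn0zbocmqkwd9i",
        "dataRow": {"id": "ck1s02fqxm8fi0757f0e6qtdc"},
        "bbox": {"top": 48, "left": 58, "height": 865, "width": 1512},
    }])

job.wait_until_done()  # block until the import finishes
```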
From 9d68db1e5f5225c6b0f678f226421baf731afab2 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 12:07:28 -0700
Subject: [PATCH 04/32] clean up classmethods

---
 labelbox/schema/bulk_import_request.py | 401 ++++++++++++-------------
 labelbox/schema/project.py             |  15 +-
 2 files changed, 206 insertions(+), 210 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 8bb861c59..440673110 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -25,6 +25,85 @@
 logger = logging.getLogger(__name__)
 
 
+def __make_file_name(project_id: str, name: str) -> str:
+    return f"{project_id}__{name}.ndjson"
+
+
+# TODO(gszpak): all the code below should be handled automatically by Relationship
+def __build_results_query_part() -> str:
+    return """
+    project {
+        %s
+    }
+    createdBy {
+        %s
+    }
+    %s
+    """ % (query.results_query_part(Project), query.results_query_part(User),
+           query.results_query_part(BulkImportRequest))
+
+
+# TODO(gszpak): move it to client.py
+def _make_request_data(project_id: str, name: str, content_length: int,
+                       file_name: str) -> dict:
+    query_str = """mutation createBulkImportRequestFromFilePyApi(
+            $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) {
+        createBulkImportRequest(data: {
+            projectId: $projectId,
+            name: $name,
+            filePayload: {
+                file: $file,
+                contentLength: $contentLength
+            }
+        }) {
+            %s
+        }
+    }
+    """ % __build_results_query_part()
+    variables = {
+        "projectId": project_id,
+        "name": name,
+        "file": None,
+        "contentLength": content_length
+    }
+    operations = json.dumps({"variables": variables, "query": query_str})
+
+    return {
+        "operations": operations,
+        "map": (None, json.dumps({file_name: ["variables.file"]}))
+    }
+
+
+# TODO(gszpak): move it to client.py
+def __send_create_file_command(
+        cls, client: Client, request_data: dict, file_name: str,
+        file_data: Tuple[str, Union[bytes, BinaryIO], str]) -> dict:
+    response = requests.post(
+        client.endpoint,
+        headers={"authorization": "Bearer %s" % client.api_key},
+        data=request_data,
+        files={file_name: file_data})
+
+    try:
+        response_json = response.json()
+    except ValueError:
+        raise labelbox.exceptions.LabelboxError(
+            "Failed to parse response as JSON: %s" % response.text)
+
+    response_data = response_json.get("data", None)
+    if response_data is None:
+        raise labelbox.exceptions.LabelboxError(
+            "Failed to upload, message: %s" % response_json.get("errors", None))
+
+    if not response_data.get("createBulkImportRequest", None):
+        raise labelbox.exceptions.LabelboxError(
+            "Failed to create BulkImportRequest, message: %s" %
+            response_json.get("errors", None) or
+            response_data.get("error", None))
+
+    return response_data
+
+
 class BulkImportRequest(DbObject):
     project = Relationship.ToOne("Project")
     name = Field.String("name")
@@ -35,120 +114,9 @@ class BulkImportRequest(DbObject):
     status_file_url = Field.String("status_file_url")
     state = Field.Enum(BulkImportRequestState, "state")
 
-    @classmethod
-    def create_from_url(cls, client: Client, project_id: str, name: str,
-                        url: str) -> 'BulkImportRequest':
-        """
-        Creates a BulkImportRequest from a publicly accessible URL
-        to an ndjson file with predictions.
-
-        Args:
-            client (Client): a Labelbox client
-            project_id (str): id of project for which predictions will be imported
-            name (str): name of BulkImportRequest
-            url (str): publicly accessible URL pointing to ndjson file containing predictions
-        Returns:
-            BulkImportRequest object
-        """
-        query_str = """mutation createBulkImportRequestPyApi(
-                $projectId: ID!, $name: String!, $fileUrl: String!) {
-            createBulkImportRequest(data: {
-                projectId: $projectId,
-                name: $name,
-                fileUrl: $fileUrl
-            }) {
-                %s
-            }
-        }
-        """ % cls.__build_results_query_part()
-        params = {"projectId": project_id, "name": name, "fileUrl": url}
-        bulk_import_request_response = client.execute(query_str, params=params)
-        return cls.__build_bulk_import_request_from_result(
-            client, bulk_import_request_response["createBulkImportRequest"])
-
-    @classmethod
-    def create_from_objects(cls, client: Client, project_id: str, name: str,
-                            predictions: Iterable[dict]) -> 'BulkImportRequest':
-        """
-        Creates a BulkImportRequest from an iterable of dictionaries conforming to
-        JSON predictions format, e.g.:
-        ``{
-            "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
-            "schemaId": "ckappz7d700gn0zbocmqkwd9i",
-            "dataRow": {
-                "id": "ck1s02fqxm8fi0757f0e6qtdc"
-            },
-            "bbox": {
-                "top": 48,
-                "left": 58,
-                "height": 865,
-                "width": 1512
-            }
-        }``
-
-        Args:
-            client (Client): a Labelbox client
-            project_id (str): id of project for which predictions will be imported
-            name (str): name of BulkImportRequest
-            predictions (Iterable[dict]): iterable of dictionaries representing predictions
-        Returns:
-            BulkImportRequest object
-        """
-        data_str = ndjson.dumps(predictions)
-        data = data_str.encode('utf-8')
-        file_name = cls.__make_file_name(project_id, name)
-        request_data = cls.__make_request_data(project_id, name, len(data_str),
-                                               file_name)
-        file_data = (file_name, data, NDJSON_MIME_TYPE)
-        response_data = cls.__send_create_file_command(client, request_data,
-                                                       file_name, file_data)
-        return cls.__build_bulk_import_request_from_result(
-            client, response_data["createBulkImportRequest"])
-
-    @classmethod
-    def create_from_local_file(cls,
-                               client: Client,
-                               project_id: str,
-                               name: str,
-                               file: Path,
-                               validate_file=True) -> 'BulkImportRequest':
-        """
-        Creates a BulkImportRequest from a local ndjson file with predictions.
-
-        Args:
-            client (Client): a Labelbox client
-            project_id (str): id of project for which predictions will be imported
-            name (str): name of BulkImportRequest
-            file (Path): local ndjson file with predictions
-            validate_file (bool): a flag indicating if there should be a validation
-                if `file` is a valid ndjson file
-        Returns:
-            BulkImportRequest object
-        """
-        file_name = cls.__make_file_name(project_id, name)
-        content_length = file.stat().st_size
-        request_data = cls.__make_request_data(project_id, name,
-                                               content_length, file_name)
-        with file.open('rb') as f:
-            file_data: Tuple[str, Union[bytes, BinaryIO], str]
-            if validate_file:
-                data = f.read()
-                try:
-                    ndjson.loads(data)
-                except ValueError:
-                    raise ValueError(f"{file} is not a valid ndjson file")
-                file_data = (file.name, data, NDJSON_MIME_TYPE)
-            else:
-                file_data = (file.name, f, NDJSON_MIME_TYPE)
-        response_data = cls.__send_create_file_command(
-            client, request_data, file_name, file_data)
-        return cls.__build_bulk_import_request_from_result(
-            client, response_data["createBulkImportRequest"])
-
     # TODO(gszpak): building query body should be handled by the client
-    @classmethod
-    def get(cls, client: Client, project_id: str,
-            name: str) -> 'BulkImportRequest':
+    @staticmethod
+    def get(client: Client, project_id: str, name: str) -> 'BulkImportRequest':
         """
         Fetches existing BulkImportRequest.
@@ -168,18 +136,17 @@ def get(cls, client: Client, project_id: str,
                 %s
             }
         }
-        """ % cls.__build_results_query_part()
+        """ % __build_results_query_part()
         params = {"projectId": project_id, "name": name}
-        bulk_import_request_kwargs = \
-            client.execute(query_str, params=params).get("bulkImportRequest")
+        bulk_import_request_kwargs = client.execute(
+            query_str, params=params).get("bulkImportRequest")
         if bulk_import_request_kwargs is None:
             raise labelbox.exceptions.ResourceNotFoundError(
                 BulkImportRequest, {
                     "projectId": project_id,
                    "name": name
                 })
-        return cls.__build_bulk_import_request_from_result(
-            client, bulk_import_request_kwargs)
+        return BulkImportRequest.from_result(client, bulk_import_request_kwargs)
 
     def refresh(self) -> None:
         """
@@ -225,90 +192,8 @@ def created_by(self):  # type: ignore
             return self.__user
         return None
 
-    @classmethod
-    def __make_file_name(cls, project_id: str, name: str) -> str:
-        return f"{project_id}__{name}.ndjson"
-
-    # TODO(gszpak): move it to client.py
-    @classmethod
-    def __make_request_data(cls, project_id: str, name: str,
-                            content_length: int, file_name: str) -> dict:
-        query_str = """mutation createBulkImportRequestFromFilePyApi(
-                $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) {
-            createBulkImportRequest(data: {
-                projectId: $projectId,
-                name: $name,
-                filePayload: {
-                    file: $file,
-                    contentLength: $contentLength
-                }
-            }) {
-                %s
-            }
-        }
-        """ % cls.__build_results_query_part()
-        variables = {
-            "projectId": project_id,
-            "name": name,
-            "file": None,
-            "contentLength": content_length
-        }
-        operations = json.dumps({"variables": variables, "query": query_str})
-
-        return {
-            "operations": operations,
-            "map": (None, json.dumps({file_name: ["variables.file"]}))
-        }
-
-    # TODO(gszpak): move it to client.py
-    @classmethod
-    def __send_create_file_command(
-            cls, client: Client, request_data: dict, file_name: str,
-            file_data: Tuple[str, Union[bytes, BinaryIO], str]) -> dict:
-        response = requests.post(
-            client.endpoint,
-            headers={"authorization": "Bearer %s" % client.api_key},
-            data=request_data,
-            files={file_name: file_data})
-
-        try:
-            response_json = response.json()
-        except ValueError:
-            raise labelbox.exceptions.LabelboxError(
-                "Failed to parse response as JSON: %s" % response.text)
-
-        response_data = response_json.get("data", None)
-        if response_data is None:
-            raise labelbox.exceptions.LabelboxError(
-                "Failed to upload, message: %s" %
-                response_json.get("errors", None))
-
-        if not response_data.get("createBulkImportRequest", None):
-            raise labelbox.exceptions.LabelboxError(
-                "Failed to create BulkImportRequest, message: %s" %
-                response_json.get("errors", None) or
-                response_data.get("error", None))
-
-        return response_data
-
-    # TODO(gszpak): all the code below should be handled automatically by Relationship
-    @classmethod
-    def __build_results_query_part(cls) -> str:
-        return """
-            project {
-                %s
-            }
-            createdBy {
-                %s
-            }
-            %s
-        """ % (query.results_query_part(Project),
-               query.results_query_part(User),
-               query.results_query_part(BulkImportRequest))
-
-    @classmethod
-    def __build_bulk_import_request_from_result(
-            cls, client: Client, result: dict) -> 'BulkImportRequest':
+    @staticmethod
+    def from_result(client: Client, result: dict) -> 'BulkImportRequest':
         project = result.pop("project")
         user = result.pop("createdBy")
         bulk_import_request = BulkImportRequest(client, result)
@@ -318,3 +203,115 @@ def __build_bulk_import_request_from_result(
         if user is not None:
             bulk_import_request.__user = User(client, user)  # type: ignore
         return bulk_import_request
+
+
+def create_from_url(cls, client: Client, project_id: str, name: str,
+                    url: str) -> 'BulkImportRequest':
+    """
+    Creates a BulkImportRequest from a publicly accessible URL
+    to an ndjson file with predictions.
+
+    Args:
+        client (Client): a Labelbox client
+        project_id (str): id of project for which predictions will be imported
+        name (str): name of BulkImportRequest
+        url (str): publicly accessible URL pointing to ndjson file containing predictions
+    Returns:
+        BulkImportRequest object
+    """
+    query_str = """mutation createBulkImportRequestPyApi(
+            $projectId: ID!, $name: String!, $fileUrl: String!) {
+        createBulkImportRequest(data: {
+            projectId: $projectId,
+            name: $name,
+            fileUrl: $fileUrl
+        }) {
+            %s
+        }
+    }
+    """ % _build_results_query_part()
+    params = {"projectId": project_id, "name": name, "fileUrl": url}
+    bulk_import_request_response = client.execute(query_str, params=params)
+    return BulkImportRequest.from_result(
+        client, bulk_import_request_response["createBulkImportRequest"])
+
+
+def create_from_objects(cls, client: Client, project_id: str, name: str,
+                        predictions: Iterable[dict]) -> 'BulkImportRequest':
+    """
+    Creates a BulkImportRequest from an iterable of dictionaries conforming to
+    JSON predictions format, e.g.:
+    ``{
+        "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
+        "schemaId": "ckappz7d700gn0zbocmqkwd9i",
+        "dataRow": {
+            "id": "ck1s02fqxm8fi0757f0e6qtdc"
+        },
+        "bbox": {
+            "top": 48,
+            "left": 58,
+            "height": 865,
+            "width": 1512
+        }
+    }``
+
+    Args:
+        client (Client): a Labelbox client
+        project_id (str): id of project for which predictions will be imported
+        name (str): name of BulkImportRequest
+        predictions (Iterable[dict]): iterable of dictionaries representing predictions
+    Returns:
+        BulkImportRequest object
+    """
+    data_str = ndjson.dumps(predictions)
+    data = data_str.encode('utf-8')
+    file_name = __make_file_name(project_id, name)
+    request_data = __make_request_data(project_id, name, len(data_str),
+                                       file_name)
+    file_data = (file_name, data, NDJSON_MIME_TYPE)
+    response_data = __send_create_file_command(client, request_data, file_name,
+                                               file_data)
+    return BulkImportRequest.from_result(
+        client, response_data["createBulkImportRequest"])
+
+
+def create_from_local_file(cls,
+                           client: Client,
+                           project_id: str,
+                           name: str,
+                           file: Path,
+                           validate_file=True) -> 'BulkImportRequest':
+    """
+    Creates a BulkImportRequest from a local ndjson file with predictions.
+
+    Args:
+        client (Client): a Labelbox client
+        project_id (str): id of project for which predictions will be imported
+        name (str): name of BulkImportRequest
+        file (Path): local ndjson file with predictions
+        validate_file (bool): a flag indicating if there should be a validation
+            if `file` is a valid ndjson file
+    Returns:
+        BulkImportRequest object
+    """
+    file_name = __make_file_name(project_id, name)
+    content_length = file.stat().st_size
+    request_data = __make_request_data(project_id, name, content_length,
+                                       file_name)
+    if validate_file:
+        with file.open('rb') as f:
+            reader = ndjson.reader(f)
+            try:
+                for line in reader:
+                    # ensure that the underlying json load call is valid
+                    # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
+                    pass
+            except ValueError:
+                raise ValueError(f"{file} is not a valid ndjson file")
+
+    with file.open('rb') as f:
+        file_data: Tuple[str, BinaryIO, str] = (file.name, f, NDJSON_MIME_TYPE)
+        response_data = __send_create_file_command(client, request_data,
+                                                   file_name, file_data)
+    return BulkImportRequest.from_result(
+        client, response_data["createBulkImportRequest"])
diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index e322a6e9f..d23a427b2 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -2,6 +2,7 @@
 from datetime import datetime, timezone
 import json
 import logging
+from urllib.parse import ulrparse
 import time
 from typing import Union, Iterable
 
@@ -357,19 +358,16 @@ def create_prediction(self, label, data_row, prediction_model=None):
         self,
         name: str,
         annotations: Union[str, Iterable[dict]],
-        validate_file: bool = True,
     ) -> 'BulkImportRequest':
         """ Uploads annotations to a project.
 
         Args:
             name: name of the BulkImportRequest job
             annotations:
-                url that is publically accessible by Labelbox containing an
+                url that is publicly accessible by Labelbox containing an
                 ndjson file
                 OR local path to an ndjson file
                 OR iterable of annotation rows
-            validate_file: a flag indicating if validation should be performed
-                on the local ndjson file specified as `annotations`
         Returns:
             BulkImportRequest
@@ -400,13 +398,14 @@ def _is_url_valid(url: str) -> bool:
                     url=annotations,
                 )
             else:
-                if not os.path.exists(annotations):
+                path = Path(annotations)
+                if not path.exists():
                     raise FileNotFoundError(
                         f'{annotations} is not a valid url nor existing local file'
                     )
                 return BulkImportRequest.create_from_local_file(
                     **shared_args,
-                    file=annotations,
+                    file=path,
                     validate_file=True,
                 )
         else:
@@ -424,5 +423,5 @@ class LabelingParameterOverride(DbObject):
 LabelerPerformance = namedtuple(
     "LabelerPerformance", "user count seconds_per_label, total_time_labeling "
     "consensus average_benchmark_agreement last_activity_time")
-LabelerPerformance.__doc__ = "Named tuple containing info about a labeler's " \
-    "performance."
+LabelerPerformance.__doc__ = (
+    "Named tuple containing info about a labeler's performance.")
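One detail of this refactor that is easy to miss: `create_from_local_file` no longer validates with `ndjson.loads(f.read())`, which materializes the whole file; it now walks an `ndjson.reader`, holding one line in memory at a time. A standalone sketch of the same pattern (the function name and file path are hypothetical):

```python
import ndjson


def validate_ndjson_file(path: str) -> None:
    """Raise ValueError on the first malformed line without ever
    loading the whole file into memory."""
    with open(path) as f:
        reader = ndjson.reader(f)
        try:
            for _line in reader:  # each step json-decodes exactly one line
                pass
        except ValueError:
            raise ValueError(f"{path} is not a valid ndjson file")
```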
From adbb08264374640a860a9d56ed107ddfd4547b82 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 12:22:16 -0700
Subject: [PATCH 05/32] update types

---
 labelbox/schema/project.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index d23a427b2..7e9078c91 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -357,7 +357,7 @@ def create_prediction(self, label, data_row, prediction_model=None):
     def upload_annotations(
         self,
         name: str,
-        annotations: Union[str, Iterable[dict]],
+        annotations: Union[str, Union[str, Path], Iterable[dict]],
     ) -> 'BulkImportRequest':
         """ Uploads annotations to a project.

From 90df3dbb9897cfd577041db754154f5d02e76728 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 18:45:44 -0700
Subject: [PATCH 06/32] remove project dependenc

---
 labelbox/schema/bulk_import_request.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 440673110..f380e45a4 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -13,7 +13,7 @@
 import labelbox.exceptions
 from labelbox import Client
-from labelbox import Project
+#from labelbox import Project
 from labelbox import User
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject
@@ -34,13 +34,24 @@ def __build_results_query_part() -> str:
     return """
     project {
         %s
+        name
+        description
+        updatedAt
+        createdAt
+        setupComplete
+        lastActivityTime
+        autoAuditNumberOfLabels
+        autoAuditPercentage
     }
     createdBy {
         %s
     }
     %s
-    """ % (query.results_query_part(Project), query.results_query_part(User),
-           query.results_query_part(BulkImportRequest))
+    """ % (
+        #query.results_query_part(Project),
+        query.results_query_part(User),
+        query.results_query_part(BulkImportRequest)
+    )

From bc22088b26462af21b3f7c3a142a8017869a82e7 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 18:47:06 -0700
Subject: [PATCH 07/32] yapf

---
 labelbox/schema/bulk_import_request.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index f380e45a4..e48cfad9c 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -50,8 +50,7 @@ def __build_results_query_part() -> str:
     """ % (
         #query.results_query_part(Project),
         query.results_query_part(User),
-        query.results_query_part(BulkImportRequest)
-    )
+        query.results_query_part(BulkImportRequest))

From fb4812618c66d2157fbb01116e2127a92d825adb Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 18:52:46 -0700
Subject: [PATCH 08/32] mypy

---
 labelbox/schema/project.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index 7e9078c91..6e33a9934 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -2,9 +2,10 @@
 from datetime import datetime, timezone
 import json
 import logging
-from urllib.parse import ulrparse
+from pathlib import Path
 import time
 from typing import Union, Iterable
+from urllib.parse import urlparse
 
 from labelbox import utils
 from labelbox.exceptions import InvalidQueryError

From 36cb038ec3dfddd466d5aaef55283da3a9790fcb Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:00:29 -0700
Subject: [PATCH 09/32] fix

---
 labelbox/schema/bulk_import_request.py | 20 +++++++++++---------
 labelbox/schema/project.py             |  3 ++-
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index e48cfad9c..3e314c3bd 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -33,7 +33,6 @@ def __make_file_name(project_id: str, name: str) -> str:
 def __build_results_query_part() -> str:
     return """
     project {
-        %s
         name
         description
         updatedAt
@@ -86,7 +85,7 @@ def _make_request_data(project_id: str, name: str, content_length: int,
 
 # TODO(gszpak): move it to client.py
 def __send_create_file_command(
-        cls, client: Client, request_data: dict, file_name: str,
+        client: Client, request_data: dict, file_name: str,
         file_data: Tuple[str, Union[bytes, BinaryIO], str]) -> dict:
     response = requests.post(
         client.endpoint,
@@ -215,7 +214,7 @@ def from_result(client: Client, result: dict) -> 'BulkImportRequest':
     return bulk_import_request
 
 
-def create_from_url(cls, client: Client, project_id: str, name: str,
+def create_from_url(client: Client, project_id: str, name: str,
                     url: str) -> 'BulkImportRequest':
     """
     Creates a BulkImportRequest from a publicly accessible URL
@@ -246,7 +245,7 @@ def create_from_url(client: Client, project_id: str, name: str,
     client, bulk_import_request_response["createBulkImportRequest"])
 
 
-def create_from_objects(cls, client: Client, project_id: str, name: str,
+def create_from_objects(client: Client, project_id: str, name: str,
                         predictions: Iterable[dict]) -> 'BulkImportRequest':
@@ -279,14 +278,17 @@ def create_from_objects(client: Client, project_id: str, name: str,
     request_data = __make_request_data(project_id, name, len(data_str),
                                        file_name)
     file_data = (file_name, data, NDJSON_MIME_TYPE)
-    response_data = __send_create_file_command(client, request_data, file_name,
-                                               file_data)
+    response_data = __send_create_file_command(
+        client=client,
+        request_data=request_data,
+        file_name=file_name,
+        file_data=file_data)
 
     return BulkImportRequest.from_result(
         client, response_data["createBulkImportRequest"])
 
 
-def create_from_local_file(cls,
-                           client: Client,
+def create_from_local_file(client: Client,
                            project_id: str,
                            name: str,
                            file: Path,
@@ -320,7 +322,7 @@ def create_from_local_file(client: Client,
         raise ValueError(f"{file} is not a valid ndjson file")
 
     with file.open('rb') as f:
-        file_data: Tuple[str, BinaryIO, str] = (file.name, f, NDJSON_MIME_TYPE)
+        file_data = (file.name, f, NDJSON_MIME_TYPE)
         response_data = __send_create_file_command(client, request_data,
                                                    file_name, file_data)
     return BulkImportRequest.from_result(
         client, response_data["createBulkImportRequest"])
diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index 6e33a9934..b59810d90 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -8,6 +8,7 @@
 from urllib.parse import urlparse
 
 from labelbox import utils
+from labelbox import BulkImportRequest
 from labelbox.exceptions import InvalidQueryError
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject, Updateable, Deletable
@@ -391,7 +392,7 @@ def _is_url_valid(url: str) -> bool:
 
             """
             parsed = urlparse(url)
-            return parsed.http and parsed.netloc
+            return parsed.scheme and parsed.netloc
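The one-character-looking fix in patch 09 (`parsed.http` → `parsed.scheme`) matters more than its size suggests: `urlparse` results carry `scheme`, `netloc`, `path`, and friends, but no `http` attribute, so the original check raised `AttributeError` for every input. A minimal illustration of the corrected URL-vs-path dispatch (a later patch in this series additionally wraps both attributes in `bool()` to satisfy the declared return type):

```python
from urllib.parse import urlparse


def is_url(candidate: str) -> bool:
    parsed = urlparse(candidate)
    # A real URL needs both a scheme ("https") and a network
    # location ("example.com"); plain file paths have neither.
    return bool(parsed.scheme) and bool(parsed.netloc)


assert is_url("https://example.com/predictions.ndjson")
assert not is_url("local/predictions.ndjson")
```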
From 3420909b242a4589a5e8074628bade90b143bfc2 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:02:25 -0700
Subject: [PATCH 10/32] yapf

---
 labelbox/schema/bulk_import_request.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 3e314c3bd..5d5a5ce40 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -278,11 +278,10 @@ def create_from_objects(client: Client, project_id: str, name: str,
     request_data = __make_request_data(project_id, name, len(data_str),
                                        file_name)
     file_data = (file_name, data, NDJSON_MIME_TYPE)
-    response_data = __send_create_file_command(
-        client=client,
-        request_data=request_data,
-        file_name=file_name,
-        file_data=file_data)
+    response_data = __send_create_file_command(client=client,
+                                               request_data=request_data,
+                                               file_name=file_name,
+                                               file_data=file_data)
 
     return BulkImportRequest.from_result(
         client, response_data["createBulkImportRequest"])

From be5faabf067c6b7150dc16c215cf32becd0bdb58 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:06:36 -0700
Subject: [PATCH 11/32] mypy

---
 labelbox/schema/bulk_import_request.py | 4 ++--
 labelbox/schema/project.py             | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 5d5a5ce40..867d43936 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -53,7 +53,7 @@ def __build_results_query_part() -> str:
 
 # TODO(gszpak): move it to client.py
-def _make_request_data(project_id: str, name: str, content_length: int,
+def __make_request_data(project_id: str, name: str, content_length: int,
                        file_name: str) -> dict:
     query_str = """mutation createBulkImportRequestFromFilePyApi(
@@ -238,7 +238,7 @@ def create_from_url(client: Client, project_id: str, name: str,
             %s
         }
     }
-    """ % _build_results_query_part()
+    """ % __build_results_query_part()
     params = {"projectId": project_id, "name": name, "fileUrl": url}
     bulk_import_request_response = client.execute(query_str, params=params)
     return BulkImportRequest.from_result(
diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index b59810d90..f80942081 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -360,7 +360,7 @@ def create_prediction(self, label, data_row, prediction_model=None):
         self,
         name: str,
         annotations: Union[str, Union[str, Path], Iterable[dict]],
-    ) -> 'BulkImportRequest':
+    ) -> BulkImportRequest:
         """ Uploads annotations to a project.
 
         Args:
@@ -392,7 +392,7 @@ def _is_url_valid(url: str) -> bool:
 
             """
             parsed = urlparse(url)
-            return parsed.scheme and parsed.netloc
+            return bool(parsed.scheme) and bool(parsed.netloc)

From bd790329b6c54f9f384d7a1b7569db12d5068b3d Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:08:01 -0700
Subject: [PATCH 12/32] ypaf

---
 labelbox/schema/bulk_import_request.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 867d43936..52c2e5e5c 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -54,7 +54,7 @@ def __build_results_query_part() -> str:
 
 # TODO(gszpak): move it to client.py
 def __make_request_data(project_id: str, name: str, content_length: int,
-                       file_name: str) -> dict:
+                        file_name: str) -> dict:
     query_str = """mutation createBulkImportRequestFromFilePyApi(

From 73521404da8524c45e9c60e4bb6f0cc9f757cbd4 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:15:35 -0700
Subject: [PATCH 13/32] improt

---
 labelbox/__init__.py                   | 1 +
 labelbox/schema/__init__.py            | 1 +
 labelbox/schema/bulk_import_request.py | 6 +++---
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/labelbox/__init__.py b/labelbox/__init__.py
index bf5b3c12b..115023bd4 100644
--- a/labelbox/__init__.py
+++ b/labelbox/__init__.py
@@ -1,6 +1,7 @@
 name = "labelbox"
 
 from labelbox.client import Client
+from labelbox.schema.bulk_import_request import BulkImportRequest
 from labelbox.schema.project import Project
 from labelbox.schema.dataset import Dataset
 from labelbox.schema.data_row import DataRow
diff --git a/labelbox/schema/__init__.py b/labelbox/schema/__init__.py
index 580f40f21..eadb49ab8 100644
--- a/labelbox/schema/__init__.py
+++ b/labelbox/schema/__init__.py
@@ -1,4 +1,5 @@
 import labelbox.schema.asset_metadata
+import labelbox.schema.bulk_import_request
 import labelbox.schema.benchmark
 import labelbox.schema.data_row
 import labelbox.schema.dataset
diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 52c2e5e5c..b1cb9769a 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -215,7 +215,7 @@ def from_result(client: Client, result: dict) -> 'BulkImportRequest':
 
 def create_from_url(client: Client, project_id: str, name: str,
-                    url: str) -> 'BulkImportRequest':
+                    url: str) -> BulkImportRequest:
     """
     Creates a BulkImportRequest from a publicly accessible URL
     to an ndjson file with predictions.
@@ -246,7 +246,7 @@ def create_from_url(client: Client, project_id: str, name: str,
 
 def create_from_objects(client: Client, project_id: str, name: str,
-                        predictions: Iterable[dict]) -> 'BulkImportRequest':
+                        predictions: Iterable[dict]) -> BulkImportRequest:
     """
     Creates a BulkImportRequest from an iterable of dictionaries conforming to
@@ -291,7 +291,7 @@ def create_from_local_file(client: Client,
                            project_id: str,
                            name: str,
                            file: Path,
-                           validate_file=True) -> 'BulkImportRequest':
+                           validate_file=True) -> BulkImportRequest:
     """
     Creates a BulkImportRequest from a local ndjson file with predictions.
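Patch 13 above registers the class on the package root, and patch 14 below switches `project.py` to import the helper functions from their defining module rather than from `labelbox` itself. The two import styles side by side (a sketch of the pattern these patches converge on):

```python
# User-facing code: the package root now re-exports the class.
from labelbox import BulkImportRequest

# Internal code: importing from the defining module gives the type
# checker (and the import machinery) a direct path that does not
# pass back through labelbox/__init__.py.
from labelbox.schema.bulk_import_request import (create_from_url,
                                                 create_from_objects,
                                                 create_from_local_file)
```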
From a25647fc4135ac77a0f2669b31960c75472c8fe3 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:19:19 -0700
Subject: [PATCH 14/32] import

---
 labelbox/schema/project.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index f80942081..50a366c96 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -8,7 +8,7 @@
 from urllib.parse import urlparse
 
 from labelbox import utils
-from labelbox import BulkImportRequest
+from labelbox.schema.bulk_import_request import create_from_url, create_from_objects, create_from_local_file
 from labelbox.exceptions import InvalidQueryError
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject, Updateable, Deletable
@@ -395,7 +395,7 @@ def _is_url_valid(url: str) -> bool:
             return bool(parsed.scheme) and bool(parsed.netloc)
 
         if _is_url_valid(annotations):
-            return BulkImportRequest.create_from_url(
+            return create_from_url(
                 **shared_args,
                 url=annotations,
             )
@@ -405,13 +405,13 @@ def _is_url_valid(url: str) -> bool:
                 raise FileNotFoundError(
                     f'{annotations} is not a valid url nor existing local file'
                 )
-            return BulkImportRequest.create_from_local_file(
+            return create_from_local_file(
                 **shared_args,
                 file=path,
                 validate_file=True,
             )
         else:
-            return BulkImportRequest.create_from_objects(
+            return create_from_objects(
                 **shared_args,
                 predictions=annotations,
             )

From 958651c37272c3d8ea5e081094918c07aa2a0a29 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:26:12 -0700
Subject: [PATCH 15/32] mypy

---
 labelbox/schema/project.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index 50a366c96..d256b372a 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -374,12 +374,6 @@ def upload_annotations(
             BulkImportRequest
         """
 
-        shared_args = {
-            'client': self.client,
-            'project_id': self.uid,
-            'name': name,
-        }
-
         if isinstance(annotations, str):
 
             def _is_url_valid(url: str) -> bool:
@@ -396,7 +390,9 @@ def _is_url_valid(url: str) -> bool:
 
             if _is_url_valid(annotations):
                 return create_from_url(
-                    **shared_args,
+                    client=self.client,
+                    project_id=self.uid,
+                    name=name,
                     url=annotations,
                 )
             else:
@@ -406,14 +402,18 @@ def _is_url_valid(url: str) -> bool:
                         f'{annotations} is not a valid url nor existing local file'
                     )
                 return create_from_local_file(
-                    **shared_args,
+                    client=self.client,
+                    project_id=self.uid,
+                    name=name,
                     file=path,
                     validate_file=True,
                 )
         else:
             return create_from_objects(
-                **shared_args,
-                predictions=annotations,
+                client=self.client,
+                project_id=self.uid,
+                name=name,
+                predictions=annotations,  # type: ignore
             )

From 068aa45c58adfeed03181d0282023dacccb1c4d6 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:32:34 -0700
Subject: [PATCH 16/32] mpypy

---
 labelbox/schema/project.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index d256b372a..290e5559f 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -360,7 +360,7 @@ def create_prediction(self, label, data_row, prediction_model=None):
         self,
         name: str,
         annotations: Union[str, Union[str, Path], Iterable[dict]],
-    ) -> BulkImportRequest:
+    ) -> 'BulkImportRequest':
         """ Uploads annotations to a project.
 
         Args:

From 393d87ce0be2b980cc6bf05e5ac2f436d679cdde Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:34:59 -0700
Subject: [PATCH 17/32] ignore

---
 labelbox/schema/project.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py
index 290e5559f..a2a09cf39 100644
--- a/labelbox/schema/project.py
+++ b/labelbox/schema/project.py
@@ -360,7 +360,7 @@ def create_prediction(self, label, data_row, prediction_model=None):
         self,
         name: str,
         annotations: Union[str, Union[str, Path], Iterable[dict]],
-    ) -> 'BulkImportRequest':
+    ) -> 'BulkImportRequest':  # type: ignore
         """ Uploads annotations to a project.
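The `shared_args` removal in patch 15 is a typing fix rather than a style choice: unpacking a plain `dict` with `**` erases the per-key types, so mypy cannot match `client`/`project_id`/`name` against the callee's signature, whereas explicit keywords are checked individually. A rough sketch of the difference (the wrapper function is hypothetical):

```python
from labelbox.schema.bulk_import_request import create_from_url


def upload(client, project_id: str, name: str, url: str):
    shared_args = {'client': client, 'project_id': project_id, 'name': name}

    # Opaque to the type checker: the dict's values collapse to one
    # wide inferred type, so per-parameter checking is lost.
    create_from_url(**shared_args, url=url)

    # Fully checked: each keyword is matched against the signature.
    create_from_url(client=client, project_id=project_id, name=name, url=url)
```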
From a0553facdda3446b1cee7f37467c4065df8491e9 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:41:21 -0700
Subject: [PATCH 18/32] mypy

---
 labelbox/schema/bulk_import_request.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index b1cb9769a..a96cad7ff 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -12,7 +12,7 @@
 import requests
 
 import labelbox.exceptions
-from labelbox import Client
+#from labelbox import Client
 #from labelbox import Project
 from labelbox import User
 from labelbox.orm import query
@@ -85,7 +85,7 @@ def __make_request_data(project_id: str, name: str, content_length: int,
 
 # TODO(gszpak): move it to client.py
 def __send_create_file_command(
-        client: Client, request_data: dict, file_name: str,
+        client: 'Client', request_data: dict, file_name: str,
         file_data: Tuple[str, Union[bytes, BinaryIO], str]) -> dict:
     response = requests.post(
         client.endpoint,
@@ -125,7 +125,8 @@ class BulkImportRequest(DbObject):
 
     # TODO(gszpak): building query body should be handled by the client
     @staticmethod
-    def get(client: Client, project_id: str, name: str) -> 'BulkImportRequest':
+    def get(client: 'Client', project_id: str,
+            name: str) -> 'BulkImportRequest':
         """
         Fetches existing BulkImportRequest.
@@ -202,7 +203,7 @@ def created_by(self):  # type: ignore
         return None
 
     @staticmethod
-    def from_result(client: Client, result: dict) -> 'BulkImportRequest':
+    def from_result(client: 'Client', result: dict) -> 'BulkImportRequest':
         project = result.pop("project")
         user = result.pop("createdBy")
         bulk_import_request = BulkImportRequest(client, result)
@@ -215,7 +216,7 @@ def from_result(client: 'Client', result: dict) -> 'BulkImportRequest':
     return bulk_import_request
 
 
-def create_from_url(client: Client, project_id: str, name: str,
+def create_from_url(client: 'Client', project_id: str, name: str,
                     url: str) -> BulkImportRequest:
@@ -246,7 +247,7 @@ def create_from_url(client: 'Client', project_id: str, name: str,
 
 
-def create_from_objects(client: Client, project_id: str, name: str,
+def create_from_objects(client: 'Client', project_id: str, name: str,
                         predictions: Iterable[dict]) -> BulkImportRequest:
@@ -288,7 +289,7 @@ def create_from_objects(client: 'Client', project_id: str, name: str,
 
 
-def create_from_local_file(client: Client,
+def create_from_local_file(client: 'Client',
                            project_id: str,
                            name: str,
                            file: Path,

From 02eed78362e5525d9a64dd42f3e6057bb7b3fcb3 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 19:43:53 -0700
Subject: [PATCH 19/32] mypy

---
 labelbox/schema/bulk_import_request.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index a96cad7ff..165835444 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -85,7 +85,7 @@ def __make_request_data(project_id: str, name: str, content_length: int,
 
 # TODO(gszpak): move it to client.py
 def __send_create_file_command(
-        client: 'Client', request_data: dict, file_name: str,
+        client, request_data: dict, file_name: str,
         file_data: Tuple[str, Union[bytes, BinaryIO], str]) -> dict:
     response = requests.post(
         client.endpoint,
@@ -125,8 +125,7 @@ class BulkImportRequest(DbObject):
 
     # TODO(gszpak): building query body should be handled by the client
     @staticmethod
-    def get(client: 'Client', project_id: str,
-            name: str) -> 'BulkImportRequest':
+    def get(client, project_id: str, name: str) -> 'BulkImportRequest':
         """
         Fetches existing BulkImportRequest.
@@ -202,7 +202,7 @@ def created_by(self):  # type: ignore
         return None
 
     @staticmethod
-    def from_result(client: 'Client', result: dict) -> 'BulkImportRequest':
+    def from_result(client, result: dict) -> 'BulkImportRequest':
         project = result.pop("project")
         user = result.pop("createdBy")
         bulk_import_request = BulkImportRequest(client, result)
@@ -215,7 +214,7 @@ def from_result(client, result: dict) -> 'BulkImportRequest':
     return bulk_import_request
 
 
-def create_from_url(client: 'Client', project_id: str, name: str,
+def create_from_url(client, project_id: str, name: str,
                     url: str) -> BulkImportRequest:
@@ -246,7 +245,7 @@ def create_from_url(client, project_id: str, name: str,
 
 
-def create_from_objects(client: 'Client', project_id: str, name: str,
+def create_from_objects(client, project_id: str, name: str,
                         predictions: Iterable[dict]) -> BulkImportRequest:
@@ -288,7 +287,7 @@ def create_from_objects(client, project_id: str, name: str,
 
 
-def create_from_local_file(client: 'Client',
+def create_from_local_file(client,
                            project_id: str,
                            name: str,
                            file: Path,
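Patches 18–19 first quote the `Client` annotation and then drop it altogether, because importing `Client` back out of the package root is circular. For reference, the idiom usually reached for here is a `typing.TYPE_CHECKING` guard, which keeps the annotation without any runtime import (a sketch of the alternative, not what the series ends up doing):

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Evaluated only by static type checkers, never at runtime,
    # so it cannot participate in an import cycle.
    from labelbox import Client


def create_from_url(client: 'Client', project_id: str, name: str,
                    url: str) -> None:
    ...  # body elided; the quoted annotation resolves for mypy only
```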
From 618e73fc5ed6d5d06a79d991d7e0ba6812fb01b9 Mon Sep 17 00:00:00 2001
From: rllin
Date: Wed, 5 Aug 2020 23:20:08 -0700
Subject: [PATCH 20/32] clean up

---
 labelbox/schema/bulk_import_request.py | 174 ++++++++++++-------------
 1 file changed, 81 insertions(+), 93 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 165835444..51b1507bf 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -12,8 +12,7 @@
 import requests
 
 import labelbox.exceptions
-#from labelbox import Client
-#from labelbox import Project
+from labelbox import Project
 from labelbox import User
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject
@@ -33,86 +32,18 @@ def __make_file_name(project_id: str, name: str) -> str:
 def __build_results_query_part() -> str:
     return """
     project {
-        name
-        description
-        updatedAt
-        createdAt
-        setupComplete
-        lastActivityTime
-        autoAuditNumberOfLabels
-        autoAuditPercentage
+        %s
     }
     createdBy {
         %s
     }
     %s
     """ % (
-        #query.results_query_part(Project),
+        query.results_query_part(Project),
         query.results_query_part(User),
         query.results_query_part(BulkImportRequest))
 
 
-# TODO(gszpak): move it to client.py
-def __make_request_data(project_id: str, name: str, content_length: int,
-                        file_name: str) -> dict:
-    query_str = """mutation createBulkImportRequestFromFilePyApi(
-            $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) {
-        createBulkImportRequest(data: {
-            projectId: $projectId,
-            name: $name,
-            filePayload: {
-                file: $file,
-                contentLength: $contentLength
-            }
-        }) {
-            %s
-        }
-    }
-    """ % __build_results_query_part()
-    variables = {
-        "projectId": project_id,
-        "name": name,
-        "file": None,
-        "contentLength": content_length
-    }
-    operations = json.dumps({"variables": variables, "query": query_str})
-
-    return {
-        "operations": operations,
-        "map": (None, json.dumps({file_name: ["variables.file"]}))
-    }
-
-
-# TODO(gszpak): move it to client.py
-def __send_create_file_command(
-        client, request_data: dict, file_name: str,
-        file_data: Tuple[str, Union[bytes, BinaryIO], str]) -> dict:
-    response = requests.post(
-        client.endpoint,
-        headers={"authorization": "Bearer %s" % client.api_key},
-        data=request_data,
-        files={file_name: file_data})
-
-    try:
-        response_json = response.json()
-    except ValueError:
-        raise labelbox.exceptions.LabelboxError(
-            "Failed to parse response as JSON: %s" % response.text)
-
-    response_data = response_json.get("data", None)
-    if response_data is None:
-        raise labelbox.exceptions.LabelboxError(
-            "Failed to upload, message: %s" % response_json.get("errors", None))
-
-    if not response_data.get("createBulkImportRequest", None):
-        raise labelbox.exceptions.LabelboxError(
-            "Failed to create BulkImportRequest, message: %s" %
-            response_json.get("errors", None) or
-            response_data.get("error", None))
-
-    return response_data
-
-
 class BulkImportRequest(DbObject):
     project = Relationship.ToOne("Project")
     name = Field.String("name")
@@ -113,9 +55,8 @@ class BulkImportRequest(DbObject):
     state = Field.Enum(BulkImportRequestState, "state")
 
     # TODO(gszpak): building query body should be handled by the client
-    @staticmethod
-    def get(client, project_id: str, name: str) -> 'BulkImportRequest':
+    def get(self, project_id: str, name: str) -> 'BulkImportRequest':
         """
         Fetches existing BulkImportRequest.
@@ -147,15 +88,15 @@ def get(self, project_id: str, name: str) -> 'BulkImportRequest':
         """ % __build_results_query_part()
         params = {"projectId": project_id, "name": name}
-        bulk_import_request_kwargs = client.execute(
+        bulk_import_request_kwargs = self.client.execute(
             query_str, params=params).get("bulkImportRequest")
         if bulk_import_request_kwargs is None:
             raise labelbox.exceptions.ResourceNotFoundError(
                 BulkImportRequest, {
                     "projectId": project_id,
                     "name": name
                 })
-        return BulkImportRequest.from_result(client, bulk_import_request_kwargs)
+        return BulkImportRequest.from_result(self.client, bulk_import_request_kwargs)
 
     def refresh(self) -> None:
         """
@@ -201,16 +142,74 @@ def created_by(self):  # type: ignore
             return self.__user
         return None
 
+    # TODO(gszpak): move it to client.py
+    def __make_request_data(self, name: str, content_length: int,
+                            file_name: str) -> dict:
+        query_str = """mutation createBulkImportRequestFromFilePyApi(
+                $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) {
+            createBulkImportRequest(data: {
+                projectId: $projectId,
+                name: $name,
+                filePayload: {
+                    file: $file,
+                    contentLength: $contentLength
+                }
+            }) {
+                %s
+            }
+        }
+        """ % __build_results_query_part()
+        variables = {
+            "projectId": self.project.uid,
+            "name": name,
+            "file": None,
+            "contentLength": content_length
+        }
+        operations = json.dumps({"variables": variables, "query": query_str})
+
+        return {
+            "operations": operations,
+            "map": (None, json.dumps({file_name: ["variables.file"]}))
+        }
+
+    # TODO(gszpak): move it to client.py
+    def __send_create_file_command(
+            self, request_data: dict, file_name: str,
+            file_data: Tuple[str, Union[bytes, BinaryIO], str]) -> dict:
+        response = requests.post(
+            self.client.endpoint,
+            headers={"authorization": "Bearer %s" % self.client.api_key},
+            data=request_data,
+            files={file_name: file_data})
+
+        try:
+            response_json = response.json()
+        except ValueError:
+            raise labelbox.exceptions.LabelboxError(
+                "Failed to parse response as JSON: %s" % response.text)
+
+        response_data = response_json.get("data", None)
+        if response_data is None:
+            raise labelbox.exceptions.LabelboxError(
+                "Failed to upload, message: %s" % response_json.get("errors", None))
+
+        if not response_data.get("createBulkImportRequest", None):
+            raise labelbox.exceptions.LabelboxError(
+                "Failed to create BulkImportRequest, message: %s" %
+                response_json.get("errors", None) or
+                response_data.get("error", None))
+
+        return response_data
+
-    @staticmethod
-    def from_result(client, result: dict) -> 'BulkImportRequest':
+    def from_result(result: dict) -> 'BulkImportRequest':
         project = result.pop("project")
         user = result.pop("createdBy")
-        bulk_import_request = BulkImportRequest(client, result)
+        bulk_import_request = BulkImportRequest(self.client, result)
         if project is not None:
             bulk_import_request.__project = Project(  # type: ignore
-                client, project)
+                self.client, project)
         if user is not None:
-            bulk_import_request.__user = User(client, user)  # type: ignore
+            bulk_import_request.__user = User(self.client, user)  # type: ignore
         return bulk_import_request
@@ -263,13 +262,12 @@ def create_from_objects(client, project_id: str, name: str,
     data_str = ndjson.dumps(predictions)
     data = data_str.encode('utf-8')
     file_name = __make_file_name(project_id, name)
-    request_data = __make_request_data(project_id, name, len(data_str),
-                                       file_name)
+    request_data = self.__make_request_data(name, len(data_str), file_name)
     file_data = (file_name, data, NDJSON_MIME_TYPE)
-    response_data = __send_create_file_command(client=client,
-                                               request_data=request_data,
-                                               file_name=file_name,
-                                               file_data=file_data)
+    response_data = self.__send_create_file_command(
+        request_data=request_data,
+        file_name=file_name,
+        file_data=file_data)
 
     return BulkImportRequest.from_result(
         client, response_data["createBulkImportRequest"])
@@ -294,23 +292,22 @@ def create_from_local_file(client,
     """
     file_name = __make_file_name(project_id, name)
     content_length = file.stat().st_size
-    request_data = __make_request_data(project_id, name, content_length,
-                                       file_name)
+    request_data = self.__make_request_data(name, content_length, file_name)
     if validate_file:
         with file.open('rb') as f:
             reader = ndjson.reader(f)
+            # ensure that the underlying json load call is valid
+            # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
+            # by iterating through the file so we only store
+            # each line in memory rather than the entire file
             try:
                 for line in reader:
-                    # ensure that the underlying json load call is valid
-                    # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
                     pass
             except ValueError:
                 raise ValueError(f"{file} is not a valid ndjson file")
 
     with file.open('rb') as f:
         file_data = (file.name, f, NDJSON_MIME_TYPE)
-        response_data = __send_create_file_command(client, request_data,
-                                                   file_name, file_data)
+        response_data = self.__send_create_file_command(
+            request_data, file_name, file_data)
     return BulkImportRequest.from_result(
         client, response_data["createBulkImportRequest"])
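Patch 20 pulls the helpers into the class, but the module-level `create_from_objects` and `create_from_local_file` functions still call `self.__make_request_data(...)` with no `self` in scope — a latent `NameError` — and the double-underscore prefix adds name mangling on top; patch 21 below backs the move out. A small reminder of how the mangling behaves (toy class, not from this repo):

```python
class Job:

    def __helper(self) -> str:
        # Stored on the class as _Job__helper, not __helper.
        return "mangled"

    def run(self) -> str:
        return self.__helper()  # inside the class, mangling is transparent


job = Job()
assert job.run() == "mangled"
assert job._Job__helper() == "mangled"  # the mangled name from outside
# job.__helper() would raise AttributeError here.
```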
""" - bulk_import_request = self.get(self.client, - self.project().uid, self.name) + bulk_import_request = self.get(self.project().uid, self.name) for field in self.fields(): setattr(self, field.name, getattr(bulk_import_request, field.name)) @@ -131,74 +190,15 @@ def created_by(self): # type: ignore return self.__user return None - # TODO(gszpak): move it to client.py - def __make_request_data(self, name: str, content_length: int, - file_name: str) -> dict: - query_str = """mutation createBulkImportRequestFromFilePyApi( - $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - filePayload: { - file: $file, - contentLength: $contentLength - } - }) { - %s - } - } - """ % __build_results_query_part() - variables = { - "projectId": self.project.uid, - "name": name, - "file": None, - "contentLength": content_length - } - operations = json.dumps({"variables": variables, "query": query_str}) - - return { - "operations": operations, - "map": (None, json.dumps({file_name: ["variables.file"]})) - } - - # TODO(gszpak): move it to client.py - def __send_create_file_command( - self, request_data: dict, file_name: str, - file_data: Tuple[str, Union[bytes, BinaryIO], str]) -> dict: - response = requests.post( - self.client.endpoint, - headers={"authorization": "Bearer %s" % self.client.api_key}, - data=request_data, - files={file_name: file_data}) - - try: - response_json = response.json() - except ValueError: - raise labelbox.exceptions.LabelboxError( - "Failed to parse response as JSON: %s" % response.text) - - response_data = response_json.get("data", None) - if response_data is None: - raise labelbox.exceptions.LabelboxError( - "Failed to upload, message: %s" % response_json.get("errors", None)) - - if not response_data.get("createBulkImportRequest", None): - raise labelbox.exceptions.LabelboxError( - "Failed to create BulkImportRequest, message: %s" % - response_json.get("errors", None) or - response_data.get("error", None)) - - return response_data - - def from_result(result: dict) -> 'BulkImportRequest': + def from_result(client, result: dict) -> 'BulkImportRequest': project = result.pop("project") user = result.pop("createdBy") - bulk_import_request = BulkImportRequest(self.client, result) + bulk_import_request = BulkImportRequest(client, result) if project is not None: bulk_import_request.__project = Project( # type: ignore - self.client, project) + client, project) if user is not None: - bulk_import_request.__user = User(self.client, user) # type: ignore + bulk_import_request.__user = User(client, user) # type: ignore return bulk_import_request @@ -263,12 +263,13 @@ def create_from_objects(client, project_id: str, name: str, data_str = ndjson.dumps(predictions) data = data_str.encode('utf-8') file_name = __make_file_name(project_id, name) - request_data = self.__make_request_data(name, len(data_str), file_name) + request_data = __make_request_data(project_id, name, len(data_str), + file_name) file_data = (file_name, data, NDJSON_MIME_TYPE) - response_data = self.__send_create_file_command( - request_data=request_data, - file_name=file_name, - file_data=file_data) + response_data = __send_create_file_command(client, + request_data=request_data, + file_name=file_name, + file_data=file_data) return BulkImportRequest.from_result( client, response_data["createBulkImportRequest"]) @@ -294,7 +295,8 @@ def create_from_local_file(client, """ file_name = __make_file_name(project_id, name) content_length = 
file.stat().st_size - request_data = self.__make_request_data(name, content_length, file_name) + request_data = __make_request_data(project_id, name, content_length, + file_name) if validate_file: with file.open('rb') as f: reader = ndjson.reader(f) @@ -310,7 +312,7 @@ def create_from_local_file(client, with file.open('rb') as f: file_data = (file.name, f, NDJSON_MIME_TYPE) - response_data = self.__send_create_file_command( - request_data, file_name, file_data) + response_data = __send_create_file_command(client, request_data, + file_name, file_data) return BulkImportRequest.from_result( client, response_data["createBulkImportRequest"]) From 01596fa6b728be13ba555b20470fb68afbe61e21 Mon Sep 17 00:00:00 2001 From: rllin Date: Fri, 7 Aug 2020 11:32:29 -0700 Subject: [PATCH 22/32] clean up --- .github/workflows/python-package.yml | 13 ++++-- labelbox/schema/bulk_import_request.py | 64 +------------------------- tests/integration/conftest.py | 8 ++-- 3 files changed, 15 insertions(+), 70 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6a24eb8f9..8b6b81775 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -48,11 +48,16 @@ jobs: - name: Test with tox env: # make sure to tell tox to use these environs in tox.ini - LABELBOX_TEST_API_KEY: ${{ secrets.LABELBOX_API_KEY }} - LABELBOX_TEST_ENDPOINT: "https://api.labelbox.com/graphql" + #LABELBOX_TEST_API_KEY: ${{ secrets.LABELBOX_API_KEY }} + #LABELBOX_TEST_ENDPOINT: "https://api.labelbox.com/graphql" # TODO: create a staging environment (develop) # we only test against prod right now because the merges are right into # the main branch which is develop right now - LABELBOX_TEST_ENVIRON: "PROD" + #LABELBOX_TEST_ENVIRON: "PROD" + # + # randall+staging-python@labelbox.com + LABELBOX_TEST_API_KEY: ${{ secrets.STAGING_LABELBOX_API_KEY }} + LABELBOX_TEST_ENDPOINT: "https://staging-api.labelbox.com/graphql" + LABELBOX_TEST_ENVIRON: "STAGING" run: | - tox -- -svv \ No newline at end of file + tox -- -svv diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py index 2f0ad4590..17e7751ad 100644 --- a/labelbox/schema/bulk_import_request.py +++ b/labelbox/schema/bulk_import_request.py @@ -28,20 +28,6 @@ def __make_file_name(project_id: str, name: str) -> str: return f"{project_id}__{name}.ndjson" -# TODO(gszpak): all the code below should be handled automatically by Relationship -def __build_results_query_part() -> str: - return """ - project { - %s - } - createdBy { - %s - } - %s - """ % (query.results_query_part(Project), query.results_query_part(User), - query.results_query_part(BulkImportRequest)) - - # TODO(gszpak): move it to client.py def __make_request_data(project_id: str, name: str, content_length: int, file_name: str) -> dict: @@ -58,7 +44,7 @@ def __make_request_data(project_id: str, name: str, content_length: int, %s } } - """ % __build_results_query_part() + """ % query.results_query_part(BulkImportRequest) variables = { "projectId": project_id, "name": name, @@ -113,40 +99,6 @@ class BulkImportRequest(DbObject): status_file_url = Field.String("status_file_url") state = Field.Enum(BulkImportRequestState, "state") - # TODO(gszpak): building query body should be handled by the client - def get(self, project_id: str, name: str) -> 'BulkImportRequest': - """ - Fetches existing BulkImportRequest. 
- - Args: - client (Client): a Labelbox client - project_id (str): BulkImportRequest's project id - name (str): name of BulkImportRequest - Returns: - BulkImportRequest object - """ - query_str = """query getBulkImportRequestPyApi( - $projectId: ID!, $name: String!) { - bulkImportRequest(where: { - projectId: $projectId, - name: $name - }) { - %s - } - } - """ % __build_results_query_part() - params = {"projectId": project_id, "name": name} - bulk_import_request_kwargs = self.client.execute( - query_str, params=params).get("bulkImportRequest") - if bulk_import_request_kwargs is None: - raise labelbox.exceptions.ResourceNotFoundError( - BulkImportRequest, { - "projectId": project_id, - "name": name - }) - return BulkImportRequest.from_result(self.client, - bulk_import_request_kwargs) - def refresh(self) -> None: """ Synchronizes values of all fields with the database. @@ -178,18 +130,6 @@ def wait_until_done(self, sleep_time_seconds: int = 30) -> None: def __exponential_backoff_refresh(self) -> None: self.refresh() - # TODO(gszpak): project() and created_by() methods - # TODO(gszpak): are hacky ways to eagerly load the relationships - def project(self): # type: ignore - if self.__project is not None: - return self.__project - return None - - def created_by(self): # type: ignore - if self.__user is not None: - return self.__user - return None - def from_result(client, result: dict) -> 'BulkImportRequest': project = result.pop("project") user = result.pop("createdBy") @@ -226,7 +166,7 @@ def create_from_url(client, project_id: str, name: str, %s } } - """ % __build_results_query_part() + """ % query.results_query_part(BulkImportRequest) params = {"projectId": project_id, "name": name, "fileUrl": url} bulk_import_request_response = client.execute(query_str, params=params) return BulkImportRequest.from_result( diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 8babdd6b2..0596e28f9 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -16,8 +16,8 @@ class IntegrationClient(Client): def __init__(self): - api_url = os.environ.get("LABELBOX_TEST_ENDPOINT", - "https://staging-api.labelbox.com/graphql") + api_url = os.environ["LABELBOX_TEST_ENDPOINT"] + #"https://staging-api.labelbox.com/graphql") super().__init__(os.environ["LABELBOX_TEST_API_KEY"], api_url) self.queries = [] @@ -94,10 +94,10 @@ def environ() -> Environ: """ try: - #return Environ(os.environ['LABELBOX_TEST_ENVIRON']) + return Environ(os.environ['LABELBOX_TEST_ENVIRON']) # TODO: for some reason all other environs can be set but # this one cannot in github actions - return Environ.PROD + #return Environ.PROD except KeyError: raise Exception(f'Missing LABELBOX_TEST_ENVIRON in: {os.environ}') From 5aa3f1c5bf39c9688961d02ee89d86cdf840bdf1 Mon Sep 17 00:00:00 2001 From: rllin Date: Fri, 7 Aug 2020 11:32:42 -0700 Subject: [PATCH 23/32] cleanup --- tests/integration/conftest.py | 48 +++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 0596e28f9..fc7d50f9f 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -12,13 +12,36 @@ IMG_URL = "https://picsum.photos/200/300" +class Environ(Enum): + PROD = 'prod' + STAGING = 'staging' + + +@pytest.fixture +def environ() -> Environ: + """ + Checks environment variables for LABELBOX_ENVIRON to be + 'prod' or 'staging' + + Make sure to set LABELBOX_TEST_ENVIRON in .github/workflows/python-package.yaml + + 
""" + try: + return Environ(os.environ['LABELBOX_TEST_ENVIRON']) + # TODO: for some reason all other environs can be set but + # this one cannot in github actions + #return Environ.PROD + except KeyError: + raise Exception(f'Missing LABELBOX_TEST_ENVIRON in: {os.environ}') + class IntegrationClient(Client): def __init__(self): api_url = os.environ["LABELBOX_TEST_ENDPOINT"] + api_key = os.environ["LABELBOX_TEST_API_KEY"] #"https://staging-api.labelbox.com/graphql") - super().__init__(os.environ["LABELBOX_TEST_API_KEY"], api_url) + super().__init__(api_key, api_url) self.queries = [] @@ -79,29 +102,6 @@ def label_pack(project, rand_gen): dataset.delete() -class Environ(Enum): - PROD = 'prod' - STAGING = 'staging' - - -@pytest.fixture -def environ() -> Environ: - """ - Checks environment variables for LABELBOX_ENVIRON to be - 'prod' or 'staging' - - Make sure to set LABELBOX_TEST_ENVIRON in .github/workflows/python-package.yaml - - """ - try: - return Environ(os.environ['LABELBOX_TEST_ENVIRON']) - # TODO: for some reason all other environs can be set but - # this one cannot in github actions - #return Environ.PROD - except KeyError: - raise Exception(f'Missing LABELBOX_TEST_ENVIRON in: {os.environ}') - - @pytest.fixture def iframe_url(environ) -> str: return { From 1ec20197f66bdd6556cb984efa38dc16898d2a58 Mon Sep 17 00:00:00 2001 From: rllin Date: Fri, 7 Aug 2020 15:26:15 -0700 Subject: [PATCH 24/32] clean up --- labelbox/schema/bulk_import_request.py | 27 ++++++++------------------ tests/integration/conftest.py | 3 ++- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py index 17e7751ad..112054643 100644 --- a/labelbox/schema/bulk_import_request.py +++ b/labelbox/schema/bulk_import_request.py @@ -11,6 +11,7 @@ import ndjson import requests +from labelbox import utils import labelbox.exceptions from labelbox import Project from labelbox import User @@ -103,9 +104,10 @@ def refresh(self) -> None: """ Synchronizes values of all fields with the database. 
""" - bulk_import_request = self.get(self.project().uid, self.name) - for field in self.fields(): - setattr(self, field.name, getattr(bulk_import_request, field.name)) + query_str, params = query.get_single(BulkImportRequest, self.uid) + res = self.client.execute(query_str, params) + res = res[utils.camel_case(BulkImportRequest.type_name())] + self._set_field_values(res) def wait_until_done(self, sleep_time_seconds: int = 30) -> None: """ @@ -130,17 +132,6 @@ def wait_until_done(self, sleep_time_seconds: int = 30) -> None: def __exponential_backoff_refresh(self) -> None: self.refresh() - def from_result(client, result: dict) -> 'BulkImportRequest': - project = result.pop("project") - user = result.pop("createdBy") - bulk_import_request = BulkImportRequest(client, result) - if project is not None: - bulk_import_request.__project = Project( # type: ignore - client, project) - if user is not None: - bulk_import_request.__user = User(client, user) # type: ignore - return bulk_import_request - def create_from_url(client, project_id: str, name: str, url: str) -> BulkImportRequest: @@ -169,7 +160,7 @@ def create_from_url(client, project_id: str, name: str, """ % query.results_query_part(BulkImportRequest) params = {"projectId": project_id, "name": name, "fileUrl": url} bulk_import_request_response = client.execute(query_str, params=params) - return BulkImportRequest.from_result( + return BulkImportRequest( client, bulk_import_request_response["createBulkImportRequest"]) @@ -211,8 +202,7 @@ def create_from_objects(client, project_id: str, name: str, file_name=file_name, file_data=file_data) - return BulkImportRequest.from_result( - client, response_data["createBulkImportRequest"]) + return BulkImportRequest(client, response_data["createBulkImportRequest"]) def create_from_local_file(client, @@ -254,5 +244,4 @@ def create_from_local_file(client, file_data = (file.name, f, NDJSON_MIME_TYPE) response_data = __send_create_file_command(client, request_data, file_name, file_data) - return BulkImportRequest.from_result( - client, response_data["createBulkImportRequest"]) + return BulkImportRequest(client, response_data["createBulkImportRequest"]) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index fc7d50f9f..71ef87186 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -12,6 +12,7 @@ IMG_URL = "https://picsum.photos/200/300" + class Environ(Enum): PROD = 'prod' STAGING = 'staging' @@ -40,7 +41,7 @@ class IntegrationClient(Client): def __init__(self): api_url = os.environ["LABELBOX_TEST_ENDPOINT"] api_key = os.environ["LABELBOX_TEST_API_KEY"] - #"https://staging-api.labelbox.com/graphql") + #"https://staging-api.labelbox.com/graphql") super().__init__(api_key, api_url) self.queries = [] From 63f48641f7bc8e9f28aaa7c37aca15b50b974c37 Mon Sep 17 00:00:00 2001 From: rllin Date: Fri, 7 Aug 2020 15:31:22 -0700 Subject: [PATCH 25/32] remove imports --- labelbox/schema/bulk_import_request.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py index 112054643..88f67a50f 100644 --- a/labelbox/schema/bulk_import_request.py +++ b/labelbox/schema/bulk_import_request.py @@ -13,8 +13,6 @@ from labelbox import utils import labelbox.exceptions -from labelbox import Project -from labelbox import User from labelbox.orm import query from labelbox.orm.db_object import DbObject from labelbox.orm.model import Field From aa75d7a15a0ac581908169adc16eb01ad55d926d Mon Sep 17 00:00:00 2001 From: 
rllin
Date: Fri, 7 Aug 2020 15:40:26 -0700
Subject: [PATCH 26/32] staging

---
 .github/workflows/python-package.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 8b6b81775..bde7e0b9c 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -53,11 +53,11 @@ jobs:
         # TODO: create a staging environment (develop)
         # we only test against prod right now because the merges are right into
         # the main branch which is develop right now
-        #LABELBOX_TEST_ENVIRON: "PROD"
+        #LABELBOX_TEST_ENVIRON: "prod"
         #
         # randall+staging-python@labelbox.com
         LABELBOX_TEST_API_KEY: ${{ secrets.STAGING_LABELBOX_API_KEY }}
         LABELBOX_TEST_ENDPOINT: "https://staging-api.labelbox.com/graphql"
-        LABELBOX_TEST_ENVIRON: "STAGING"
+        LABELBOX_TEST_ENVIRON: "staging"
       run: |
         tox -- -svv

From d6eed9ff6a92bada8b379f9a3fc7fce8b664bef8 Mon Sep 17 00:00:00 2001
From: rllin
Date: Mon, 10 Aug 2020 10:43:23 -0700
Subject: [PATCH 27/32] seek rather than open 2x

---
 labelbox/schema/bulk_import_request.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 88f67a50f..96017d699 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -220,13 +220,15 @@ def create_from_local_file(client,
             if `file` is a valid ndjson file
     Returns:
         BulkImportRequest object
+
     """
     file_name = __make_file_name(project_id, name)
     content_length = file.stat().st_size
     request_data = __make_request_data(project_id, name, content_length,
                                        file_name)
-    if validate_file:
-        with file.open('rb') as f:
+
+    with file.open('rb') as f:
+        if validate_file:
             reader = ndjson.reader(f)
             # ensure that the underlying json load call is valid
             # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
             # by iterating through the file so we only store
             # each line in memory rather than the entire file
             try:
                 for line in reader:
                     pass
             except ValueError:
                 raise ValueError(f"{file} is not a valid ndjson file")
-
-    with file.open('rb') as f:
+        else:
+            file.seek(0)
         file_data = (file.name, f, NDJSON_MIME_TYPE)
         response_data = __send_create_file_command(client, request_data,
                                                    file_name, file_data)

From 60dbed3afb9634bd2b0df5009d2d31bb412bafb4 Mon Sep 17 00:00:00 2001
From: rllin
Date: Mon, 10 Aug 2020 14:12:58 -0700
Subject: [PATCH 28/32] reorder

---
 labelbox/schema/bulk_import_request.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 96017d699..bd90113ef 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -89,14 +89,15 @@ def __send_create_file_command(
 
 
 class BulkImportRequest(DbObject):
-    project = Relationship.ToOne("Project")
     name = Field.String("name")
-    created_at = Field.DateTime("created_at")
-    created_by = Relationship.ToOne("User", False, "created_by")
+    state = Field.Enum(BulkImportRequestState, "state")
     input_file_url = Field.String("input_file_url")
     error_file_url = Field.String("error_file_url")
     status_file_url = Field.String("status_file_url")
-    state = Field.Enum(BulkImportRequestState, "state")
+    created_at = Field.DateTime("created_at")
+
+    project = Relationship.ToOne("Project")
+    created_by = Relationship.ToOne("User", False, "created_by")

From cf50af233ec65ad078772ff4d689de97945f2744 Mon Sep 17 00:00:00 2001
From: rllin
Date: Mon, 10 
Aug 2020 14:16:03 -0700 Subject: [PATCH 29/32] fix seek -n --- labelbox/schema/bulk_import_request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py index bd90113ef..37de2d80a 100644 --- a/labelbox/schema/bulk_import_request.py +++ b/labelbox/schema/bulk_import_request.py @@ -241,7 +241,7 @@ def create_from_local_file(client, except ValueError: raise ValueError(f"{file} is not a valid ndjson file") else: - file.seek(0) + f.seek(0) file_data = (file.name, f, NDJSON_MIME_TYPE) response_data = __send_create_file_command(client, request_data, file_name, file_data) From ed469bfac2a9418eddd137ecc9531484bc468e50 Mon Sep 17 00:00:00 2001 From: rllin Date: Mon, 10 Aug 2020 14:46:32 -0700 Subject: [PATCH 30/32] fix --- labelbox/schema/bulk_import_request.py | 258 ++++++++++-------- labelbox/schema/project.py | 8 +- tests/integration/test_bulk_import_request.py | 6 +- 3 files changed, 150 insertions(+), 122 deletions(-) diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py index 37de2d80a..978931a90 100644 --- a/labelbox/schema/bulk_import_request.py +++ b/labelbox/schema/bulk_import_request.py @@ -23,12 +23,12 @@ logger = logging.getLogger(__name__) -def __make_file_name(project_id: str, name: str) -> str: +def _make_file_name(project_id: str, name: str) -> str: return f"{project_id}__{name}.ndjson" # TODO(gszpak): move it to client.py -def __make_request_data(project_id: str, name: str, content_length: int, +def _make_request_data(project_id: str, name: str, content_length: int, file_name: str) -> dict: query_str = """mutation createBulkImportRequestFromFilePyApi( $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) { @@ -59,7 +59,7 @@ def __make_request_data(project_id: str, name: str, content_length: int, # TODO(gszpak): move it to client.py -def __send_create_file_command( +def _send_create_file_command( client, request_data: dict, file_name: str, file_data: Tuple[str, Union[bytes, BinaryIO], str]) -> dict: response = requests.post( @@ -131,118 +131,146 @@ def wait_until_done(self, sleep_time_seconds: int = 30) -> None: def __exponential_backoff_refresh(self) -> None: self.refresh() + @classmethod + def from_name(cls, client, project_id: str, name: str) -> 'BulkImportRequest': + """ Fetches existing BulkImportRequest. -def create_from_url(client, project_id: str, name: str, - url: str) -> BulkImportRequest: - """ - Creates a BulkImportRequest from a publicly accessible URL - to an ndjson file with predictions. - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - url (str): publicly accessible URL pointing to ndjson file containing predictions - Returns: - BulkImportRequest object - """ - query_str = """mutation createBulkImportRequestPyApi( - $projectId: ID!, $name: String!, $fileUrl: String!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - fileUrl: $fileUrl - }) { - %s + Args: + client (Client): a Labelbox client + project_id (str): BulkImportRequest's project id + name (str): name of BulkImportRequest + Returns: + BulkImportRequest object + + """ + query_str = """query getBulkImportRequestPyApi( + $projectId: ID!, $name: String!) 
{
+            bulkImportRequest(where: {
+                projectId: $projectId,
+                name: $name
+            }) {
+                %s
+            }
+        }
+        """ % query.results_query_part(cls)
+        params = {"projectId": project_id, "name": name}
+        response = client.execute(query_str, params=params)
+        return cls(client, response['bulkImportRequest'])
+
+    @classmethod
+    def create_from_url(cls, client, project_id: str, name: str,
+                        url: str) -> 'BulkImportRequest':
+        """
+        Creates a BulkImportRequest from a publicly accessible URL
+        to an ndjson file with predictions.
+
+        Args:
+            client (Client): a Labelbox client
+            project_id (str): id of project for which predictions will be imported
+            name (str): name of BulkImportRequest
+            url (str): publicly accessible URL pointing to ndjson file containing predictions
+        Returns:
+            BulkImportRequest object
+        """
+        query_str = """mutation createBulkImportRequestPyApi(
+                $projectId: ID!, $name: String!, $fileUrl: String!) {
+            createBulkImportRequest(data: {
+                projectId: $projectId,
+                name: $name,
+                fileUrl: $fileUrl
+            }) {
+                %s
+            }
+        }
+        """ % query.results_query_part(cls)
+        params = {"projectId": project_id, "name": name, "fileUrl": url}
+        bulk_import_request_response = client.execute(query_str, params=params)
+        return cls(client, bulk_import_request_response["createBulkImportRequest"])
+
+    @classmethod
+    def create_from_objects(cls, client, project_id: str, name: str,
+                            predictions: Iterable[dict]) -> 'BulkImportRequest':
+        """
+        Creates a BulkImportRequest from an iterable of dictionaries conforming to
+        JSON predictions format, e.g.:
+        ``{
+            "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
+            "schemaId": "ckappz7d700gn0zbocmqkwd9i",
+            "dataRow": {
+                "id": "ck1s02fqxm8fi0757f0e6qtdc"
+            },
+            "bbox": {
+                "top": 48,
+                "left": 58,
+                "height": 865,
+                "width": 1512
+            }
+        }``
+
+        Args:
+            client (Client): a Labelbox client
+            project_id (str): id of project for which predictions will be imported
+            name (str): name of BulkImportRequest
+            predictions (Iterable[dict]): iterable of dictionaries representing predictions
+        Returns:
+            BulkImportRequest object
+        """
+        data_str = ndjson.dumps(predictions)
+        data = data_str.encode('utf-8')
+        file_name = _make_file_name(project_id, name)
+        request_data = _make_request_data(project_id, name, len(data_str),
+                                          file_name)
+        file_data = (file_name, data, NDJSON_MIME_TYPE)
+        response_data = _send_create_file_command(client,
+                                                  request_data=request_data,
+                                                  file_name=file_name,
+                                                  file_data=file_data)
+
+        return cls(client, response_data["createBulkImportRequest"])
+
+    @classmethod
+    def create_from_local_file(cls,
+                               client,
+                               project_id: str,
+                               name: str,
+                               file: Path,
+                               validate_file=True) -> 'BulkImportRequest':
+        """
+        Creates a BulkImportRequest from a local ndjson file with predictions.
- - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - file (Path): local ndjson file with predictions - validate_file (bool): a flag indicating if there should be a validation - if `file` is a valid ndjson file - Returns: - BulkImportRequest object - - """ - file_name = __make_file_name(project_id, name) - content_length = file.stat().st_size - request_data = __make_request_data(project_id, name, content_length, - file_name) - - with file.open('rb') as f: - if validate_file: - reader = ndjson.reader(f) - # ensure that the underlying json load call is valid - # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53 - # by iterating through the file so we only store - # each line in memory rather than the entire file - try: - for line in reader: - pass - except ValueError: - raise ValueError(f"{file} is not a valid ndjson file") - else: - f.seek(0) - file_data = (file.name, f, NDJSON_MIME_TYPE) - response_data = __send_create_file_command(client, request_data, - file_name, file_data) - return BulkImportRequest(client, response_data["createBulkImportRequest"]) + """ % query.results_query_part(cls) + params = {"projectId": project_id, "name": name, "fileUrl": url} + bulk_import_request_response = client.execute(query_str, params=params) + print('query_str', query_str, params) + print('response data', bulk_import_request_response) + return cls(client, bulk_import_request_response["createBulkImportRequest"]) + + @classmethod + def create_from_objects(cls, client, project_id: str, name: str, + predictions: Iterable[dict]) -> 'BulkImportRequest': + """ + Creates a BulkImportRequest from an iterable of dictionaries conforming to + JSON predictions format, e.g.: + ``{ + "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", + "schemaId": "ckappz7d700gn0zbocmqkwd9i", + "dataRow": { + "id": "ck1s02fqxm8fi0757f0e6qtdc" + }, + "bbox": { + "top": 48, + "left": 58, + "height": 865, + "width": 1512 + } + }`` + + Args: + client (Client): a Labelbox client + project_id (str): id of project for which predictions will be imported + name (str): name of BulkImportRequest + predictions (Iterable[dict]): iterable of dictionaries representing predictions + Returns: + BulkImportRequest object + """ + data_str = ndjson.dumps(predictions) + data = data_str.encode('utf-8') + file_name = _make_file_name(project_id, name) + request_data = _make_request_data(project_id, name, len(data_str), + file_name) + file_data = (file_name, data, NDJSON_MIME_TYPE) + response_data = _send_create_file_command(client, + request_data=request_data, + file_name=file_name, + file_data=file_data) + + return cls(client, response_data["createBulkImportRequest"]) + + @classmethod + def create_from_local_file(cls, + client, + project_id: str, + name: str, + file: Path, + validate_file=True) -> 'BulkImportRequest': + """ + Creates a BulkImportRequest from a local ndjson file with predictions. 
+ + Args: + client (Client): a Labelbox client + project_id (str): id of project for which predictions will be imported + name (str): name of BulkImportRequest + file (Path): local ndjson file with predictions + validate_file (bool): a flag indicating if there should be a validation + if `file` is a valid ndjson file + Returns: + BulkImportRequest object + + """ + file_name = _make_file_name(project_id, name) + content_length = file.stat().st_size + request_data = _make_request_data(project_id, name, content_length, + file_name) + + with file.open('rb') as f: + if validate_file: + reader = ndjson.reader(f) + # ensure that the underlying json load call is valid + # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53 + # by iterating through the file so we only store + # each line in memory rather than the entire file + try: + for line in reader: + pass + except ValueError: + raise ValueError(f"{file} is not a valid ndjson file") + else: + f.seek(0) + file_data = (file.name, f, NDJSON_MIME_TYPE) + response_data = _send_create_file_command(client, request_data, + file_name, file_data) + return cls(client, response_data["createBulkImportRequest"]) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index a2a09cf39..d5bc9e5a7 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -8,7 +8,7 @@ from urllib.parse import urlparse from labelbox import utils -from labelbox.schema.bulk_import_request import create_from_url, create_from_objects, create_from_local_file +from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.exceptions import InvalidQueryError from labelbox.orm import query from labelbox.orm.db_object import DbObject, Updateable, Deletable @@ -389,7 +389,7 @@ def _is_url_valid(url: str) -> bool: return bool(parsed.scheme) and bool(parsed.netloc) if _is_url_valid(annotations): - return create_from_url( + return BulkImportRequest.create_from_url( client=self.client, project_id=self.uid, name=name, @@ -401,7 +401,7 @@ def _is_url_valid(url: str) -> bool: raise FileNotFoundError( f'{annotations} is not a valid url nor existing local file' ) - return create_from_local_file( + return BulkImportRequest.create_from_local_file( client=self.client, project_id=self.uid, name=name, @@ -409,7 +409,7 @@ def _is_url_valid(url: str) -> bool: validate_file=True, ) else: - return create_from_objects( + return BulkImportRequest.create_from_objects( client=self.client, project_id=self.uid, name=name, diff --git a/tests/integration/test_bulk_import_request.py b/tests/integration/test_bulk_import_request.py index 1f35a06c3..9cbd0afe4 100644 --- a/tests/integration/test_bulk_import_request.py +++ b/tests/integration/test_bulk_import_request.py @@ -79,7 +79,7 @@ def test_create_from_local_file(tmp_path, project): ndjson.dump(PREDICTIONS, f) bulk_import_request = project.upload_annotations(name=name, - annotations=file_path) + annotations=str(file_path)) assert bulk_import_request.project() == project assert bulk_import_request.name == name @@ -94,7 +94,7 @@ def test_get(client, project): url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" project.upload_annotations(name=name, annotations=url) - bulk_import_request = BulkImportRequest.get(client, project.uid, name) + bulk_import_request = BulkImportRequest.from_name(client, project_id=project.uid, name=name) assert bulk_import_request.project() == project assert bulk_import_request.name == name @@ -111,7 +111,7 @@ 
def test_validate_ndjson(tmp_path, project):
     f.write("test")
     with pytest.raises(ValueError):
-        project.upload_annotations(name="name", annotations=file_path)
+        project.upload_annotations(name="name", annotations=str(file_path))
 
 
 @pytest.mark.slow

From 40a90e9eac95963efe0fcb3028f056fdad2c4a90 Mon Sep 17 00:00:00 2001
From: rllin
Date: Mon, 10 Aug 2020 14:49:17 -0700
Subject: [PATCH 31/32] yapf

---
 labelbox/schema/bulk_import_request.py        | 20 ++++++++++---------
 tests/integration/test_bulk_import_request.py |  4 +++-
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/labelbox/schema/bulk_import_request.py b/labelbox/schema/bulk_import_request.py
index 978931a90..ef054cb48 100644
--- a/labelbox/schema/bulk_import_request.py
+++ b/labelbox/schema/bulk_import_request.py
@@ -29,7 +29,7 @@ def _make_file_name(project_id: str, name: str) -> str:
 
 # TODO(gszpak): move it to client.py
 def _make_request_data(project_id: str, name: str, content_length: int,
-                      file_name: str) -> dict:
+                       file_name: str) -> dict:
     query_str = """mutation createBulkImportRequestFromFilePyApi(
         $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) {
         createBulkImportRequest(data: {
@@ -132,7 +132,8 @@ def __exponential_backoff_refresh(self) -> None:
         self.refresh()
 
     @classmethod
-    def from_name(cls, client, project_id: str, name: str) -> 'BulkImportRequest':
+    def from_name(cls, client, project_id: str,
+                  name: str) -> 'BulkImportRequest':
         """ Fetches existing BulkImportRequest.
 
@@ -187,7 +188,8 @@ def create_from_url(cls, client, project_id: str, name: str,
         params = {"projectId": project_id, "name": name, "fileUrl": url}
         bulk_import_request_response = client.execute(query_str, params=params)
-        return cls(client, bulk_import_request_response["createBulkImportRequest"])
+        return cls(client,
+                   bulk_import_request_response["createBulkImportRequest"])
@@ -221,12 +223,12 @@ def create_from_objects(cls, client, project_id: str, name: str,
         data = data_str.encode('utf-8')
         file_name = _make_file_name(project_id, name)
         request_data = _make_request_data(project_id, name, len(data_str),
-                                        file_name)
+                                          file_name)
         file_data = (file_name, data, NDJSON_MIME_TYPE)
         response_data = _send_create_file_command(client,
-                                                request_data=request_data,
-                                                file_name=file_name,
-                                                file_data=file_data)
+                                                  request_data=request_data,
+                                                  file_name=file_name,
+                                                  file_data=file_data)
@@ -254,7 +256,7 @@ def create_from_local_file(cls,
         file_name = _make_file_name(project_id, name)
         content_length = file.stat().st_size
         request_data = _make_request_data(project_id, name, content_length,
-                                        file_name)
+                                          file_name)
@@ -272,5 +274,5 @@ def create_from_local_file(cls,
             file_data = (file.name, f, NDJSON_MIME_TYPE)
             response_data = _send_create_file_command(client, request_data,
-                                                    file_name, file_data)
+                                                      file_name, file_data)
         return cls(client, response_data["createBulkImportRequest"])

diff --git a/tests/integration/test_bulk_import_request.py b/tests/integration/test_bulk_import_request.py
index 9cbd0afe4..20ec7e095 100644
--- a/tests/integration/test_bulk_import_request.py
+++ b/tests/integration/test_bulk_import_request.py
@@ -94,7 +94,9 @@ def test_get(client, project):
     url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
     project.upload_annotations(name=name, annotations=url)
 
-    bulk_import_request = BulkImportRequest.from_name(client, project_id=project.uid, name=name)
+    bulk_import_request = BulkImportRequest.from_name(client,
+                                                      project_id=project.uid,
+                                                      name=name)
 
     assert bulk_import_request.project() == project
     assert bulk_import_request.name == name
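With [PATCH 30/32] and [PATCH 31/32] in place, annotation imports go through Project.upload_annotations, which dispatches to the BulkImportRequest classmethods. A minimal usage sketch of that surface, assuming a configured client and an existing project — the API key, project id, and URL below are placeholders, and Client.get_project is assumed from the wider SDK rather than from these patches:

    import uuid

    from labelbox import Client
    from labelbox.schema.bulk_import_request import BulkImportRequest

    client = Client(api_key="<LABELBOX_API_KEY>")  # placeholder credentials
    project = client.get_project("<project-uid>")  # assumed existing project

    name = str(uuid.uuid4())
    # upload_annotations dispatches on its `annotations` argument: a valid
    # URL goes to create_from_url, an existing local path (as a string) to
    # create_from_local_file with ndjson validation, and any other iterable
    # of dicts to create_from_objects.
    project.upload_annotations(
        name=name,
        annotations="https://example.com/predictions.ndjson")

    # The job can be re-fetched later by project id and name, then polled.
    job = BulkImportRequest.from_name(client,
                                      project_id=project.uid,
                                      name=name)
    job.wait_until_done(sleep_time_seconds=30)
    print(job.state)  # e.g. BulkImportRequestState.RUNNING or FINISHED

Note that wait_until_done blocks the calling thread, refreshing the record at the given interval for as long as the state is RUNNING.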

From f3f17a47b15e28750b57295ea24bdbbc64e0563b Mon Sep 17 00:00:00 2001
From: rllin
Date: Mon, 10 Aug 2020 16:09:48 -0700
Subject: [PATCH 32/32] prod

---
 .github/workflows/python-package.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index bde7e0b9c..1e1484503 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -48,16 +48,16 @@ jobs:
     - name: Test with tox
       env:
         # make sure to tell tox to use these environs in tox.ini
-        #LABELBOX_TEST_API_KEY: ${{ secrets.LABELBOX_API_KEY }}
-        #LABELBOX_TEST_ENDPOINT: "https://api.labelbox.com/graphql"
+        LABELBOX_TEST_API_KEY: ${{ secrets.LABELBOX_API_KEY }}
+        LABELBOX_TEST_ENDPOINT: "https://api.labelbox.com/graphql"
         # TODO: create a staging environment (develop)
         # we only test against prod right now because the merges are right into
         # the main branch which is develop right now
-        #LABELBOX_TEST_ENVIRON: "prod"
+        LABELBOX_TEST_ENVIRON: "prod"
         #
         # randall+staging-python@labelbox.com
-        LABELBOX_TEST_API_KEY: ${{ secrets.STAGING_LABELBOX_API_KEY }}
-        LABELBOX_TEST_ENDPOINT: "https://staging-api.labelbox.com/graphql"
-        LABELBOX_TEST_ENVIRON: "staging"
+        #LABELBOX_TEST_API_KEY: ${{ secrets.STAGING_LABELBOX_API_KEY }}
+        #LABELBOX_TEST_ENDPOINT: "https://staging-api.labelbox.com/graphql"
+        #LABELBOX_TEST_ENVIRON: "staging"
       run: |
        tox -- -svv
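
For reference, the three payload forms upload_annotations accepts at the end of the series, continuing from the sketch above — the ids and file names are placeholders shaped like the tests' PREDICTIONS fixture, not real schema or data-row ids:

    import uuid

    import ndjson

    predictions = [{
        "uuid": str(uuid.uuid4()),
        "schemaId": "<ontology-tool-schema-id>",  # placeholder
        "dataRow": {"id": "<data-row-id>"},       # placeholder
        "bbox": {"top": 48, "left": 58, "height": 865, "width": 1512},
    }]

    # 1. An iterable of dicts: serialized with ndjson and uploaded directly.
    project.upload_annotations(name="from-objects", annotations=predictions)

    # 2. A local ndjson file passed by path: validated line by line first.
    with open("predictions.ndjson", "w") as f:
        ndjson.dump(predictions, f)
    project.upload_annotations(name="from-file",
                               annotations="predictions.ndjson")

    # 3. A publicly accessible URL: handed to the GraphQL mutation as-is.
    project.upload_annotations(
        name="from-url",
        annotations="https://example.com/predictions.ndjson")

Each call returns a BulkImportRequest whose state, input_file_url, error_file_url, and status_file_url fields track the import.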