diff --git a/CHANGELOG.md b/CHANGELOG.md index cf11eaf69..9a02e91b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Version 2.4.1 (2020-07-22) +### Fixed +* `Dataset.create_data_row` and `Dataset.create_data_rows` will now upload with content type to ensure the Labelbox editor can show videos. + ## Version 2.4 (2020-01-30) ### Added diff --git a/CONTRIB.md b/CONTRIB.md new file mode 100644 index 000000000..730be2338 --- /dev/null +++ b/CONTRIB.md @@ -0,0 +1,56 @@ +# Labelbox Python SDK Contribution Guide + +## Repository Organization + +The SDK source (excluding tests and support tools) is organized into the +following packages/modules: +* `orm/` package contains code that supports the general mapping of Labelbox + data to Python objects. This includes base classes, attribute (field and + relationship) classes, generic GraphQL queries etc. +* `schema/` package contains definitions of classes which represent data type + (e.g. Project, Label etc.). It relies on `orm/` classes for easy and succinct + object definitions. It also contains custom functionalities and custom GraphQL + templates where necessary. +* `client.py` contains the `Client` class that's the client-side stub for + communicating with Labelbox servers. +* `exceptions.py` contains declarations for all Labelbox errors. +* `pagination.py` contains support for paginated relationship and collection + fetching. +* `utils.py` contains utility functions. + +## Branches + +* All development happens in per-feature branches prefixed by contributor's + initials. For example `fs/feature_name`. +* Approved PRs are merged to the `develop` branch. +* The `develop` branch is merged to `master` on each release. + +## Testing + +Currently the SDK functionality is tested using integration tests. These tests +communicate with a Labelbox server (by default the staging server) and are in +that sense not self-contained. 
Besides that they are organized like unit tests +and are based on the `pytest` library. + +To execute tests you will need to provide an API key for the server you're using +for testing (staging by default) in the `LABELBOX_TEST_API_KEY` environment +variable. For more info see [Labelbox API key +docs](https://labelbox.helpdocs.io/docs/api/getting-started). + +## Release Steps + +Each release should follow these steps: + +1. Update the Python SDK package version in `REPO_ROOT/setup.py` +2. Make sure the `CHANGELOG.md` contains appropriate info +3. Commit these changes and tag the commit in Git as `vX.Y` +4. Merge `develop` to `master` (fast-forward only). +5. Generate a GitHub release. +6. Build the library in the [standard + way](https://packaging.python.org/tutorials/packaging-projects/#generating-distribution-archives) +7. Upload the distribution archives in the [standard + way](https://packaging.python.org/tutorials/packaging-projects/#uploading-the-distribution-archives). +You will need credentials for the `labelbox` PyPI user. +8. Run the `REPO_ROOT/tools/api_reference_generator.py` script to update + [HelpDocs documentation](https://labelbox.helpdocs.io/docs/). You will need + to provide a HelpDocs API key. diff --git a/README.md b/README.md index 8db5edbae..5f8e16f58 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Labelbox Python API +# Labelbox Python SDK Labelbox is the enterprise-grade training data solution with fast AI enabled labeling tools, labeling automation, human workforce, data management, a powerful API for integration & SDK for extensibility. Visit http://labelbox.com/ for more information. @@ -29,3 +29,6 @@ client = Client() ## Documentation [Visit our docs](https://labelbox.com/docs/python-api) to learn how to [create a project](https://labelbox.com/docs/python-api/create-first-project), read through some helpful user guides, and view our [API reference](https://labelbox.com/docs/python-api/api-reference). 
+ +## Repo Organization and Contribution +Please consult `CONTRIB.md` diff --git a/labelbox/client.py b/labelbox/client.py index 2d3c4ccd0..738645b1c 100644 --- a/labelbox/client.py +++ b/labelbox/client.py @@ -1,6 +1,7 @@ from datetime import datetime, timezone import json import logging +import mimetypes import os import requests @@ -75,7 +76,7 @@ def execute(self, query, params=None, timeout=10.0): labelbox.exceptions.InvalidQueryError: If `query` is not syntactically or semantically valid (checked server-side). labelbox.exceptions.ApiLimitError: If the server API limit was - exceeded. See "How to import data" in the online documentation + exceeded. See "How to import data" in the online documentation to see API limits. labelbox.exceptions.TimeoutError: If response was not received in `timeout` seconds. @@ -112,14 +113,14 @@ def convert_value(value): raise labelbox.exceptions.NetworkError(e) except Exception as e: - logger.error("Unknown error: %s", str(e)) - raise labelbox.exceptions.LabelboxError(str(e)) + raise labelbox.exceptions.LabelboxError( + "Unknown error during Client.query(): " + str(e), e) try: response = response.json() except: raise labelbox.exceptions.LabelboxError( - "Failed to parse response as JSON: %s", response.text) + "Failed to parse response as JSON: %s" % response.text) errors = response.get("errors", []) @@ -171,9 +172,27 @@ def check_errors(keywords, *path): return response["data"] + def upload_file(self, path): + """Uploads given path to local file. + + Also includes best guess at the content type of the file. + + Args: + path (str): path to local file to be uploaded. + Returns: + str, the URL of uploaded data. + Raises: + labelbox.exceptions.LabelboxError: If upload failed. 
+ + """ + content_type, _ = mimetypes.guess_type(path) + basename = os.path.basename(path) + with open(path, "rb") as f: + return self.upload_data(data=(basename, f.read(), content_type)) + def upload_data(self, data): """ Uploads the given data (bytes) to Labelbox. - + Args: data (bytes): The data to upload. Returns: @@ -183,8 +202,8 @@ def upload_data(self, data): """ request_data = { "operations": json.dumps({ - "variables": {"file": None, "contentLength": len(data), "sign": False}, - "query": """mutation UploadFile($file: Upload!, $contentLength: Int!, + "variables": {"file": None, "contentLength": len(data), "sign": False}, + "query": """mutation UploadFile($file: Upload!, $contentLength: Int!, $sign: Boolean) { uploadFile(file: $file, contentLength: $contentLength, sign: $sign) {url filename} } """,}), @@ -199,9 +218,9 @@ def upload_data(self, data): try: file_data = response.json().get("data", None) - except ValueError: # response is not valid JSON + except ValueError as e: # response is not valid JSON raise labelbox.exceptions.LabelboxError( - "Failed to upload, unknown cause") + "Failed to upload, unknown cause", e) if not file_data or not file_data.get("uploadFile", None): raise labelbox.exceptions.LabelboxError( diff --git a/labelbox/exceptions.py b/labelbox/exceptions.py index 8f7d9088f..5f58e4cf8 100644 --- a/labelbox/exceptions.py +++ b/labelbox/exceptions.py @@ -1,8 +1,18 @@ class LabelboxError(Exception): """Base class for exceptions.""" - def __init__(self, message, *args): - super().__init__(*args) + def __init__(self, message, cause=None): + """ + Args: + message (str): Informative message about the exception. + cause (Exception): The cause of the exception (an Exception + raised by Python or another library). Optional. 
+ """ + super().__init__(message, cause) self.message = message + self.cause = cause + + def __str__(self): + return self.message + str(self.args) class AuthenticationError(LabelboxError): @@ -31,9 +41,8 @@ def __init__(self, db_object_type, params): class ValidationFailedError(LabelboxError): - """Exception raised for when a GraphQL query fails validation (query cost, etc.) - - E.g. a query that is too expensive, or depth is too deep. + """Exception raised for when a GraphQL query fails validation (query cost, + etc.) E.g. a query that is too expensive, or depth is too deep. """ pass @@ -47,10 +56,8 @@ class InvalidQueryError(LabelboxError): class NetworkError(LabelboxError): """Raised when an HTTPError occurs.""" - def __init__(self, cause, message=None): - if message is None: - message = str(cause) - super().__init__(message) + def __init__(self, cause): + super().__init__(str(cause), cause) self.cause = cause diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index 1140052dc..20217a4c1 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -48,8 +48,7 @@ def create_data_row(self, **kwargs): # If row data is a local file path, upload it to server. row_data = kwargs[DataRow.row_data.name] if os.path.exists(row_data): - with open(row_data, "rb") as f: - kwargs[DataRow.row_data.name] = self.client.upload_data(f.read()) + kwargs[DataRow.row_data.name] = self.client.upload_file(row_data) kwargs[DataRow.dataset.name] = self @@ -57,7 +56,7 @@ def create_data_row(self, **kwargs): def create_data_rows(self, items): """ Creates multiple DataRow objects based on the given `items`. - + Each element in `items` can be either a `str` or a `dict`. If it is a `str`, then it is interpreted as a local file path. The file is uploaded to Labelbox and a DataRow referencing it is created. 
@@ -91,9 +90,7 @@ def create_data_rows(self, items): def upload_if_necessary(item): if isinstance(item, str): - with open(item, "rb") as f: - item_data = f.read() - item_url = self.client.upload_data(item_data) + item_url = self.client.upload_file(item) # Convert item from str into a dict so it gets processed # like all other dicts. item = {DataRow.row_data: item_url, diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index c9bb660aa..81eb2384b 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -112,6 +112,8 @@ def export_labels(self, timeout_seconds=60): """ Calls the server-side Label exporting that generates a JSON payload, and returns the URL to that payload. + Will only generate a new URL at a max frequency of 30 min. + Args: timeout_seconds (float): Max waiting time, in seconds. Returns: @@ -199,6 +201,8 @@ def setup(self, labeling_frontend, labeling_frontend_options): if not isinstance(labeling_frontend_options, str): labeling_frontend_options = json.dumps(labeling_frontend_options) + self.labeling_frontend.connect(labeling_frontend) + LFO = Entity.LabelingFrontendOptions labeling_frontend_options = self.client._create( LFO, {LFO.project: self, LFO.labeling_frontend: labeling_frontend, @@ -206,7 +210,6 @@ def setup(self, labeling_frontend, labeling_frontend_options): LFO.organization: organization }) - self.labeling_frontend.connect(labeling_frontend) timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.update(setup_complete=timestamp) diff --git a/setup.py b/setup.py index e49c98b14..6992f166a 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setuptools.setup( name="labelbox", - version="2.4", + version="2.4.1", author="Labelbox", author_email="engineering@labelbox.com", description="Labelbox Python API", diff --git a/tools/api_reference_generator.py b/tools/api_reference_generator.py new file mode 100755 index 000000000..ec011fa23 --- /dev/null +++ b/tools/api_reference_generator.py @@ 
#!/usr/bin/env python3

"""
Generates API documentation for the Labelbox Python Client in a form
tailored for HelpDocs (https://www.helpdocs.io). Supports automatic
uploading of generated documentation to Labelbox's HelpDocs pages,
if given a HelpDocs API Key for Labelbox with write privileges. Otherwise
outputs the generated documentation to stdout.

Must be invoked from within the `tools` directory in the Labelbox
Python client repo as it assumes that the Labelbox Python client source
can be found at relative path "../labelbox".

Usage:
    $ cd repo_root/tools
    $ python3 api_reference_generator.py  # outputs to stdout
    $ python3 api_reference_generator.py <helpdocs_api_key>  # uploads to HelpDocs
"""

from argparse import ArgumentParser, RawDescriptionHelpFormatter
from enum import Enum
import importlib
import inspect
from itertools import chain
import json
import os
import re
import sys

import requests

# The script is run from `repo_root/tools`; make `repo_root` importable so
# that the in-repo `labelbox` package is the one being documented.
sys.path.insert(0, os.path.abspath(".."))

import labelbox
from labelbox.utils import snake_case
from labelbox.exceptions import LabelboxError
from labelbox.orm.db_object import Deletable, BulkDeletable, Updateable
from labelbox.orm.model import Entity
from labelbox.schema.project import LabelerPerformance


GENERAL_CLASSES = [labelbox.Client]
SCHEMA_CLASSES = [
    labelbox.Project, labelbox.Dataset, labelbox.DataRow, labelbox.Label,
    labelbox.AssetMetadata, labelbox.LabelingFrontend, labelbox.Task,
    labelbox.Webhook, labelbox.User, labelbox.Organization, labelbox.Review,
    labelbox.Prediction, labelbox.PredictionModel,
    LabelerPerformance]

ERROR_CLASSES = [LabelboxError] + LabelboxError.__subclasses__()

_ALL_CLASSES = GENERAL_CLASSES + SCHEMA_CLASSES + ERROR_CLASSES


# Additional relationships injected into the Relationships part
# of a schema class.
ADDITIONAL_RELATIONSHIPS = {
    "Project": ["labels (Label, ToMany)"]}

# Marks "this argument has no default" in `generate_signature`, so that a
# genuine default of `None` can still be rendered as `arg=None`.
_NO_DEFAULT = object()


def tag(text, tag, values=None):
    """ Wraps text into an HTML tag. Example:
        >>> tag("Some text", "p", {"id": "id_value"})
        '<p id="id_value">Some text</p>'

    Args:
        text (str): The text to wrap inside tags.
        tag (str): The kind of tag.
        values (dict): Optional additional tag key-value pairs. Values
            are emitted double-quoted so that multi-word values (such as
            a `class` attribute with several classes) stay one attribute.
    Returns:
        str, `text` wrapped into an opening and a closing `tag`.
    """
    attributes = "".join(
        ' %s="%s"' % item for item in (values or {}).items())
    return "<%s%s>%s</%s>" % (tag, attributes, text, tag)


def header(level, text, header_id=None):
    """ Wraps `text` into a <h> (header) tag of the given level.
    Automatically increases the level by 2 to be inline with HelpDocs
    standards (h1 -> h3).

    Example (the exact id depends on `snake_case`):
        >>> header(2, "My Chapter")
        '<h4 id="my_chapter">My Chapter</h4>'

    Args:
        level (int): Level of header.
        text (str): Header text.
        header_id (str or None): The ID of the header. If None it's
            generated from text by converting to snake_case and
            replacing all whitespace with "_".
    Returns:
        str, the HTML header element.
    """
    if header_id is None:
        header_id = snake_case(text).replace(" ", "_")
    # Convert to level + 2 for HelpDocs standard.
    return tag(text, "h" + str(level + 2), {"id": header_id})


def paragraph(text, link_classes=True):
    """ Wraps `text` into a <p> tag.

    Args:
        text (str): Paragraph content.
        link_classes (bool): If True, known class names found in `text`
            are first replaced with links (see `inject_class_links`).
    Returns:
        str, the HTML paragraph element.
    """
    if link_classes:
        text = inject_class_links(text)
    return tag(text, "p")


def strong(text):
    """ Wraps `text` into a <strong> tag. """
    return tag(text, "strong")


def em(text):
    """ Wraps `text` into an <em> tag. """
    return tag(text, "em")


def unordered_list(items):
    """ Formats given items into an unordered HTML list. Known class names
    inside each item are replaced with links. Example:
        >>> unordered_list(["First", "Second"])
        '<ul><li>First</li><li>Second</li></ul>'

    Args:
        items (list of str): The list items.
    Returns:
        str, the HTML list, or an empty string if there are no items.
    """
    if not items:
        return ""
    return tag("".join(tag(inject_class_links(item), "li")
                       for item in items), "ul")


def code_block(lines):
    """ Wraps lines into a Python code block in HelpDocs standard.

    Args:
        lines (list of str): Code lines; they are joined with <br>.
    Returns:
        str, a <pre> element with the "hljs python" class.
    """
    return tag("<br>".join(lines), "pre", {"class": "hljs python"})


def inject_class_links(text):
    """ Finds all occurrences of known class names in the given text and
    replaces them with relative links to those classes.

    A class name matches with or without its module prefix and with or
    without a plural "s". `PaginatedCollection` links to the pagination
    section of the general concepts article.

    Args:
        text (str): Text (possibly already containing HTML) to process.
    Returns:
        str, `text` with class names wrapped into <a> tags.
    """
    pattern_link_pairs = [
        (r"\b(%s\.)?%ss?\b" % (re.escape(cls.__module__),
                               re.escape(cls.__name__)),
         "#" + snake_case(cls.__name__))
        for cls in _ALL_CLASSES
    ]
    pattern_link_pairs.append((r"\bPaginatedCollection\b",
                               "general-concepts#pagination"))

    for pattern, link in pattern_link_pairs:
        # `href` is bound per call so every match of this pattern links to
        # the same target (the anchor text must never leak into the href).
        def to_anchor(match, href=link):
            return tag(match.group(), "a", {"href": href})

        text = re.sub(pattern, to_anchor, text)
    return text


def is_method(attribute):
    """ Determines if the given attribute is most likely a method. It's
    approximative since from Python 3 there are no more unbound methods.

    Args:
        attribute (object): A class attribute.
    Returns:
        bool, True if `attribute` is a plain function defined inside a
        class (or other scope) whose first positional argument is `self`.
    """
    return inspect.isfunction(attribute) and "." in attribute.__qualname__ \
        and inspect.getfullargspec(attribute).args[:1] == ['self']


def preprocess_docstring(docstring):
    """ Parses and re-formats the given class or method `docstring`
    from Python documentation (Google style) into HelpDocs Python Client
    API specification style.

    Args:
        docstring (str or None): The docstring to convert.
    Returns:
        str, HTML consisting of the description (paragraphs and code
        blocks) followed by a list of the Args / Kwargs / Returns /
        Raises sections that are present.
    """

    def extract(docstring, keyword):
        """ Helper method for extracting a part of the docstring. Parts
        like "Returns" and "Args" are supported. Splits the `docstring`
        into two parts, before and after the given keyword.
        """
        if docstring is None or docstring == "":
            return "", ""

        pattern = r"\n\s*%ss?:\s*\n" % keyword
        split = re.split(pattern, docstring)
        if len(split) == 1:
            return docstring, None
        elif len(split) == 2:
            return split[0], split[1]
        else:
            raise Exception("Docstring '%s' split in more then two parts "
                            "by keyword '%s'" % (docstring, keyword))

    def parse_list(text):
        """ Helper method for parsing a list of items from Google-style
        Python docstring. Used for argument and exception lists. Supports
        multi-line text, assuming proper indentation. """
        if not text:
            return ""

        indent = re.match(r"^\s*", text).group()
        lines = text.split("\n")
        entries = [lines[0].strip()]
        for line in lines[1:]:
            next_indent = re.match(r"^\s*", line).group()
            if len(next_indent) > len(indent):
                # Deeper indentation: continuation of the previous entry.
                entries[-1] += " " + line.strip()
            else:
                entries.append(line.strip())

        items = []
        for entry in filter(None, entries):
            name, colon, descr = entry.partition(":")
            # An entry without "name: description" shape is kept verbatim
            # instead of aborting the whole generation run.
            items.append(em(name + ":") + descr if colon else entry)
        return unordered_list(items)

    def parse_block(block):
        """ Helper for parsing a block of documentation that possibly
        contains Python code in an indented block with each line starting
        with ">>>". Consecutive text lines become one paragraph,
        consecutive ">>>" lines become one code block.
        """
        if block is None:
            return ""

        result = []
        text_lines, code_lines = [], []

        def flush(collection, render):
            if collection:
                result.append(render(collection))
                collection.clear()

        def render_paragraph(lines):
            return paragraph(" ".join(lines))

        for line in filter(None, map(str.strip, block.split("\n"))):
            if line.startswith(">>>"):
                flush(text_lines, render_paragraph)
                code_lines.append(line)
            else:
                flush(code_lines, code_block)
                text_lines.append(line)

        # At most one of the two collections is non-empty at this point.
        flush(text_lines, render_paragraph)
        flush(code_lines, code_block)

        return "".join(result)

    def parse_maybe_block(text):
        """ Adapts to text. Calls `parse_block` if there is a codeblock
        indented, otherwise just joins lines into a single line and
        reduces whitespace.
        """
        if text is None:
            return ""
        if re.findall(r"\n\s+>>>", text):
            return parse_block(text)
        return re.sub(r"\s+", " ", text).strip()

    # Sections are peeled off from the end of the docstring, in the reverse
    # of the order Google style places them.
    docstring, raises = extract(docstring, "Raise")
    docstring, returns = extract(docstring, "Return")
    docstring, kwargs = extract(docstring, "Kwarg")
    docstring, args = extract(docstring, "Arg")

    parts = (("Args: ", parse_list(args)),
             ("Kwargs: ", parse_maybe_block(kwargs)),
             ("Returns: ", parse_maybe_block(returns)),
             ("Raises: ", parse_list(raises)))

    return parse_block(docstring) + unordered_list([
        strong(name) + item for name, item in parts if item])


def generate_functions(cls, predicate):
    """ Generates HelpDocs style documentation for the functions
    of the given class that satisfy the given predicate. The functions
    also must not begin with "_", with the exception of Client.__init__.

    Args:
        cls (type): The class being generated.
        predicate (callable): A callable accepting a single argument
            (class attribute) and returning a bool indicating if
            that attribute should be included in documentation
            generation.
    Return:
        Textual documentation of functions belonging to the given
        class that satisfy the given predicate.
    """
    def name_predicate(function):
        name = function.__name__
        return not name.startswith("_") or (cls is labelbox.Client and
                                            name == "__init__")

    # Get all class attributes plus selected superclass attributes.
    attributes = chain(
        cls.__dict__.values(),
        (getattr(cls, name) for name in ("delete", "update")
         if name in dir(cls) and name not in cls.__dict__))

    # Remove attributes not satisfying the predicate
    attributes = filter(predicate, attributes)

    # Extract function from staticmethod and classmethod wrappers
    attributes = (getattr(attr, "__func__", attr) for attr in attributes)

    # Apply name filter
    attributes = filter(name_predicate, attributes)

    # Sort on name
    functions = sorted(attributes, key=lambda attr: attr.__name__)

    return "".join(paragraph(generate_signature(function)) +
                   preprocess_docstring(function.__doc__)
                   for function in functions)


def generate_signature(method):
    """ Generates HelpDocs style description of a method signature.

    Arguments are listed in definition order: positional arguments,
    `*varargs` (or a bare `*` when there are keyword-only arguments but
    no varargs), keyword-only arguments, then `**varkw`. Defaults are
    rendered with `repr`, including a default of `None`.

    Args:
        method (function): The function to describe.
    Returns:
        str, the signature wrapped into a <strong> tag.
    """
    argspec = inspect.getfullargspec(method)

    def format_arg(arg, default):
        return arg if default is _NO_DEFAULT else arg + "=" + repr(default)

    # `defaults` aligns with the *last* positional arguments.
    defaults = argspec.defaults or ()
    padding = (_NO_DEFAULT,) * (len(argspec.args) - len(defaults))
    components = [format_arg(arg, default) for arg, default
                  in zip(argspec.args, padding + defaults)]

    if argspec.varargs:
        components.append("*" + argspec.varargs)
    elif argspec.kwonlyargs:
        components.append("*")

    # `kwonlydefaults` is a dict (or None), not a tuple.
    kwonly_defaults = argspec.kwonlydefaults or {}
    components.extend(
        format_arg(arg, kwonly_defaults.get(arg, _NO_DEFAULT))
        for arg in argspec.kwonlyargs)

    if argspec.varkw:
        components.append("**" + argspec.varkw)

    return tag(method.__name__ + "(" + ", ".join(components) + ")", "strong")


def generate_fields(cls):
    """ Generates HelpDocs style documentation for all the fields of a
    DbObject subclass.

    Args:
        cls (type): The class whose `fields()` are documented.
    Returns:
        str, an HTML list with one "name (type)" item per field.
    """
    return unordered_list([
        field.name + " " + em("(" + field.field_type.name + ")")
        for field in cls.fields()])


def generate_relationships(cls):
    """ Generates HelpDocs style documentation for all the relationships of a
    DbObject subclass. Entries from `ADDITIONAL_RELATIONSHIPS` registered
    under the class name are listed first.

    Args:
        cls (type): The class whose `relationships()` are documented.
    Returns:
        str, an HTML list with one item per relationship.
    """
    relationships = list(ADDITIONAL_RELATIONSHIPS.get(cls.__name__, []))
    relationships.extend(
        r.name + " " + em("(%s %s)" % (r.destination_type_name,
                                       r.relationship_type.name))
        for r in cls.relationships())

    return unordered_list(relationships)


def generate_constants(cls):
    """ Generates HelpDocs style documentation for the constants of a class.

    Upper-case class attributes holding a str, int, float or bool are
    listed as "NAME (type)"; nested `Enum` subclasses are listed after
    them together with their member names.

    Args:
        cls (type): The class to inspect (only its own `__dict__`).
    Returns:
        str, an HTML list, or an empty string if there are no constants.
    """
    values = [
        "%s %s" % (name, em("(" + type(value).__name__ + ")"))
        for name, value in cls.__dict__.items()
        if name.isupper() and isinstance(value, (str, int, float, bool))]

    for name, value in cls.__dict__.items():
        if isinstance(value, type) and issubclass(value, Enum):
            enumeration_items = unordered_list([item.name for item in value])
            values.append("Enumeration %s%s" % (name, enumeration_items))

    return unordered_list(values)


def generate_class(cls):
    """ Generates HelpDocs style documentation for the given class.

    Args:
        cls (type): The class to generate docs for.
    Return:
        HelpDocs style documentation for `cls` containing class description,
        methods and fields and relationships if `schema_class`.
    """
    text = []
    schema_class = issubclass(cls, Entity)

    text.append(header(2, cls.__name__, snake_case(cls.__name__)))

    package_and_superclasses = "Class " + cls.__module__ + "." + cls.__name__
    if schema_class:
        superclasses = [plugin.__name__ for plugin
                        in (Updateable, Deletable, BulkDeletable)
                        if issubclass(cls, plugin)]
        if superclasses:
            package_and_superclasses += " (%s)" % ", ".join(superclasses)
    package_and_superclasses += "."
    # The class's own qualified name must not be turned into a link.
    text.append(paragraph(package_and_superclasses, False))

    text.append(preprocess_docstring(cls.__doc__))

    constants = generate_constants(cls)
    if constants:
        text.append(header(3, "Constants"))
        text.append(constants)

    if schema_class:
        text.append(header(3, "Fields"))
        text.append(generate_fields(cls))
        text.append(header(3, "Relationships"))
        text.append(generate_relationships(cls))

    for name, predicate in (
            ("Static Methods", lambda attr: isinstance(attr, staticmethod)),
            ("Class Methods", lambda attr: isinstance(attr, classmethod)),
            ("Object Methods", is_method)):
        functions = generate_functions(cls, predicate).strip()
        if functions:
            text.append(header(3, name))
            text.append(functions)

    return "\n".join(text)


def generate_all():
    """ Generates the full HelpDocs API documentation article body.

    Returns:
        str, a table of contents (three class lists) followed by the
        documentation of every general, data and error class.
    """
    text = []
    text.append(header(3, "General Classes"))
    text.append(unordered_list([cls.__name__ for cls in GENERAL_CLASSES]))
    text.append(header(3, "Data Classes"))
    text.append(unordered_list([cls.__name__ for cls in SCHEMA_CLASSES]))
    text.append(header(3, "Error Classes"))
    text.append(unordered_list([cls.__name__ for cls in ERROR_CLASSES]))

    text.append(header(1, "General classes"))
    text.extend(generate_class(cls) for cls in GENERAL_CLASSES)
    text.append(header(1, "Data Classes"))
    text.extend(generate_class(cls) for cls in SCHEMA_CLASSES)
    text.append(header(1, "Error Classes"))
    text.extend(generate_class(cls) for cls in ERROR_CLASSES)
    return "\n".join(text)


def main():
    """ Generates the documentation and either uploads it to HelpDocs (when
    an API key is given on the command line) or writes it to stdout.

    Raises:
        Exception: If the HelpDocs upload does not return status 200.
    """
    argp = ArgumentParser(description=__doc__,
                          formatter_class=RawDescriptionHelpFormatter)
    argp.add_argument("helpdocs_api_key", nargs="?",
                      help="Helpdocs API key, used in uploading directly ")

    args = argp.parse_args()

    body = generate_all()

    if args.helpdocs_api_key is not None:
        url = "https://api.helpdocs.io/v1/article/zg9hp7yx3u?key=" + \
            args.helpdocs_api_key
        response = requests.patch(url, data=json.dumps({"body": body}),
                                  headers={'content-type': 'application/json'})
        if response.status_code != 200:
            raise Exception("Failed to upload article with status code: %d "
                            "and message: %s" % (response.status_code,
                                                 response.text))
    else:
        sys.stdout.write(body)
        sys.stdout.write("\n")


if __name__ == "__main__":
    main()