diff --git a/Dockerfile b/Dockerfile
index 7849fe514..c2a9face0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,6 @@ FROM python:3.7
 
 RUN pip install pytest pytest-cases
-
 WORKDIR /usr/src/labelbox
 COPY requirements.txt /usr/src/labelbox
 RUN pip install -r requirements.txt
 
diff --git a/labelbox/exceptions.py b/labelbox/exceptions.py
index a1a3a1fc7..45f5fadcc 100644
--- a/labelbox/exceptions.py
+++ b/labelbox/exceptions.py
@@ -106,6 +106,10 @@ class UuidError(LabelboxError):
     pass
 
 
+class InconsistentOntologyException(Exception):
+    pass
+
+
 class MALValidationError(LabelboxError):
     """Raised when user input is invalid for MAL imports."""
     ...
diff --git a/labelbox/schema/ontology.py b/labelbox/schema/ontology.py
index 6fea2fa8d..de321f722 100644
--- a/labelbox/schema/ontology.py
+++ b/labelbox/schema/ontology.py
@@ -1,66 +1,234 @@
 import abc
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+from enum import Enum, auto
+import colorsys
 from typing import Any, Callable, Dict, List, Optional, Union
 
+from labelbox.schema.project import Project
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable
 from labelbox.orm.model import Entity, Field, Relationship
 from labelbox.utils import snake_case, camel_case
+from labelbox.exceptions import InconsistentOntologyException
 
 
 @dataclass
-class OntologyEntity:
-    required: bool
-    name: str
+class Option:
+    """
+    An option is a possible answer within a Classification object in
+    a Project's ontology.
+    To instantiate, only the "value" parameter needs to be passed in.
 
-@dataclass
-class Option:
-    label: str
-    value: str
+    Example(s):
+        option = Option(value = "Option Example")
+
+    Attributes:
+        value: (str)
+        schema_id: (str)
+        feature_schema_id: (str)
+        options: (list)
+    """
+    value: Union[str, int]
+    schema_id: Optional[str] = None
     feature_schema_id: Optional[str] = None
-    schema_node_id: Optional[str] = None
+    options: List["Classification"] = field(default_factory=list)
+
+    @property
+    def label(self):
+        return self.value
 
     @classmethod
-    def from_json(cls, json_dict):
-        _dict = convert_keys(json_dict, snake_case)
-        return cls(**_dict)
+    def from_dict(cls, dictionary: Dict[str, Any]):
+        return Option(value=dictionary["value"],
+                      schema_id=dictionary.get("schemaNodeId", None),
+                      feature_schema_id=dictionary.get("featureSchemaId", None),
+                      options=[
+                          Classification.from_dict(o)
+                          for o in dictionary.get("options", [])
+                      ])
+
+    def asdict(self) -> Dict[str, Any]:
+        return {
+            "schemaNodeId": self.schema_id,
+            "featureSchemaId": self.feature_schema_id,
+            "label": self.label,
+            "value": self.value,
+            "options": [o.asdict() for o in self.options]
+        }
+
+    def add_option(self, option: 'Classification'):
+        if option.instructions in (o.instructions for o in self.options):
+            raise InconsistentOntologyException(
+                f"Duplicate nested classification '{option.instructions}' "
+                f"for option '{self.label}'")
+        self.options.append(option)
 
 
 @dataclass
-class Classification(OntologyEntity):
-    type: str
+class Classification:
+    """
+    A classification to be added to a Project's ontology. The
+    classification is dependent on the Classification Type.
+
+    To instantiate, the "class_type" and "instructions" parameters must
+    be passed in.
+
+    The "options" parameter holds a list of Option objects. This is not
+    necessary for some Classification types, such as TEXT. To see which
+    types require options, look at the "_REQUIRES_OPTIONS" class variable.
+
+    Example(s):
+        classification = Classification(
+            class_type = Classification.Type.TEXT,
+            instructions = "Classification Example")
+
+        classification_two = Classification(
+            class_type = Classification.Type.RADIO,
+            instructions = "Second Example")
+        classification_two.add_option(Option(
+            value = "Option Example"))
+
+    Attributes:
+        class_type: (Classification.Type)
+        instructions: (str)
+        required: (bool)
+        options: (list)
+        schema_id: (str)
+        feature_schema_id: (str)
+    """
+
+    class Type(Enum):
+        TEXT = "text"
+        CHECKLIST = "checklist"
+        RADIO = "radio"
+        DROPDOWN = "dropdown"
+
+    _REQUIRES_OPTIONS = {Type.CHECKLIST, Type.RADIO, Type.DROPDOWN}
+
+    class_type: Type
     instructions: str
-    options: List[Option]
+    required: bool = False
+    options: List[Option] = field(default_factory=list)
+    schema_id: Optional[str] = None
     feature_schema_id: Optional[str] = None
-    schema_node_id: Optional[str] = None
+
+    @property
+    def name(self):
+        return self.instructions
 
     @classmethod
-    def from_json(cls, json_dict):
-        _dict = convert_keys(json_dict, snake_case)
-        _dict['options'] = [
-            Option.from_json(option) for option in _dict['options']
-        ]
-        return cls(**_dict)
+    def from_dict(cls, dictionary: Dict[str, Any]):
+        return Classification(
+            class_type=Classification.Type(dictionary["type"]),
+            instructions=dictionary["instructions"],
+            required=dictionary["required"],
+            options=[Option.from_dict(o) for o in dictionary["options"]],
+            schema_id=dictionary.get("schemaNodeId", None),
+            feature_schema_id=dictionary.get("featureSchemaId", None))
+
+    def asdict(self) -> Dict[str, Any]:
+        if self.class_type in Classification._REQUIRES_OPTIONS \
+                and len(self.options) < 1:
+            raise InconsistentOntologyException(
+                f"Classification '{self.instructions}' requires options.")
+        return {
+            "type": self.class_type.value,
+            "instructions": self.instructions,
+            "name": self.name,
+            "required": self.required,
+            "options": [o.asdict() for o in self.options],
+            "schemaNodeId": self.schema_id,
+            "featureSchemaId": self.feature_schema_id
+        }
+
+    def add_option(self, option: Option):
+        if option.value in (o.value for o in self.options):
+            raise InconsistentOntologyException(
+                f"Duplicate option '{option.value}' "
+                f"for classification '{self.name}'.")
+        self.options.append(option)
 
 
 @dataclass
-class Tool(OntologyEntity):
-    tool: str
-    color: str
-    classifications: List[Classification]
+class Tool:
+    """
+    A tool to be added to a Project's ontology. The tool is
+    dependent on the Tool Type.
+
+    To instantiate, the "tool" and "name" parameters must
+    be passed in.
+
+    The "classifications" parameter holds a list of Classification objects.
+    This can be used to add nested classifications to a tool.
+
+    Example(s):
+        tool = Tool(
+            tool = Tool.Type.LINE,
+            name = "Tool example")
+        classification = Classification(
+            class_type = Classification.Type.TEXT,
+            instructions = "Classification Example")
+        tool.add_classification(classification)
+
+    Attributes:
+        tool: (Tool.Type)
+        name: (str)
+        required: (bool)
+        color: (str)
+        classifications: (list)
+        schema_id: (str)
+        feature_schema_id: (str)
+    """
+
+    class Type(Enum):
+        POLYGON = "polygon"
+        SEGMENTATION = "superpixel"
+        POINT = "point"
+        BBOX = "rectangle"
+        LINE = "line"
+        NER = "named-entity"
+
+    tool: Type
+    name: str
+    required: bool = False
+    color: Optional[str] = None
+    classifications: List[Classification] = field(default_factory=list)
+    schema_id: Optional[str] = None
     feature_schema_id: Optional[str] = None
-    schema_node_id: Optional[str] = None
 
     @classmethod
-    def from_json(cls, json_dict):
-        _dict = convert_keys(json_dict, snake_case)
-        _dict['classifications'] = [
-            Classification.from_json(classification)
-            for classification in _dict['classifications']
-        ]
-        return cls(**_dict)
+    def from_dict(cls, dictionary: Dict[str, Any]):
+        return Tool(name=dictionary['name'],
+                    schema_id=dictionary.get("schemaNodeId", None),
+                    feature_schema_id=dictionary.get("featureSchemaId", None),
+                    required=dictionary["required"],
+                    tool=Tool.Type(dictionary["tool"]),
+                    classifications=[
+                        Classification.from_dict(c)
+                        for c in dictionary["classifications"]
+                    ],
+                    color=dictionary["color"])
+
+    def asdict(self) -> Dict[str, Any]:
+        return {
+            "tool": self.tool.value,
+            "name": self.name,
+            "required": self.required,
+            "color": self.color,
+            "classifications": [c.asdict() for c in self.classifications],
+            "schemaNodeId": self.schema_id,
+            "featureSchemaId": self.feature_schema_id
+        }
+
+    def add_classification(self, classification: Classification):
+        if classification.instructions in (
+                c.instructions for c in self.classifications):
+            raise InconsistentOntologyException(
+                f"Duplicate nested classification '{classification.instructions}' "
+                f"for tool '{self.name}'")
+        self.classifications.append(classification)
 
 
 class Ontology(DbObject):
@@ -98,27 +266,89 @@ def tools(self) -> List[Tool]:
         """Get list of tools (AKA objects) in an Ontology."""
         if self._tools is None:
             self._tools = [
-                Tool.from_json(tool) for tool in self.normalized['tools']
+                Tool.from_dict(tool) for tool in self.normalized['tools']
             ]
-        return self._tools  # type: ignore
+        return self._tools
 
     def classifications(self) -> List[Classification]:
         """Get list of classifications in an Ontology."""
         if self._classifications is None:
             self._classifications = [
-                Classification.from_json(classification)
+                Classification.from_dict(classification)
                 for classification in self.normalized['classifications']
             ]
-        return self._classifications  # type: ignore
+        return self._classifications
+
+
+@dataclass
+class OntologyBuilder:
+    """
+    A class to help create an ontology for a Project. This should be used
+    for making Project ontologies from scratch. OntologyBuilder can also
+    pull from an already existing Project's ontology.
+
+    There are no required instantiation arguments.
+
+    To create an ontology, use the asdict() method after fully building your
+    ontology within this class, and insert it into project.setup() as the
+    "labeling_frontend_options" parameter.
+
+    Example:
+        builder = OntologyBuilder()
+        ...
+        frontend = list(client.get_labeling_frontends())[0]
+        project.setup(frontend, builder.asdict())
 
-def convert_keys(json_dict: Dict[str, Any],
-                 converter: Callable) -> Dict[str, Any]:
-    if isinstance(json_dict, dict):
+    Attributes:
+        tools: (list)
+        classifications: (list)
+    """
+    tools: List[Tool] = field(default_factory=list)
+    classifications: List[Classification] = field(default_factory=list)
+
+    @classmethod
+    def from_dict(cls, dictionary: Dict[str, Any]):
+        return OntologyBuilder(
+            tools=[Tool.from_dict(t) for t in dictionary["tools"]],
+            classifications=[
+                Classification.from_dict(c)
+                for c in dictionary["classifications"]
+            ])
+
+    def asdict(self):
+        self._update_colors()
         return {
-            converter(key): convert_keys(value, converter)
-            for key, value in json_dict.items()
+            "tools": [t.asdict() for t in self.tools],
+            "classifications": [c.asdict() for c in self.classifications]
         }
-    if isinstance(json_dict, list):
-        return [convert_keys(ele, converter) for ele in json_dict]
-    return json_dict
+
+    def _update_colors(self):
+        num_tools = len(self.tools)
+
+        for index in range(num_tools):
+            hsv_color = (index * 1 / num_tools, 1, 1)
+            rgb_color = tuple(
+                int(255 * x) for x in colorsys.hsv_to_rgb(*hsv_color))
+            if self.tools[index].color is None:
+                self.tools[index].color = '#%02x%02x%02x' % rgb_color
+
+    @classmethod
+    def from_project(cls, project: Project):
+        ontology = project.ontology().normalized
+        return OntologyBuilder.from_dict(ontology)
+
+    def add_tool(self, tool: Tool):
+        if tool.name in (t.name for t in self.tools):
+            raise InconsistentOntologyException(
+                f"Duplicate tool name '{tool.name}'. ")
+        self.tools.append(tool)
+
+    def add_classification(self, classification: Classification):
+        if classification.instructions in (
+                c.instructions for c in self.classifications):
+            raise InconsistentOntologyException(
+                f"Duplicate classification instructions '{classification.instructions}'. "
+            )
+        self.classifications.append(classification)
diff --git a/tests/integration/test_ontology.py b/tests/integration/test_ontology.py
index ea3425cd4..8666ce055 100644
--- a/tests/integration/test_ontology.py
+++ b/tests/integration/test_ontology.py
@@ -1,76 +1,246 @@
-import unittest
 from typing import Any, Dict, List, Union
 
-from labelbox import LabelingFrontend
+import pytest
+
+from labelbox import LabelingFrontend
+from labelbox.exceptions import InconsistentOntologyException
+from labelbox.schema.ontology import Tool, Classification, Option, \
+    Ontology, OntologyBuilder
 
 
-def sample_ontology() -> Dict[str, Any]:
-    return {
-        "tools": [{
-            "required": False,
-            "name": "Dog",
-            "color": "#FF0000",
-            "tool": "rectangle",
-            "classifications": []
-        }],
-        "classifications": [{
-            "required": True,
-            "instructions": "This is a question.",
-            "name": "this_is_a_question.",
-            "type": "radio",
-            "options": [{
-                "label": "Yes",
-                "value": "yes"
-            }, {
-                "label": "No",
-                "value": "no"
-            }]
-        }]
-    }
-
-
-def test_create_ontology(client, project) -> None:
-    """ Tests that the ontology that a project was set up with can be grabbed."""
-    frontend = list(
-        client.get_labeling_frontends(
-            where=LabelingFrontend.name == "Editor"))[0]
-    project.setup(frontend, sample_ontology())
-    normalized_ontology = project.ontology().normalized
-
-    def _remove_schema_ids(
-            ontology_part: Union[List, Dict[str, Any]]) -> Dict[str, Any]:
-        """ Recursively scrub the normalized ontology of any schema information."""
-        removals = {'featureSchemaId', 'schemaNodeId'}
-
-        if isinstance(ontology_part, list):
-            return [_remove_schema_ids(part) for part in ontology_part]
-        if isinstance(ontology_part, dict):
-            return {
-                key: _remove_schema_ids(value)
-                for key, value in ontology_part.items()
-                if key not in removals
-            }
-        return ontology_part
-
-    removed = _remove_schema_ids(normalized_ontology)
-    assert removed == sample_ontology()
-
-    ontology = project.ontology()
-
-    tools = ontology.tools()
-    assert tools
-    for tool in tools:
-        assert tool.feature_schema_id
-        assert tool.schema_node_id
-
-    classifications = ontology.classifications()
-    assert classifications
-    for classification in classifications:
-        assert classification.feature_schema_id
-        assert classification.schema_node_id
-        for option in classification.options:
-            assert option.feature_schema_id
-            assert option.schema_node_id
+_SAMPLE_ONTOLOGY = {
+    "tools": [{
+        "schemaNodeId": None,
+        "featureSchemaId": None,
+        "required": False,
+        "name": "poly",
+        "color": "#FF0000",
+        "tool": "polygon",
+        "classifications": []
+    }, {
+        "schemaNodeId": None,
+        "featureSchemaId": None,
+        "required": False,
+        "name": "segment",
+        "color": "#FF0000",
+        "tool": "superpixel",
+        "classifications": []
+    }, {
+        "schemaNodeId": None,
+        "featureSchemaId": None,
+        "required": False,
+        "name": "bbox",
+        "color": "#FF0000",
+        "tool": "rectangle",
+        "classifications": [{
+            "schemaNodeId": None,
+            "featureSchemaId": None,
+            "required": True,
+            "instructions": "nested classification",
+            "name": "nested classification",
+            "type": "radio",
+            "options": [{
+                "schemaNodeId": None,
+                "featureSchemaId": None,
+                "label": "first",
+                "value": "first",
+                "options": [{
+                    "schemaNodeId": None,
+                    "featureSchemaId": None,
+                    "required": False,
+                    "instructions": "nested nested text",
+                    "name": "nested nested text",
+                    "type": "text",
+                    "options": []
+                }]
+            }, {
+                "schemaNodeId": None,
+                "featureSchemaId": None,
+                "label": "second",
+                "value": "second",
+                "options": []
+            }]
+        }, {
+            "schemaNodeId": None,
+            "featureSchemaId": None,
+            "required": True,
+            "instructions": "nested text",
+            "name": "nested text",
+            "type": "text",
+            "options": []
+        }]
+    }, {
+        "schemaNodeId": None,
+        "featureSchemaId": None,
+        "required": False,
+        "name": "dot",
+        "color": "#FF0000",
+        "tool": "point",
+        "classifications": []
+    }, {
+        "schemaNodeId": None,
+        "featureSchemaId": None,
+        "required": False,
+        "name": "polyline",
+        "color": "#FF0000",
+        "tool": "line",
+        "classifications": []
+    }, {
+        "schemaNodeId": None,
+        "featureSchemaId": None,
+        "required": False,
+        "name": "ner",
+        "color": "#FF0000",
+        "tool": "named-entity",
+        "classifications": []
+    }],
+    "classifications": [{
+        "schemaNodeId": None,
+        "featureSchemaId": None,
+        "required": True,
+        "instructions": "This is a question.",
+        "name": "This is a question.",
+        "type": "radio",
+        "options": [{
+            "schemaNodeId": None,
+            "featureSchemaId": None,
+            "label": "yes",
+            "value": "yes",
+            "options": []
+        }, {
+            "schemaNodeId": None,
+            "featureSchemaId": None,
+            "label": "no",
+            "value": "no",
+            "options": []
+        }]
+    }]
+}
+
+
+@pytest.mark.parametrize("tool_type", list(Tool.Type))
+def test_create_tool(tool_type) -> None:
+    t = Tool(tool=tool_type, name="tool")
+    assert (t.tool == tool_type)
+
+
+@pytest.mark.parametrize("class_type", list(Classification.Type))
+def test_create_classification(class_type) -> None:
+    c = Classification(class_type=class_type, instructions="classification")
+    assert (c.class_type == class_type)
+
+
+@pytest.mark.parametrize("value, expected_value, typing",
+                         [(3, 3, int), ("string", "string", str)])
+def test_create_option(value, expected_value, typing) -> None:
+    o = Option(value=value)
+    assert (o.value == expected_value)
+    assert (o.value == o.label)
+
+
+def test_create_empty_ontology() -> None:
+    o = OntologyBuilder()
+    assert (o.tools == [])
+    assert (o.classifications == [])
+
+
+def test_add_ontology_tool() -> None:
+    o = OntologyBuilder()
+    o.add_tool(Tool(tool=Tool.Type.BBOX, name="bounding box"))
+
+    second_tool = Tool(tool=Tool.Type.SEGMENTATION, name="segmentation")
+    o.add_tool(second_tool)
+    assert len(o.tools) == 2
+
+    for tool in o.tools:
+        assert (type(tool) == Tool)
+
+    with pytest.raises(InconsistentOntologyException) as exc:
+        o.add_tool(Tool(tool=Tool.Type.BBOX, name="bounding box"))
+    assert "Duplicate tool name" in str(exc.value)
+
+
+def test_add_ontology_classification() -> None:
+    o = OntologyBuilder()
+    o.add_classification(
+        Classification(class_type=Classification.Type.TEXT,
+                       instructions="text"))
+
+    second_classification = Classification(
+        class_type=Classification.Type.CHECKLIST, instructions="checklist")
+    o.add_classification(second_classification)
+    assert len(o.classifications) == 2
+
+    for classification in o.classifications:
+        assert (type(classification) == Classification)
+
+    with pytest.raises(InconsistentOntologyException) as exc:
+        o.add_classification(
+            Classification(class_type=Classification.Type.TEXT,
+                           instructions="text"))
+    assert "Duplicate classification instructions" in str(exc.value)
+
+
+def test_tool_add_classification() -> None:
+    t = Tool(tool=Tool.Type.SEGMENTATION, name="segmentation")
+    c = Classification(class_type=Classification.Type.TEXT, instructions="text")
+    t.add_classification(c)
+    assert t.classifications == [c]
+
+    with pytest.raises(Exception) as exc:
+        t.add_classification(c)
+    assert "Duplicate nested classification" in str(exc.value)
+
+
+def test_classification_add_option() -> None:
+    c = Classification(class_type=Classification.Type.RADIO,
+                       instructions="radio")
+    o = Option(value="option")
+    c.add_option(o)
+    assert c.options == [o]
+
+    with pytest.raises(InconsistentOntologyException) as exc:
+        c.add_option(Option(value="option"))
+    assert "Duplicate option" in str(exc.value)
+
+
+def test_option_add_option() -> None:
+    o = Option(value="option")
+    c = Classification(class_type=Classification.Type.TEXT, instructions="text")
+    o.add_option(c)
+    assert o.options == [c]
+
+    with pytest.raises(InconsistentOntologyException) as exc:
+        o.add_option(c)
+    assert "Duplicate nested classification" in str(exc.value)
+
+
+def test_ontology_asdict(project) -> None:
+    assert OntologyBuilder.from_dict(
+        _SAMPLE_ONTOLOGY).asdict() == _SAMPLE_ONTOLOGY
+
+
+def test_from_project_ontology(client, project) -> None:
+    o = OntologyBuilder.from_project(project)
+    assert o.asdict() == project.ontology().normalized
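
A minimal end-to-end sketch of how the OntologyBuilder API introduced above is meant to be used, assuming a valid LABELBOX_API_KEY in the environment and an existing project; the "<PROJECT_ID>" placeholder, the tool/classification names, and the variable names are illustrative only, not part of the change. Client() and get_project() are the usual labelbox client entry points; everything else comes from the classes added in this diff.

    from labelbox import Client, LabelingFrontend
    from labelbox.schema.ontology import (Classification, Option,
                                          OntologyBuilder, Tool)

    # Assumes LABELBOX_API_KEY is set; "<PROJECT_ID>" is a placeholder, not a real id.
    client = Client()
    project = client.get_project("<PROJECT_ID>")

    # Build an ontology from scratch: a bounding box tool with a nested radio question.
    builder = OntologyBuilder()
    bbox = Tool(tool=Tool.Type.BBOX, name="Car")
    occluded = Classification(class_type=Classification.Type.RADIO,
                              instructions="Is the car occluded?")
    occluded.add_option(Option(value="yes"))
    occluded.add_option(Option(value="no"))
    bbox.add_classification(occluded)
    builder.add_tool(bbox)

    # Attach the ontology to the project through the standard editor frontend.
    frontend = list(
        client.get_labeling_frontends(where=LabelingFrontend.name == "Editor"))[0]
    project.setup(frontend, builder.asdict())

    # Round-trip: rebuild a builder from the project's normalized ontology.
    rebuilt = OntologyBuilder.from_project(project)
    assert rebuilt.asdict() == project.ontology().normalized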