diff --git a/examples/README.md b/examples/README.md index eda753467..b10d7dde5 100644 --- a/examples/README.md +++ b/examples/README.md @@ -18,20 +18,19 @@ | Ontologies | [Github](basics/ontologies.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/ontologies.ipynb) | | Projects | [Github](basics/projects.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/projects.ipynb) | | User Management | [Github](basics/user_management.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/user_management.ipynb) | +| Export V1 to V2 migration | [Github](basics/Export_V1_to_V2_migration_support.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/Export_V1_to_V2_migration_support.ipynb) | --- +## [Exports](exports) -## [Model Training](https://docs.labelbox.com/docs/integration-with-model-training-service) - -Train a model using data annotated on Labelbox - -| Notebook | Github | Google Colab | -| ------------------------------- | ----------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Object Detection (Detectron2) | [Github](integrations/detectron2/coco_object.ipynb) | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/integrations/detectron2/coco_object.ipynb) | -| Panoptic Detection (Detectron2) | [Github](integrations/detectron2/coco_panoptic.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/integrations/detectron2/coco_panoptic.ipynb) | +| Notebook | Github | Google Colab | Learn more | +| ------------------------------------- | ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | +| Export data | [Github](exports/export_data.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/exports/export_data.ipynb) | +| Composite mask export | [Github](exports/composite_mask_export.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/exports/composite_mask_export.ipynb) | --- + ## [Annotation Import (Ground Truth & MAL)](annotation_import) | Notebook | Github | Google Colab | Learn more | diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb index f0a012e5c..349feabb8 100644 --- a/examples/annotation_import/pdf.ipynb +++ b/examples/annotation_import/pdf.ipynb @@ -79,6 +79,8 @@ "metadata": {}, "source": [ "import uuid\n", + "import json\n", + "import requests\n", "import labelbox as lb\n", "import labelbox.types as lb_types\n" ], @@ -133,7 +135,7 @@ ")\n", "\n", 
"# NDJSON\n", - "entities_annotations_ndjson = { \n", + "entities_annotations_ndjson = {\n", " \"name\": \"named_entity\",\n", " \"textSelections\": [\n", " {\n", @@ -155,10 +157,10 @@ "source": [ "########### Radio Classification #########\n", "\n", - "# Annotation types \n", + "# Annotation types\n", "radio_annotation = lb_types.ClassificationAnnotation(\n", " name=\"radio_question\",\n", - " value=lb_types.Radio(answer = \n", + " value=lb_types.Radio(answer =\n", " lb_types.ClassificationAnswer(name = \"first_radio_answer\")\n", " )\n", ")\n", @@ -177,7 +179,7 @@ "source": [ "############ Checklist Classification ###########\n", "\n", - "# Annotation types \n", + "# Annotation types\n", "checklist_annotation = lb_types.ClassificationAnnotation(\n", " name=\"checklist_question\",\n", " value=lb_types.Checklist(answer = [\n", @@ -208,7 +210,7 @@ "bbox_annotation = lb_types.ObjectAnnotation(\n", " name=\"bounding_box\", # must match your ontology feature\"s name\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top \n", + " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n", " end=lb_types.Point(x=518.571, y=245.143), # x= left + width , y = top + height\n", " page=0,\n", " unit=lb_types.RectangleUnit.POINTS\n", @@ -257,13 +259,13 @@ "nested_checklist_annotation_ndjson = {\n", " \"name\": \"nested_checklist_question\",\n", " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\", \n", + " \"name\": \"first_checklist_answer\",\n", " \"classifications\" : [\n", " {\n", - " \"name\": \"sub_checklist_question\", \n", + " \"name\": \"sub_checklist_question\",\n", " \"answer\": {\"name\": \"first_sub_checklist_answer\"}\n", - " } \n", - " ] \n", + " }\n", + " ]\n", " }]\n", "}\n", "\n", @@ -306,7 +308,7 @@ { "metadata": {}, "source": [ - "############## Classification Free-form text ############## \n", + "############## Classification Free-form text ##############\n", "\n", "text_annotation = 
lb_types.ClassificationAnnotation(\n", " name=\"free_text\", # must match your ontology feature\"s name\n", @@ -331,7 +333,7 @@ "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", " name=\"bbox_with_radio_subclass\",\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top \n", + " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n", " end=lb_types.Point(x=566.657, y=420.986), # x= left + width , y = top + height\n", " unit=lb_types.RectangleUnit.POINTS,\n", " page=1\n", @@ -362,12 +364,12 @@ " \"name\": \"bbox_with_radio_subclass\",\n", " \"classifications\": [\n", " {\n", - " \"name\": \"sub_radio_question\", \n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\", \n", + " \"name\": \"first_sub_radio_answer\",\n", " \"classifications\": [\n", " {\n", - " \"name\": \"second_sub_radio_question\", \n", + " \"name\": \"second_sub_radio_question\",\n", " \"answer\": {\n", " \"name\": \"second_sub_radio_answer\"}\n", " }\n", @@ -392,7 +394,7 @@ { "metadata": {}, "source": [ - "############ NER with nested classifications ######## \n", + "############ NER with nested classifications ########\n", "\n", "\n", "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", @@ -423,7 +425,7 @@ " \"classifications\":[\n", " {\n", " \"name\": \"sub_checklist_question\",\n", - " \"answer\": [{\"name\": \"first_sub_checklist_answer\"}] \n", + " \"answer\": [{\"name\": \"first_sub_checklist_answer\"}]\n", " }\n", " ],\n", " \"textSelections\": [\n", @@ -434,9 +436,9 @@ " \"groupId\": \"\",\n", " \"page\": 1\n", " }\n", - " ] \n", + " ]\n", "}\n", - " \n" + "\n" ], "cell_type": "code", "outputs": [], @@ -445,7 +447,7 @@ { "metadata": {}, "source": [ - "######### Relationships ########## \n", + "######### Relationships ##########\n", "entity_source = lb_types.ObjectAnnotation(\n", " name=\"named_entity\",\n", " value= 
lb_types.DocumentEntity(\n", @@ -482,7 +484,7 @@ " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", " ))\n", "\n", - "## Only supported for MAL imports \n", + "## Only supported for MAL imports\n", "uuid_source = str(uuid.uuid4())\n", "uuid_target = str(uuid.uuid4())\n", "\n", @@ -498,7 +500,7 @@ " \"page\": 1\n", " }\n", " ]\n", - " \n", + "\n", "}\n", "\n", "entity_target_ndjson = {\n", @@ -515,7 +517,7 @@ " ]\n", "}\n", "ner_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\", \n", + " \"name\": \"relationship\",\n", " \"relationship\": {\n", " \"source\": uuid_source,\n", " \"target\": uuid_target,\n", @@ -537,7 +539,7 @@ "bbox_source = lb_types.ObjectAnnotation(\n", " name=\"bounding_box\",\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top \n", + " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n", " end=lb_types.Point(x=270.907, y=149.556), # x = left + width , y = top + height\n", " unit=lb_types.RectangleUnit.POINTS,\n", " page=1\n", @@ -563,7 +565,7 @@ " ))\n", "\n", "\n", - "## Only supported for MAL imports \n", + "## Only supported for MAL imports\n", "uuid_source_2 = str(uuid.uuid4())\n", "uuid_target_2 = str(uuid.uuid4())\n", "\n", @@ -594,7 +596,7 @@ "}\n", "\n", "bbox_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\", \n", + " \"name\": \"relationship\",\n", " \"relationship\": {\n", " \"source\": uuid_source_2,\n", " \"target\": uuid_target_2,\n", @@ -623,12 +625,27 @@ { "metadata": {}, "source": [ - "## Text layer url is required for uploading entity annotations\n", - "global_key = \"0801.3483.pdf\"\n", + "Passing a `text_layer_url` is no longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. 
\n", + "\n", + "However, it's important to note that Google Document AI imposes specific restrictions on document size:\n", + "- The document must have no more than 15 pages.\n", + "- The file size should not exceed 20 MB.\n", + "\n", + "Furthermore, Google Document AI optimizes documents before OCR processing. This optimization might include rotating images or pages to ensure that text appears horizontally. Consequently, token coordinates are calculated based on the rotated/optimized images, resulting in potential discrepancies with the original PDF document.\n", + "\n", + "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. As a result, all tokens in the text layer are also rotated by 90 degrees.\n", + "\n", + "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "global_key = \"0801.3483_doc.pdf\"\n", "img_url = {\n", " \"row_data\": {\n", - " \"pdf_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n", - " \"text_layer_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json\"\n", + " \"pdf_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", " },\n", " \"global_key\": global_key\n", "}\n", @@ -637,8 +654,15 @@ "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", "task = dataset.create_data_rows([img_url])\n", "task.wait_till_done()\n", - "print(\"Errors:\",task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if 'Duplicate global key' in error['message'] and dataset.row_count == 0:\n", + " # If the global key already 
exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" ], "cell_type": "code", "outputs": [], @@ -659,9 +683,9 @@ "\n", "ontology_builder = lb.OntologyBuilder(\n", " classifications=[ # List of Classification objects\n", - " lb.Classification( \n", + " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\", \n", + " name=\"radio_question\",\n", " scope = lb.Classification.Scope.GLOBAL,\n", " options=[\n", " lb.Option(value=\"first_radio_answer\"),\n", @@ -670,13 +694,13 @@ " ),\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\", \n", + " name=\"checklist_question\",\n", " scope = lb.Classification.Scope.GLOBAL,\n", " options=[\n", " lb.Option(value=\"first_checklist_answer\"),\n", " lb.Option(value=\"second_checklist_answer\")\n", " ]\n", - " ), \n", + " ),\n", " lb.Classification(\n", " class_type=lb.Classification.Type.TEXT,\n", " name=\"free_text\",\n", @@ -706,15 +730,15 @@ " options=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\", \n", + " name=\"sub_checklist_question\",\n", " options=[lb.Option(\"first_sub_checklist_answer\")]\n", " )\n", " ])\n", " ]\n", - " ), \n", + " ),\n", " ],\n", " tools=[ # List of Tool objects\n", - " lb.Tool( tool=lb.Tool.Type.BBOX,name=\"bounding_box\"), \n", + " lb.Tool( tool=lb.Tool.Type.BBOX,name=\"bounding_box\"),\n", " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP,name=\"relationship\"),\n", " lb.Tool(tool=lb.Tool.Type.NER,\n", @@ -767,7 +791,7 @@ "metadata": {}, "source": [ "# Create a Labelbox project\n", - "project = client.create_project(name=\"PDF_annotation_demo\", \n", + "project = client.create_project(name=\"PDF_annotation_demo\",\n", " media_type=lb.MediaType.Document)\n", 
"project.setup_editor(ontology)" ], @@ -811,17 +835,39 @@ "metadata": {}, "source": [ "##### Step 5.1: First, we need to populate the text selections for Entity annotations\n", - "To learn how to generate a text layer for your documents please refer to the following repositories/files: \n", - "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/gcloud/gcp-vision-to-lb-text-layer.py\n", - "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/adobe/adobe-ocr-to-lb-text-layer.py\n" + "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`\n" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "import requests\n", - "import json\n", + "To extract the generated text layer url we first need to export the data row" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "client.enable_experimental = True\n", + "task = lb.DataRow.export(client=client,global_keys=[global_key])\n", + "task.wait_till_done()\n", + "stream = task.get_stream()\n", + "\n", + "text_layer = \"\"\n", + "for output in stream:\n", + " output_json = json.loads(output.json_str)\n", + " text_layer = output_json['media_attributes']['text_layer_url']\n", + "print(text_layer)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ "\n", "# Helper method\n", "def update_text_selections(annotation, group_id, list_tokens, page):\n", @@ -834,16 +880,14 @@ " }\n", " ]\n", " })\n", - " \n", "\n", - "text_layer = \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json\"\n", "\n", "# Fetch the content of the text layer\n", - "res = requests.get(text_layer) \n", + "res = requests.get(text_layer)\n", "\n", "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = 
[\"Metal-insulator (MI) transitions have been one of the\" , \n", - " \"T. Sasaki,* N. Yoneyama, and N. Kobayashi\", \n", + "content_phrases = [\"Metal-insulator (MI) transitions have been one of the\" ,\n", + " \"T. Sasaki, N. Yoneyama, and N. Kobayashi\",\n", " \"Organic charge transfer salts based on the donor\",\n", " \"the experimental investigations on this issue have not\"]\n", "\n", @@ -862,7 +906,7 @@ " text_selections.append(document_text_selection)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=entities_annotations_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=list_tokens, # ids representing individual words from the group\n", " page=1)\n", " if group[\"content\"] == content_phrases[1]:\n", @@ -872,7 +916,7 @@ " text_selections_ner.append(ner_text_selection)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=ner_with_checklist_subclass_annotation_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=list_tokens_2, # ids representing individual words from the group\n", " page=1)\n", " if group[\"content\"] == content_phrases[2]:\n", @@ -882,7 +926,7 @@ " text_selections_source.append(text_selection_entity_source)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=entity_source_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=relationship_source, # ids representing individual words from the group\n", " page=1)\n", " if group[\"content\"] == content_phrases[3]:\n", @@ -892,7 +936,7 @@ " text_selections_target.append(text_selection_entity_target)\n", " # build text selections forthe NDJson annotations\n", 
" update_text_selections(annotation=entity_target_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=relationship_target, # ids representing individual words from the group\n", " page=1)\n" ], @@ -910,11 +954,11 @@ { "metadata": {}, "source": [ - "#re-write the entity annotation with text selections \n", + "#re-write the entity annotation with text selections\n", "entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", textSelections = text_selections)\n", "entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",value=entities_annotation_document_entity)\n", "\n", - "# re-write the entity annotation + subclassification with text selections \n", + "# re-write the entity annotation + subclassification with text selections\n", "classifications = [\n", " lb_types.ClassificationAnnotation(\n", " name=\"sub_checklist_question\",\n", @@ -924,8 +968,8 @@ " )\n", " ]\n", "ner_annotation_with_subclass = lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\", textSelections= text_selections_ner)\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(name=\"ner_with_checklist_subclass\", \n", - " value=ner_annotation_with_subclass, \n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(name=\"ner_with_checklist_subclass\",\n", + " value=ner_annotation_with_subclass,\n", " classifications=classifications)\n", "\n", "#re-write the entity source and target annotations withe text selectios\n", @@ -935,15 +979,14 @@ "entity_target_doc = lb_types.DocumentEntity(name=\"named_entity\", text_selections=text_selections_target)\n", "entity_target = lb_types.ObjectAnnotation(name=\"named_entity\", value=entity_target_doc)\n", "\n", - "# re-write the entity relationship with the re-created entities \n", + "# re-write the entity relationship with the re-created entities\n", "entity_relationship = 
lb_types.RelationshipAnnotation(\n", " name=\"relationship\",\n", " value=lb_types.Relationship(\n", " source=entity_source,\n", " target=entity_target,\n", " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ))\n", - " " + " ))\n" ], "cell_type": "code", "outputs": [], @@ -952,7 +995,7 @@ { "metadata": {}, "source": [ - "# Final NDJSON and python annotations \n", + "# Final NDJSON and python annotations\n", "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n", "print(f\"entities_annotation={entities_annotation}\")\n", "print(f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\")\n", @@ -986,19 +1029,19 @@ " annotations = [\n", " entities_annotation,\n", " checklist_annotation,\n", - " nested_checklist_annotation, \n", + " nested_checklist_annotation,\n", " text_annotation,\n", " radio_annotation,\n", " nested_radio_annotation,\n", " bbox_annotation,\n", " bbox_with_radio_subclass_annotation,\n", " ner_with_checklist_subclass_annotation,\n", - " entity_source, \n", - " entity_target, \n", - " entity_relationship,# Only supported for MAL imports \n", + " entity_source,\n", + " entity_target,\n", + " entity_relationship,# Only supported for MAL imports\n", " bbox_source,\n", " bbox_target,\n", - " bbox_relationship # Only supported for MAL imports \n", + " bbox_relationship # Only supported for MAL imports\n", " ]\n", " )\n", ")" @@ -1030,12 +1073,12 @@ " bbox_annotation_ndjson,\n", " bbox_with_radio_subclass_annotation_ndjson,\n", " ner_with_checklist_subclass_annotation_ndjson,\n", - " entity_source_ndjson, \n", - " entity_target_ndjson, \n", - " ner_relationship_annotation_ndjson, # Only supported for MAL imports \n", + " entity_source_ndjson,\n", + " entity_target_ndjson,\n", + " ner_relationship_annotation_ndjson, # Only supported for MAL imports\n", " bbox_source_ndjson,\n", " bbox_target_ndjson,\n", - " bbox_relationship_annotation_ndjson # Only supported for MAL imports \n", + " 
bbox_relationship_annotation_ndjson # Only supported for MAL imports\n", " ]:\n", " annot.update({\n", " \"dataRow\": {\"globalKey\": global_key},\n", @@ -1094,10 +1137,10 @@ "## Relationships are not currently supported for label import\n", "\n", "# upload_job = lb.LabelImport.create_from_objects(\n", - "# client = client, \n", - "# project_id = project.uid, \n", - "# name=\"label_import_job\"+str(uuid.uuid4()), \n", - "# labels=labels)\n", + "# client = client,\n", + "# project_id = project.uid,\n", + "# name=\"label_import_job\"+str(uuid.uuid4()),\n", + "# labels=labels) ## Remove unsupported relationships from the labels list\n", "\n", "# print(\"Errors:\", upload_job.errors)\n", "# print(\"Status of uploads: \", upload_job.statuses)" diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index 877cea78d..abe02fd86 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -47,7 +47,6 @@ " * HTML \n", " * DICOM \n", " * Conversational\n", - "* A data row is a member of a dataset \n", "* A data row cannot exist without belonging to a dataset.\n", "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects" ], @@ -162,7 +161,6 @@ "\n", "if export_task.has_errors():\n", " export_task.get_stream(\n", - " \n", " converter=lb.JsonConverter(),\n", " stream_type=lb.StreamType.ERRORS\n", " ).start(stream_handler=lambda error: print(error))\n", @@ -249,7 +247,7 @@ " schema_id=mdo.reserved_by_name[\"tag\"].uid, # specify the schema id\n", " value=\"tag_string\", # typed inputs\n", " ),\n", - " ], \n", + " ],\n", " )" ], "cell_type": "code", @@ -330,6 +328,10 @@ " {\n", " \"type\": \"HTML\",\n", " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\"\n", + " },\n", + " {\n", + " \"type\": \"PDF_URL\",\n", + " \"value\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", " }\n", " 
]\n", " }])\n", @@ -413,7 +415,7 @@ { "metadata": {}, "source": [ - "# You can only create one attachment at the time. \n", + "# You can only create one attachment at the time.\n", "data_row.create_attachment(attachment_type=\"RAW_TEXT\",\n", " attachment_value=\"LABELERS WILL SEE THIS \")" ], diff --git a/examples/exports/composite_mask_export.ipynb b/examples/exports/composite_mask_export.ipynb index 5a352b3e5..dc46810fd 100644 --- a/examples/exports/composite_mask_export.ipynb +++ b/examples/exports/composite_mask_export.ipynb @@ -103,7 +103,7 @@ "```json \n", " {\n", " \"composite_mask\": {\n", - " \"url\": \"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}\",\n", + " \"url\": \"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1\",\n", " \"color_rgb\": [\n", " 142,\n", " 220,\n", @@ -113,7 +113,7 @@ " }\n", "```\n", " - ***Video example*** :\n", - " The export will adhere to the following URL convention by default. However, the image's URL convention is also considered valid.\n", + " The export will adhere to the following URL convention by default.\n", "```json\n", " {\n", " \"composite_mask\": {\n", @@ -138,7 +138,7 @@ "task_id = \"\"\n", "composite_mask_id = \"\"\n", "\n", - "mask_url = f'https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}'\n", + "mask_url = f'https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1'\n", "req = urllib.request.Request(mask_url, headers=client.headers)\n", "image = Image.open(urllib.request.urlopen(req))\n", "w, h = image.size\n", @@ -150,15 +150,15 @@ "cell_type": "code", "outputs": [ { + "output_type": "execute_result", "data": { "image/png": "", "text/plain": [ "" ] }, - "execution_count": 15, "metadata": {}, - "output_type": "execute_result" + "execution_count": 15 } ], "execution_count": null @@ -177,7 +177,7 @@ " \"annotation_kind\": \"ImageSegmentationMask\",\n", " \"classifications\": [],\n", " \"composite_mask\": 
{\n", - " \"url\": \"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}\",\n", + " \"url\": \"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1\",\n", " \"color_rgb\": [\n", " 123,\n", " 103,\n", diff --git a/examples/prediction_upload/pdf_predictions.ipynb b/examples/prediction_upload/pdf_predictions.ipynb index 95aa0e39a..744bb10fa 100644 --- a/examples/prediction_upload/pdf_predictions.ipynb +++ b/examples/prediction_upload/pdf_predictions.ipynb @@ -43,6 +43,7 @@ "- Free text classifications\n", "- Bounding box\n", "- Entities\n", + "- Relationships (only supported for MAL imports)\n", "\n", "\n", "*NDJson*\n", @@ -50,7 +51,8 @@ "- Radio classifications (including nested classifications)\n", "- Free text classifications\n", "- Bounding box \n", - "- Entities " + "- Entities \n", + "- Relationships (only supported for MAL imports)" ], "cell_type": "markdown" }, @@ -73,9 +75,11 @@ { "metadata": {}, "source": [ + "import uuid\n", + "import json\n", + "import requests\n", "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "import uuid" + "import labelbox.types as lb_types" ], "cell_type": "code", "outputs": [], @@ -127,7 +131,7 @@ ")\n", "\n", "# NDJSON\n", - "entities_prediction_ndjson = { \n", + "entities_prediction_ndjson = {\n", " \"name\": \"named_entity\",\n", " \"confidence\": 0.5,\n", " \"textSelections\": [\n", @@ -150,10 +154,10 @@ "source": [ "########### Radio Classification #########\n", "\n", - "# Annotation types \n", + "# Annotation types\n", "radio_prediction = lb_types.ClassificationAnnotation(\n", " name=\"radio_question\",\n", - " value=lb_types.Radio(answer = \n", + " value=lb_types.Radio(answer =\n", " lb_types.ClassificationAnswer(name = \"first_radio_answer\", confidence=0.5)\n", " )\n", ")\n", @@ -172,7 +176,7 @@ "source": [ "############ Checklist Classification ###########\n", "\n", - "# Annotation types \n", + "# Annotation types\n", "checklist_prediction = 
lb_types.ClassificationAnnotation(\n", " name=\"checklist_question\",\n", " value=lb_types.Checklist(answer = [\n", @@ -209,7 +213,7 @@ "bbox_prediction = lb_types.ObjectAnnotation(\n", " name=\"bounding_box\", # must match your ontology feature\"s name\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim_1[\"left\"], y=bbox_dim_1[\"top\"]), # x = left, y = top \n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"], y=bbox_dim_1[\"top\"]), # x = left, y = top\n", " end=lb_types.Point(x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"], y=bbox_dim_1[\"top\"]+ bbox_dim_1[\"height\"]), # x= left + width , y = top + height\n", " page=0,\n", " unit=lb_types.RectangleUnit.POINTS\n", @@ -237,14 +241,14 @@ " value=lb_types.Checklist(\n", " answer=[lb_types.ClassificationAnswer(\n", " name=\"first_checklist_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer \n", + " confidence=0.5, # Confidence scores should be added to the answer\n", " classifications=[\n", " lb_types.ClassificationAnnotation(\n", " name=\"sub_checklist_question\",\n", " value=lb_types.Checklist(\n", " answer=[lb_types.ClassificationAnswer(\n", " name=\"first_sub_checklist_answer\",\n", - " confidence=0.5 # Confidence scores should be added to the answer \n", + " confidence=0.5 # Confidence scores should be added to the answer\n", " )]\n", " ))\n", " ]\n", @@ -255,17 +259,17 @@ "nested_checklist_prediction_ndjson = {\n", " \"name\": \"nested_checklist_question\",\n", " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\", \n", - " \"confidence\": 0.5, # Confidence scores should be added to the answer \n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5, # Confidence scores should be added to the answer\n", " \"classifications\" : [\n", " {\n", - " \"name\": \"sub_checklist_question\", \n", + " \"name\": \"sub_checklist_question\",\n", " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\", \n", - " \"confidence\": 
0.5, # Confidence scores should be added to the answer \n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\": 0.5, # Confidence scores should be added to the answer\n", " }\n", - " } \n", - " ] \n", + " }\n", + " ]\n", " }]\n", "}\n", "\n", @@ -274,14 +278,14 @@ " value=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"first_radio_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer \n", + " confidence=0.5, # Confidence scores should be added to the answer\n", " classifications=[\n", " lb_types.ClassificationAnnotation(\n", " name=\"sub_radio_question\",\n", " value=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"first_sub_radio_answer\",\n", - " confidence=0.5 # Confidence scores should be added to the answer \n", + " confidence=0.5 # Confidence scores should be added to the answer\n", " )\n", " )\n", " )\n", @@ -298,7 +302,7 @@ " \"confidence\": 0.5,\n", " \"classifications\": [{\n", " \"name\":\"sub_radio_question\",\n", - " \"answer\": { \"name\" : \"first_sub_radio_answer\", \n", + " \"answer\": { \"name\" : \"first_sub_radio_answer\",\n", " \"confidence\": 0.5}\n", " }]\n", " }\n", @@ -312,7 +316,7 @@ { "metadata": {}, "source": [ - "############## Classification Free-form text ############## \n", + "############## Classification Free-form text ##############\n", "\n", "text_prediction = lb_types.ClassificationAnnotation(\n", " name=\"free_text\", # must match your ontology feature\"s name\n", @@ -344,9 +348,9 @@ "\n", "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", " name=\"bbox_with_radio_subclass\",\n", - " confidence=0.5, \n", + " confidence=0.5,\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"], y=bbox_dim[\"top\"]), # x = left, y = top \n", + " start=lb_types.Point(x=bbox_dim[\"left\"], y=bbox_dim[\"top\"]), # x = left, y = top\n", " end=lb_types.Point(x=bbox_dim[\"left\"] + bbox_dim[\"width\"], 
y=bbox_dim[\"top\"] + bbox_dim[\"height\"]), # x= left + width , y = top + height\n", " unit=lb_types.RectangleUnit.POINTS,\n", " page=1\n", @@ -357,7 +361,7 @@ " \t\tvalue=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"first_sub_radio_answer\",\n", - " confidence=0.5, \n", + " confidence=0.5,\n", " classifications=[\n", " lb_types.ClassificationAnnotation(\n", " name=\"second_sub_radio_question\",\n", @@ -379,13 +383,13 @@ " \"name\": \"bbox_with_radio_subclass\",\n", " \"classifications\": [\n", " {\n", - " \"name\": \"sub_radio_question\", \n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\", \n", + " \"name\": \"first_sub_radio_answer\",\n", " \"confidence\": 0.5,\n", " \"classifications\": [\n", " {\n", - " \"name\": \"second_sub_radio_question\", \n", + " \"name\": \"second_sub_radio_question\",\n", " \"answer\": {\n", " \"name\": \"second_sub_radio_answer\", \"confidence\": 0.5}\n", " }\n", @@ -406,12 +410,12 @@ "metadata": {}, "source": [ "\n", - "############ NER with nested classifications ######## \n", + "############ NER with nested classifications ########\n", "\n", "\n", "ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n", " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5, \n", + " confidence=0.5,\n", " value=lb_types.DocumentEntity(\n", " name=\"ner_with_checklist_subclass\",\n", " text_selections=[\n", @@ -438,7 +442,7 @@ " \"classifications\":[\n", " {\n", " \"name\": \"sub_checklist_question\",\n", - " \"answer\": [{\"name\": \"first_sub_checklist_answer\", \"confidence\":0.5 }] \n", + " \"answer\": [{\"name\": \"first_sub_checklist_answer\", \"confidence\":0.5 }]\n", " }\n", " ],\n", " \"textSelections\": [\n", @@ -449,9 +453,9 @@ " \"groupId\": \"\",\n", " \"page\": 1\n", " }\n", - " ] \n", + " ]\n", "}\n", - " \n" + "\n" ], "cell_type": "code", "outputs": [], @@ -467,12 +471,27 @@ { "metadata": {}, "source": [ - "## Text layer url 
is required for uploading entity annotations\n", + "Passing a `text_layer_url` is no longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", + "\n", + "However, it's important to note that Google Document AI imposes specific restrictions on document size:\n", + "- The document must have no more than 15 pages.\n", + "- The file size should not exceed 20 MB.\n", + "\n", + "Furthermore, Google Document AI optimizes documents before OCR processing. This optimization might include rotating images or pages to ensure that text appears horizontally. Consequently, token coordinates are calculated based on the rotated/optimized images, resulting in potential discrepancies with the original PDF document.\n", + "\n", + "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. As a result, all tokens in the text layer are also rotated by 90 degrees.\n", + "\n", + "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ "global_key = \"0801.3483.pdf\"\n", "img_url = {\n", " \"row_data\": {\n", - " \"pdf_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n", - " \"text_layer_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json\"\n", + " \"pdf_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", " },\n", " \"global_key\": global_key\n", "}\n", @@ -481,8 +500,15 @@ "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", "task = dataset.create_data_rows([img_url])\n", "task.wait_till_done()\n", - "print(\"Errors:\",task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: 
{task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if 'Duplicate global key' in error['message'] and dataset.row_count == 0:\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" ], "cell_type": "code", "outputs": [], @@ -503,9 +529,9 @@ "\n", "ontology_builder = lb.OntologyBuilder(\n", " classifications=[ # List of Classification objects\n", - " lb.Classification( \n", + " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\", \n", + " name=\"radio_question\",\n", " scope = lb.Classification.Scope.GLOBAL,\n", " options=[\n", " lb.Option(value=\"first_radio_answer\"),\n", @@ -514,13 +540,13 @@ " ),\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\", \n", + " name=\"checklist_question\",\n", " scope = lb.Classification.Scope.GLOBAL,\n", " options=[\n", " lb.Option(value=\"first_checklist_answer\"),\n", " lb.Option(value=\"second_checklist_answer\")\n", " ]\n", - " ), \n", + " ),\n", " lb.Classification(\n", " class_type=lb.Classification.Type.TEXT,\n", " name=\"free_text\",\n", @@ -550,15 +576,15 @@ " options=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\", \n", + " name=\"sub_checklist_question\",\n", " options=[lb.Option(\"first_sub_checklist_answer\")]\n", " )\n", " ])\n", " ]\n", - " ), \n", + " ),\n", " ],\n", " tools=[ # List of Tool objects\n", - " lb.Tool( tool=lb.Tool.Type.BBOX,name=\"bounding_box\"), \n", + " lb.Tool( tool=lb.Tool.Type.BBOX,name=\"bounding_box\"),\n", " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", " lb.Tool(tool=lb.Tool.Type.NER,\n", " name=\"ner_with_checklist_subclass\",\n", @@ -610,7 +636,7 @@ "metadata": {}, "source": [ "# create Model\n", - "model = 
client.create_model(name=\"PDF_model_run_\"+ str(uuid.uuid4()), \n", + "model = client.create_model(name=\"PDF_model_run_\"+ str(uuid.uuid4()),\n", " ontology_id=ontology.uid)\n", "# create Model Run\n", "model_run = model.create_model_run(\"iteration 1\")" @@ -650,19 +676,38 @@ { "metadata": {}, "source": [ - "First, we need to populate the text selections for Entity annotations\n", - "To learn how to generate a text layer for your documents please refer to the following repositories/files: \n", - "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/gcloud/gcp-vision-to-lb-text-layer.py\n", - "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/adobe/adobe-ocr-to-lb-text-layer.py" + "To import NER annotations, you must pass a `text_layer_url`. Labelbox automatically generates a `text_layer_url` after importing a PDF asset that doesn't include a `text_layer_url`." ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "import requests\n", - "import json\n", + "To extract the generated text layer URL, we first need to export the data row." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "client.enable_experimental = True\n", + "task = lb.DataRow.export(client=client,global_keys=[global_key])\n", + "task.wait_till_done()\n", + "stream = task.get_stream()\n", "\n", + "text_layer = \"\"\n", + "for output in stream:\n", + " output_json = json.loads(output.json_str)\n", + " text_layer = output_json['media_attributes']['text_layer_url']\n", + "print(text_layer)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ "# Helper method\n", "def update_text_selections(annotation, group_id, list_tokens, page):\n", " return annotation.update({\n", @@ -674,15 +719,14 @@ " }\n", " ]\n", " })\n", - " \n", "\n", - "text_layer = \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json\"\n", "\n", "# 
Fetch the content of the text layer\n", - "res = requests.get(text_layer) \n", + "res = requests.get(text_layer)\n", "\n", "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = [\"Metal-insulator (MI) transitions have been one of the\", \"T. Sasaki,* N. Yoneyama, and N. Kobayashi\"]\n", + "content_phrases = [\"Metal-insulator (MI) transitions have been one of the\",\n", + " \"T. Sasaki, N. Yoneyama, and N. Kobayashi\"]\n", "\n", "# Parse the text layer\n", "text_selections = []\n", @@ -698,7 +742,7 @@ " text_selections.append(document_text_selection)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=entities_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=list_tokens, # ids representing individual words from the group\n", " page=1)\n", " if group[\"content\"] == content_phrases[1]:\n", @@ -708,17 +752,17 @@ " text_selections_ner.append(ner_text_selection)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=ner_with_checklist_subclass_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=list_tokens_2, # ids representing individual words from the group\n", " page=1)\n", "\n", "\n", - "#re-write the entity annotation with text selections \n", + "#re-write the entity annotation with text selections\n", "entities_prediction_document_entity = lb_types.DocumentEntity(name=\"named_entity\",confidence=0.5, textSelections = text_selections)\n", "entities_prediction = lb_types.ObjectAnnotation(name=\"named_entity\",value=entities_prediction_document_entity)\n", "\n", "\n", - "# re-write the entity annotation + subclassification with text selections \n", + "# re-write the entity annotation + subclassification with 
text selections\n", "classifications = [\n", " lb_types.ClassificationAnnotation(\n", " name=\"sub_checklist_question\",\n", @@ -728,12 +772,12 @@ " )\n", " ]\n", "ner_annotation_with_subclass = lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",confidence=0.5, textSelections= text_selections_ner)\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(name=\"ner_with_checklist_subclass\", \n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(name=\"ner_with_checklist_subclass\",\n", " confidence=0.5,\n", - " value=ner_annotation_with_subclass, \n", + " value=ner_annotation_with_subclass,\n", " classifications=classifications)\n", "\n", - "# Final NDJSON and python annotations \n", + "# Final NDJSON and python annotations\n", "print(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\n", "print(f\"entities_annotation={entities_prediction}\")\n", "print(f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\")\n", @@ -762,7 +806,7 @@ " annotations = [\n", " entities_prediction,\n", " checklist_prediction,\n", - " nested_checklist_prediction, \n", + " nested_checklist_prediction,\n", " text_prediction,\n", " radio_prediction,\n", " nested_radio_prediction,\n", @@ -851,7 +895,7 @@ { "metadata": {}, "source": [ - "project = client.create_project(name=\"Document Prediction Import Demo\", \n", + "project = client.create_project(name=\"Document Prediction Import Demo\",\n", " media_type=lb.MediaType.Document)\n", "project.setup_editor(ontology)" ], @@ -899,7 +943,7 @@ "\n", "radio_annotation = lb_types.ClassificationAnnotation(\n", " name=\"radio_question\",\n", - " value=lb_types.Radio(answer = \n", + " value=lb_types.Radio(answer =\n", " lb_types.ClassificationAnswer(name = \"first_radio_answer\")\n", " )\n", ")\n", @@ -908,7 +952,7 @@ " name=\"checklist_question\",\n", " value=lb_types.Checklist(answer = [\n", " lb_types.ClassificationAnswer(name = \"first_checklist_answer\"),\n", 
- " lb_types.ClassificationAnswer(name = \"second_checklist_answer\"), \n", + " lb_types.ClassificationAnswer(name = \"second_checklist_answer\"),\n", " ])\n", " )\n", "\n", @@ -921,7 +965,7 @@ "bbox_annotation = lb_types.ObjectAnnotation(\n", " name=\"bounding_box\", # must match your ontology feature\"s name\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim_1[\"left\"], y=bbox_dim_1[\"top\"]), # x = left, y = top \n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"], y=bbox_dim_1[\"top\"]), # x = left, y = top\n", " end=lb_types.Point(x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"], y=bbox_dim_1[\"top\"]+ bbox_dim_1[\"height\"]), # x= left + width , y = top + height\n", " page=0,\n", " unit=lb_types.RectangleUnit.POINTS\n", @@ -966,7 +1010,7 @@ ")\n", "\n", "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", \n", + " name=\"free_text\",\n", " value=lb_types.Text(answer=\"sample text\")\n", ")\n", "\n", @@ -979,9 +1023,9 @@ "\n", "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", " name=\"bbox_with_radio_subclass\",\n", - " \n", + "\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"], y=bbox_dim[\"top\"]), # x = left, y = top \n", + " start=lb_types.Point(x=bbox_dim[\"left\"], y=bbox_dim[\"top\"]), # x = left, y = top\n", " end=lb_types.Point(x=bbox_dim[\"left\"] + bbox_dim[\"width\"], y=bbox_dim[\"top\"] + bbox_dim[\"height\"]), # x= left + width , y = top + height\n", " unit=lb_types.RectangleUnit.POINTS,\n", " page=1\n", @@ -992,11 +1036,11 @@ " \t\tvalue=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"first_sub_radio_answer\",\n", - " \n", + "\n", " classifications=[\n", " lb_types.ClassificationAnnotation(\n", " name=\"second_sub_radio_question\",\n", - " \n", + "\n", " value=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"second_sub_radio_answer\"\n", @@ -1049,7 +1093,7 @@ " annotations = [\n", " 
entities_annotation,\n", " checklist_annotation,\n", - " nested_checklist_annotation, \n", + " nested_checklist_annotation,\n", " text_annotation,\n", " radio_annotation,\n", " nested_radio_annotation,\n",