From 77b64ee85b8856adc43df37a129f1b6b9290a79c Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Wed, 13 Mar 2024 17:27:29 -0400 Subject: [PATCH 1/9] added attachment PDF example, removed requirements to include text layer url --- examples/annotation_import/pdf.ipynb | 186 +++++++++++++++++---------- examples/basics/data_rows.ipynb | 10 +- 2 files changed, 122 insertions(+), 74 deletions(-) diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb index f0a012e5c..502656bab 100644 --- a/examples/annotation_import/pdf.ipynb +++ b/examples/annotation_import/pdf.ipynb @@ -79,6 +79,7 @@ "metadata": {}, "source": [ "import uuid\n", + "import json\n", "import labelbox as lb\n", "import labelbox.types as lb_types\n" ], @@ -133,7 +134,7 @@ ")\n", "\n", "# NDJSON\n", - "entities_annotations_ndjson = { \n", + "entities_annotations_ndjson = {\n", " \"name\": \"named_entity\",\n", " \"textSelections\": [\n", " {\n", @@ -155,10 +156,10 @@ "source": [ "########### Radio Classification #########\n", "\n", - "# Annotation types \n", + "# Annotation types\n", "radio_annotation = lb_types.ClassificationAnnotation(\n", " name=\"radio_question\",\n", - " value=lb_types.Radio(answer = \n", + " value=lb_types.Radio(answer =\n", " lb_types.ClassificationAnswer(name = \"first_radio_answer\")\n", " )\n", ")\n", @@ -177,7 +178,7 @@ "source": [ "############ Checklist Classification ###########\n", "\n", - "# Annotation types \n", + "# Annotation types\n", "checklist_annotation = lb_types.ClassificationAnnotation(\n", " name=\"checklist_question\",\n", " value=lb_types.Checklist(answer = [\n", @@ -208,7 +209,7 @@ "bbox_annotation = lb_types.ObjectAnnotation(\n", " name=\"bounding_box\", # must match your ontology feature\"s name\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top \n", + " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n", " end=lb_types.Point(x=518.571, y=245.143), # x= left + 
width , y = top + height\n", " page=0,\n", " unit=lb_types.RectangleUnit.POINTS\n", @@ -257,13 +258,13 @@ "nested_checklist_annotation_ndjson = {\n", " \"name\": \"nested_checklist_question\",\n", " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\", \n", + " \"name\": \"first_checklist_answer\",\n", " \"classifications\" : [\n", " {\n", - " \"name\": \"sub_checklist_question\", \n", + " \"name\": \"sub_checklist_question\",\n", " \"answer\": {\"name\": \"first_sub_checklist_answer\"}\n", - " } \n", - " ] \n", + " }\n", + " ]\n", " }]\n", "}\n", "\n", @@ -306,7 +307,7 @@ { "metadata": {}, "source": [ - "############## Classification Free-form text ############## \n", + "############## Classification Free-form text ##############\n", "\n", "text_annotation = lb_types.ClassificationAnnotation(\n", " name=\"free_text\", # must match your ontology feature\"s name\n", @@ -331,7 +332,7 @@ "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", " name=\"bbox_with_radio_subclass\",\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top \n", + " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n", " end=lb_types.Point(x=566.657, y=420.986), # x= left + width , y = top + height\n", " unit=lb_types.RectangleUnit.POINTS,\n", " page=1\n", @@ -362,12 +363,12 @@ " \"name\": \"bbox_with_radio_subclass\",\n", " \"classifications\": [\n", " {\n", - " \"name\": \"sub_radio_question\", \n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\", \n", + " \"name\": \"first_sub_radio_answer\",\n", " \"classifications\": [\n", " {\n", - " \"name\": \"second_sub_radio_question\", \n", + " \"name\": \"second_sub_radio_question\",\n", " \"answer\": {\n", " \"name\": \"second_sub_radio_answer\"}\n", " }\n", @@ -392,7 +393,7 @@ { "metadata": {}, "source": [ - "############ NER with nested classifications ######## \n", + "############ NER with nested 
classifications ########\n", "\n", "\n", "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", @@ -423,7 +424,7 @@ " \"classifications\":[\n", " {\n", " \"name\": \"sub_checklist_question\",\n", - " \"answer\": [{\"name\": \"first_sub_checklist_answer\"}] \n", + " \"answer\": [{\"name\": \"first_sub_checklist_answer\"}]\n", " }\n", " ],\n", " \"textSelections\": [\n", @@ -434,9 +435,9 @@ " \"groupId\": \"\",\n", " \"page\": 1\n", " }\n", - " ] \n", + " ]\n", "}\n", - " \n" + "\n" ], "cell_type": "code", "outputs": [], @@ -445,7 +446,7 @@ { "metadata": {}, "source": [ - "######### Relationships ########## \n", + "######### Relationships ##########\n", "entity_source = lb_types.ObjectAnnotation(\n", " name=\"named_entity\",\n", " value= lb_types.DocumentEntity(\n", @@ -482,7 +483,7 @@ " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", " ))\n", "\n", - "## Only supported for MAL imports \n", + "## Only supported for MAL imports\n", "uuid_source = str(uuid.uuid4())\n", "uuid_target = str(uuid.uuid4())\n", "\n", @@ -498,7 +499,7 @@ " \"page\": 1\n", " }\n", " ]\n", - " \n", + "\n", "}\n", "\n", "entity_target_ndjson = {\n", @@ -515,7 +516,7 @@ " ]\n", "}\n", "ner_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\", \n", + " \"name\": \"relationship\",\n", " \"relationship\": {\n", " \"source\": uuid_source,\n", " \"target\": uuid_target,\n", @@ -537,7 +538,7 @@ "bbox_source = lb_types.ObjectAnnotation(\n", " name=\"bounding_box\",\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top \n", + " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n", " end=lb_types.Point(x=270.907, y=149.556), # x = left + width , y = top + height\n", " unit=lb_types.RectangleUnit.POINTS,\n", " page=1\n", @@ -563,7 +564,7 @@ " ))\n", "\n", "\n", - "## Only supported for MAL imports \n", + "## Only supported for MAL imports\n", "uuid_source_2 = str(uuid.uuid4())\n", "uuid_target_2 = 
str(uuid.uuid4())\n", "\n", @@ -594,7 +595,7 @@ "}\n", "\n", "bbox_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\", \n", + " \"name\": \"relationship\",\n", " \"relationship\": {\n", " \"source\": uuid_source_2,\n", " \"target\": uuid_target_2,\n", @@ -623,12 +624,27 @@ { "metadata": {}, "source": [ - "## Text layer url is required for uploading entity annotations\n", - "global_key = \"0801.3483.pdf\"\n", + "Passing a `text_layer_url` is not longer required. Labelbox automatically generates one using Google Document AI and its OCR engine to detect tokens. \n", + "\n", + "However, it's important to note that Google Document AI imposes specific restrictions on document size:\n", + "- The document must have no more than 15 pages.\n", + "- The file size should not exceed 20 MB.\n", + "\n", + "Furthermore, Google Document AI optimizes documents before OCR processing. This optimization might include rotating images or pages to ensure that text appears horizontally. Consequently, token coordinates are calculated based on the rotated/optimized images, resulting in potential discrepancies with the original PDF document.\n", + "\n", + "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n", + "\n", + "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "global_key = \"0801.3483_doc.pdf\"\n", "img_url = {\n", " \"row_data\": {\n", - " \"pdf_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n", - " \"text_layer_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json\"\n", + " \"pdf_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", " },\n", " \"global_key\": global_key\n", "}\n", @@ -637,8 +653,15 @@ "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", "task = dataset.create_data_rows([img_url])\n", "task.wait_till_done()\n", - "print(\"Errors:\",task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if 'Duplicate global key' in error['message'] and dataset.row_count == 0:\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" ], "cell_type": "code", "outputs": [], @@ -659,9 +682,9 @@ "\n", "ontology_builder = lb.OntologyBuilder(\n", " classifications=[ # List of Classification objects\n", - " lb.Classification( \n", + " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\", \n", + " name=\"radio_question\",\n", " scope = lb.Classification.Scope.GLOBAL,\n", " options=[\n", " lb.Option(value=\"first_radio_answer\"),\n", @@ -670,13 +693,13 @@ " ),\n", " lb.Classification(\n", " 
class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\", \n", + " name=\"checklist_question\",\n", " scope = lb.Classification.Scope.GLOBAL,\n", " options=[\n", " lb.Option(value=\"first_checklist_answer\"),\n", " lb.Option(value=\"second_checklist_answer\")\n", " ]\n", - " ), \n", + " ),\n", " lb.Classification(\n", " class_type=lb.Classification.Type.TEXT,\n", " name=\"free_text\",\n", @@ -706,15 +729,15 @@ " options=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\", \n", + " name=\"sub_checklist_question\",\n", " options=[lb.Option(\"first_sub_checklist_answer\")]\n", " )\n", " ])\n", " ]\n", - " ), \n", + " ),\n", " ],\n", " tools=[ # List of Tool objects\n", - " lb.Tool( tool=lb.Tool.Type.BBOX,name=\"bounding_box\"), \n", + " lb.Tool( tool=lb.Tool.Type.BBOX,name=\"bounding_box\"),\n", " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP,name=\"relationship\"),\n", " lb.Tool(tool=lb.Tool.Type.NER,\n", @@ -767,7 +790,7 @@ "metadata": {}, "source": [ "# Create a Labelbox project\n", - "project = client.create_project(name=\"PDF_annotation_demo\", \n", + "project = client.create_project(name=\"PDF_annotation_demo\",\n", " media_type=lb.MediaType.Document)\n", "project.setup_editor(ontology)" ], @@ -811,12 +834,36 @@ "metadata": {}, "source": [ "##### Step 5.1: First, we need to populate the text selections for Entity annotations\n", - "To learn how to generate a text layer for your documents please refer to the following repositories/files: \n", - "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/gcloud/gcp-vision-to-lb-text-layer.py\n", - "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/adobe/adobe-ocr-to-lb-text-layer.py\n" + "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that 
doesn't include a `text_layer_url`\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "To extract the generated text layer url we first need to export the data row" ], "cell_type": "markdown" }, + { + "metadata": {}, + "source": [ + "\n", + "client.enable_experimental = True\n", + "task = lb.DataRow.export(client=client,global_keys=[global_key])\n", + "task.wait_till_done()\n", + "stream = task.get_stream()\n", + "\n", + "text_layer = \"\"\n", + "for output in stream:\n", + " output_json = json.loads(output.json_str)\n", + " text_layer = output_json['media_attributes']['text_layer_url']\n", + "print(text_layer)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, { "metadata": {}, "source": [ @@ -834,16 +881,14 @@ " }\n", " ]\n", " })\n", - " \n", "\n", - "text_layer = \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json\"\n", "\n", "# Fetch the content of the text layer\n", - "res = requests.get(text_layer) \n", + "res = requests.get(text_layer)\n", "\n", "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = [\"Metal-insulator (MI) transitions have been one of the\" , \n", - " \"T. Sasaki,* N. Yoneyama, and N. Kobayashi\", \n", + "content_phrases = [\"Metal-insulator (MI) transitions have been one of the\" ,\n", + " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", " \"Organic charge transfer salts based on the donor\",\n", " \"the experimental investigations on this issue have not\"]\n", "\n", @@ -862,7 +907,7 @@ " text_selections.append(document_text_selection)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=entities_annotations_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=list_tokens, # ids representing individual words from the group\n", " page=1)\n", " if group[\"content\"] == content_phrases[1]:\n", @@ -872,7 +917,7 @@ " text_selections_ner.append(ner_text_selection)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=ner_with_checklist_subclass_annotation_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=list_tokens_2, # ids representing individual words from the group\n", " page=1)\n", " if group[\"content\"] == content_phrases[2]:\n", @@ -882,7 +927,7 @@ " text_selections_source.append(text_selection_entity_source)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=entity_source_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=relationship_source, # ids representing individual words from the group\n", " page=1)\n", " if group[\"content\"] == content_phrases[3]:\n", @@ -892,7 +937,7 @@ " text_selections_target.append(text_selection_entity_target)\n", " # build text selections forthe NDJson annotations\n", " update_text_selections(annotation=entity_target_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=relationship_target, # ids 
representing individual words from the group\n", " page=1)\n" ], @@ -910,11 +955,11 @@ { "metadata": {}, "source": [ - "#re-write the entity annotation with text selections \n", + "#re-write the entity annotation with text selections\n", "entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", textSelections = text_selections)\n", "entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",value=entities_annotation_document_entity)\n", "\n", - "# re-write the entity annotation + subclassification with text selections \n", + "# re-write the entity annotation + subclassification with text selections\n", "classifications = [\n", " lb_types.ClassificationAnnotation(\n", " name=\"sub_checklist_question\",\n", @@ -924,8 +969,8 @@ " )\n", " ]\n", "ner_annotation_with_subclass = lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\", textSelections= text_selections_ner)\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(name=\"ner_with_checklist_subclass\", \n", - " value=ner_annotation_with_subclass, \n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(name=\"ner_with_checklist_subclass\",\n", + " value=ner_annotation_with_subclass,\n", " classifications=classifications)\n", "\n", "#re-write the entity source and target annotations withe text selectios\n", @@ -935,15 +980,14 @@ "entity_target_doc = lb_types.DocumentEntity(name=\"named_entity\", text_selections=text_selections_target)\n", "entity_target = lb_types.ObjectAnnotation(name=\"named_entity\", value=entity_target_doc)\n", "\n", - "# re-write the entity relationship with the re-created entities \n", + "# re-write the entity relationship with the re-created entities\n", "entity_relationship = lb_types.RelationshipAnnotation(\n", " name=\"relationship\",\n", " value=lb_types.Relationship(\n", " source=entity_source,\n", " target=entity_target,\n", " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ))\n", - " " + " ))\n" 
], "cell_type": "code", "outputs": [], @@ -952,7 +996,7 @@ { "metadata": {}, "source": [ - "# Final NDJSON and python annotations \n", + "# Final NDJSON and python annotations\n", "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n", "print(f\"entities_annotation={entities_annotation}\")\n", "print(f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\")\n", @@ -986,19 +1030,19 @@ " annotations = [\n", " entities_annotation,\n", " checklist_annotation,\n", - " nested_checklist_annotation, \n", + " nested_checklist_annotation,\n", " text_annotation,\n", " radio_annotation,\n", " nested_radio_annotation,\n", " bbox_annotation,\n", " bbox_with_radio_subclass_annotation,\n", " ner_with_checklist_subclass_annotation,\n", - " entity_source, \n", - " entity_target, \n", - " entity_relationship,# Only supported for MAL imports \n", + " entity_source,\n", + " entity_target,\n", + " entity_relationship,# Only supported for MAL imports\n", " bbox_source,\n", " bbox_target,\n", - " bbox_relationship # Only supported for MAL imports \n", + " bbox_relationship # Only supported for MAL imports\n", " ]\n", " )\n", ")" @@ -1030,12 +1074,12 @@ " bbox_annotation_ndjson,\n", " bbox_with_radio_subclass_annotation_ndjson,\n", " ner_with_checklist_subclass_annotation_ndjson,\n", - " entity_source_ndjson, \n", - " entity_target_ndjson, \n", - " ner_relationship_annotation_ndjson, # Only supported for MAL imports \n", + " entity_source_ndjson,\n", + " entity_target_ndjson,\n", + " ner_relationship_annotation_ndjson, # Only supported for MAL imports\n", " bbox_source_ndjson,\n", " bbox_target_ndjson,\n", - " bbox_relationship_annotation_ndjson # Only supported for MAL imports \n", + " bbox_relationship_annotation_ndjson # Only supported for MAL imports\n", " ]:\n", " annot.update({\n", " \"dataRow\": {\"globalKey\": global_key},\n", @@ -1094,9 +1138,9 @@ "## Relationships are not currently supported for label import\n", "\n", "# upload_job 
= lb.LabelImport.create_from_objects(\n", - "# client = client, \n", - "# project_id = project.uid, \n", - "# name=\"label_import_job\"+str(uuid.uuid4()), \n", + "# client = client,\n", + "# project_id = project.uid,\n", + "# name=\"label_import_job\"+str(uuid.uuid4()),\n", "# labels=labels)\n", "\n", "# print(\"Errors:\", upload_job.errors)\n", diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index 877cea78d..b2bcd843b 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -162,7 +162,7 @@ "\n", "if export_task.has_errors():\n", " export_task.get_stream(\n", - " \n", + "\n", " converter=lb.JsonConverter(),\n", " stream_type=lb.StreamType.ERRORS\n", " ).start(stream_handler=lambda error: print(error))\n", @@ -249,7 +249,7 @@ " schema_id=mdo.reserved_by_name[\"tag\"].uid, # specify the schema id\n", " value=\"tag_string\", # typed inputs\n", " ),\n", - " ], \n", + " ],\n", " )" ], "cell_type": "code", @@ -330,6 +330,10 @@ " {\n", " \"type\": \"HTML\",\n", " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\"\n", + " },\n", + " {\n", + " \"type\": \"PDF_URL\",\n", + " \"value\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", " }\n", " ]\n", " }])\n", @@ -413,7 +417,7 @@ { "metadata": {}, "source": [ - "# You can only create one attachment at the time. 
\n", + "# You can only create one attachment at the time.\n", "data_row.create_attachment(attachment_type=\"RAW_TEXT\",\n", " attachment_value=\"LABELERS WILL SEE THIS \")" ], From 90679b16e9c273da1b9845e58165ddab2c63e745 Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Thu, 14 Mar 2024 09:49:09 -0400 Subject: [PATCH 2/9] incorporated ramy's comments --- examples/annotation_import/pdf.ipynb | 2 +- examples/basics/data_rows.ipynb | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb index 502656bab..d3a52a9a8 100644 --- a/examples/annotation_import/pdf.ipynb +++ b/examples/annotation_import/pdf.ipynb @@ -1141,7 +1141,7 @@ "# client = client,\n", "# project_id = project.uid,\n", "# name=\"label_import_job\"+str(uuid.uuid4()),\n", - "# labels=labels)\n", + "# labels=labels) ## Remove unsupported relationships from the labels list\n", "\n", "# print(\"Errors:\", upload_job.errors)\n", "# print(\"Status of uploads: \", upload_job.statuses)" diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index b2bcd843b..45b3cf6d0 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -162,7 +162,6 @@ "\n", "if export_task.has_errors():\n", " export_task.get_stream(\n", - "\n", " converter=lb.JsonConverter(),\n", " stream_type=lb.StreamType.ERRORS\n", " ).start(stream_handler=lambda error: print(error))\n", From 9356abbab8ad04fe3a98ea36d49694046c0a3b48 Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Thu, 14 Mar 2024 10:04:44 -0400 Subject: [PATCH 3/9] no code updates to composite mask export --- examples/exports/composite_mask_export.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/exports/composite_mask_export.ipynb b/examples/exports/composite_mask_export.ipynb index 5a352b3e5..dc46810fd 100644 --- a/examples/exports/composite_mask_export.ipynb +++ b/examples/exports/composite_mask_export.ipynb @@ 
-103,7 +103,7 @@ "```json \n", " {\n", " \"composite_mask\": {\n", - " \"url\": \"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}\",\n", + " \"url\": \"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1\",\n", " \"color_rgb\": [\n", " 142,\n", " 220,\n", @@ -113,7 +113,7 @@ " }\n", "```\n", " - ***Video example*** :\n", - " The export will adhere to the following URL convention by default. However, the image's URL convention is also considered valid.\n", + " The export will adhere to the following URL convention by default.\n", "```json\n", " {\n", " \"composite_mask\": {\n", @@ -138,7 +138,7 @@ "task_id = \"\"\n", "composite_mask_id = \"\"\n", "\n", - "mask_url = f'https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}'\n", + "mask_url = f'https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1'\n", "req = urllib.request.Request(mask_url, headers=client.headers)\n", "image = Image.open(urllib.request.urlopen(req))\n", "w, h = image.size\n", @@ -150,15 +150,15 @@ "cell_type": "code", "outputs": [ { + "output_type": "execute_result", "data": { "image/png": "", "text/plain": [ "" ] }, - "execution_count": 15, "metadata": {}, - "output_type": "execute_result" + "execution_count": 15 } ], "execution_count": null @@ -177,7 +177,7 @@ " \"annotation_kind\": \"ImageSegmentationMask\",\n", " \"classifications\": [],\n", " \"composite_mask\": {\n", - " \"url\": \"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}\",\n", + " \"url\": \"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1\",\n", " \"color_rgb\": [\n", " 123,\n", " 103,\n", From 13a894eb7131890e95315154ae223b6da0db3c66 Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Thu, 14 Mar 2024 10:06:55 -0400 Subject: [PATCH 4/9] no code updates to composite mask export --- examples/exports/composite_mask_export.ipynb | 14 +------------- 1 file changed, 1 insertion(+), 13 
deletions(-) diff --git a/examples/exports/composite_mask_export.ipynb b/examples/exports/composite_mask_export.ipynb index dc46810fd..89d359d39 100644 --- a/examples/exports/composite_mask_export.ipynb +++ b/examples/exports/composite_mask_export.ipynb @@ -148,19 +148,7 @@ "image.resize((new_w, new_h), Image.BICUBIC)" ], "cell_type": "code", - "outputs": [ - { - "output_type": "execute_result", - "data": { - "image/png": "", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 15 - } - ], + "outputs": [], "execution_count": null }, { From c51522d2a832bf1077147094421851c4dc1ccf02 Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Thu, 14 Mar 2024 10:09:31 -0400 Subject: [PATCH 5/9] Revert "no code updates to composite mask export" This reverts commit 13a894eb7131890e95315154ae223b6da0db3c66. --- examples/exports/composite_mask_export.ipynb | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/examples/exports/composite_mask_export.ipynb b/examples/exports/composite_mask_export.ipynb index 89d359d39..dc46810fd 100644 --- a/examples/exports/composite_mask_export.ipynb +++ b/examples/exports/composite_mask_export.ipynb @@ -148,7 +148,19 @@ "image.resize((new_w, new_h), Image.BICUBIC)" ], "cell_type": "code", - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], "execution_count": null }, { From d480ec6c70d29336c5e3f2a20a65817a7eedc03f Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Thu, 14 Mar 2024 12:53:11 -0400 Subject: [PATCH 6/9] Update prediction pdf notebook --- examples/annotation_import/pdf.ipynb | 5 +- .../prediction_upload/pdf_predictions.ipynb | 179 +++++++++++------- 2 files changed, 110 insertions(+), 74 deletions(-) diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb index d3a52a9a8..349feabb8 100644 --- a/examples/annotation_import/pdf.ipynb +++ 
b/examples/annotation_import/pdf.ipynb @@ -80,6 +80,7 @@ "source": [ "import uuid\n", "import json\n", + "import requests\n", "import labelbox as lb\n", "import labelbox.types as lb_types\n" ], @@ -624,7 +625,7 @@ { "metadata": {}, "source": [ - "Passing a `text_layer_url` is not longer required. Labelbox automatically generates one using Google Document AI and its OCR engine to detect tokens. \n", + "Passing a `text_layer_url` is not longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", "\n", "However, it's important to note that Google Document AI imposes specific restrictions on document size:\n", "- The document must have no more than 15 pages.\n", @@ -867,8 +868,6 @@ { "metadata": {}, "source": [ - "import requests\n", - "import json\n", "\n", "# Helper method\n", "def update_text_selections(annotation, group_id, list_tokens, page):\n", diff --git a/examples/prediction_upload/pdf_predictions.ipynb b/examples/prediction_upload/pdf_predictions.ipynb index 95aa0e39a..d2e9ea660 100644 --- a/examples/prediction_upload/pdf_predictions.ipynb +++ b/examples/prediction_upload/pdf_predictions.ipynb @@ -43,6 +43,7 @@ "- Free text classifications\n", "- Bounding box\n", "- Entities\n", + "- Relationships (only supported for MAL imports)\n", "\n", "\n", "*NDJson*\n", @@ -50,7 +51,8 @@ "- Radio classifications (including nested classifications)\n", "- Free text classifications\n", "- Bounding box \n", - "- Entities " + "- Entities \n", + "- Relationships (only supported for MAL imports)" ], "cell_type": "markdown" }, @@ -73,9 +75,11 @@ { "metadata": {}, "source": [ + "import uuid\n", + "import json\n", + "import requests\n", "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "import uuid" + "import labelbox.types as lb_types" ], "cell_type": "code", "outputs": [], @@ -127,7 +131,7 @@ ")\n", "\n", "# NDJSON\n", - "entities_prediction_ndjson = { \n", + "entities_prediction_ndjson = 
{\n", " \"name\": \"named_entity\",\n", " \"confidence\": 0.5,\n", " \"textSelections\": [\n", @@ -150,10 +154,10 @@ "source": [ "########### Radio Classification #########\n", "\n", - "# Annotation types \n", + "# Annotation types\n", "radio_prediction = lb_types.ClassificationAnnotation(\n", " name=\"radio_question\",\n", - " value=lb_types.Radio(answer = \n", + " value=lb_types.Radio(answer =\n", " lb_types.ClassificationAnswer(name = \"first_radio_answer\", confidence=0.5)\n", " )\n", ")\n", @@ -172,7 +176,7 @@ "source": [ "############ Checklist Classification ###########\n", "\n", - "# Annotation types \n", + "# Annotation types\n", "checklist_prediction = lb_types.ClassificationAnnotation(\n", " name=\"checklist_question\",\n", " value=lb_types.Checklist(answer = [\n", @@ -209,7 +213,7 @@ "bbox_prediction = lb_types.ObjectAnnotation(\n", " name=\"bounding_box\", # must match your ontology feature\"s name\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim_1[\"left\"], y=bbox_dim_1[\"top\"]), # x = left, y = top \n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"], y=bbox_dim_1[\"top\"]), # x = left, y = top\n", " end=lb_types.Point(x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"], y=bbox_dim_1[\"top\"]+ bbox_dim_1[\"height\"]), # x= left + width , y = top + height\n", " page=0,\n", " unit=lb_types.RectangleUnit.POINTS\n", @@ -237,14 +241,14 @@ " value=lb_types.Checklist(\n", " answer=[lb_types.ClassificationAnswer(\n", " name=\"first_checklist_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer \n", + " confidence=0.5, # Confidence scores should be added to the answer\n", " classifications=[\n", " lb_types.ClassificationAnnotation(\n", " name=\"sub_checklist_question\",\n", " value=lb_types.Checklist(\n", " answer=[lb_types.ClassificationAnswer(\n", " name=\"first_sub_checklist_answer\",\n", - " confidence=0.5 # Confidence scores should be added to the answer \n", + " confidence=0.5 # Confidence 
scores should be added to the answer\n", " )]\n", " ))\n", " ]\n", @@ -255,17 +259,17 @@ "nested_checklist_prediction_ndjson = {\n", " \"name\": \"nested_checklist_question\",\n", " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\", \n", - " \"confidence\": 0.5, # Confidence scores should be added to the answer \n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5, # Confidence scores should be added to the answer\n", " \"classifications\" : [\n", " {\n", - " \"name\": \"sub_checklist_question\", \n", + " \"name\": \"sub_checklist_question\",\n", " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\", \n", - " \"confidence\": 0.5, # Confidence scores should be added to the answer \n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\": 0.5, # Confidence scores should be added to the answer\n", " }\n", - " } \n", - " ] \n", + " }\n", + " ]\n", " }]\n", "}\n", "\n", @@ -274,14 +278,14 @@ " value=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"first_radio_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer \n", + " confidence=0.5, # Confidence scores should be added to the answer\n", " classifications=[\n", " lb_types.ClassificationAnnotation(\n", " name=\"sub_radio_question\",\n", " value=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"first_sub_radio_answer\",\n", - " confidence=0.5 # Confidence scores should be added to the answer \n", + " confidence=0.5 # Confidence scores should be added to the answer\n", " )\n", " )\n", " )\n", @@ -298,7 +302,7 @@ " \"confidence\": 0.5,\n", " \"classifications\": [{\n", " \"name\":\"sub_radio_question\",\n", - " \"answer\": { \"name\" : \"first_sub_radio_answer\", \n", + " \"answer\": { \"name\" : \"first_sub_radio_answer\",\n", " \"confidence\": 0.5}\n", " }]\n", " }\n", @@ -312,7 +316,7 @@ { "metadata": {}, "source": [ - "############## Classification Free-form text ############## \n", + 
"############## Classification Free-form text ##############\n", "\n", "text_prediction = lb_types.ClassificationAnnotation(\n", " name=\"free_text\", # must match your ontology feature\"s name\n", @@ -344,9 +348,9 @@ "\n", "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", " name=\"bbox_with_radio_subclass\",\n", - " confidence=0.5, \n", + " confidence=0.5,\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"], y=bbox_dim[\"top\"]), # x = left, y = top \n", + " start=lb_types.Point(x=bbox_dim[\"left\"], y=bbox_dim[\"top\"]), # x = left, y = top\n", " end=lb_types.Point(x=bbox_dim[\"left\"] + bbox_dim[\"width\"], y=bbox_dim[\"top\"] + bbox_dim[\"height\"]), # x= left + width , y = top + height\n", " unit=lb_types.RectangleUnit.POINTS,\n", " page=1\n", @@ -357,7 +361,7 @@ " \t\tvalue=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"first_sub_radio_answer\",\n", - " confidence=0.5, \n", + " confidence=0.5,\n", " classifications=[\n", " lb_types.ClassificationAnnotation(\n", " name=\"second_sub_radio_question\",\n", @@ -379,13 +383,13 @@ " \"name\": \"bbox_with_radio_subclass\",\n", " \"classifications\": [\n", " {\n", - " \"name\": \"sub_radio_question\", \n", + " \"name\": \"sub_radio_question\",\n", " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\", \n", + " \"name\": \"first_sub_radio_answer\",\n", " \"confidence\": 0.5,\n", " \"classifications\": [\n", " {\n", - " \"name\": \"second_sub_radio_question\", \n", + " \"name\": \"second_sub_radio_question\",\n", " \"answer\": {\n", " \"name\": \"second_sub_radio_answer\", \"confidence\": 0.5}\n", " }\n", @@ -406,12 +410,12 @@ "metadata": {}, "source": [ "\n", - "############ NER with nested classifications ######## \n", + "############ NER with nested classifications ########\n", "\n", "\n", "ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n", " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5, 
\n", + " confidence=0.5,\n", " value=lb_types.DocumentEntity(\n", " name=\"ner_with_checklist_subclass\",\n", " text_selections=[\n", @@ -438,7 +442,7 @@ " \"classifications\":[\n", " {\n", " \"name\": \"sub_checklist_question\",\n", - " \"answer\": [{\"name\": \"first_sub_checklist_answer\", \"confidence\":0.5 }] \n", + " \"answer\": [{\"name\": \"first_sub_checklist_answer\", \"confidence\":0.5 }]\n", " }\n", " ],\n", " \"textSelections\": [\n", @@ -449,9 +453,9 @@ " \"groupId\": \"\",\n", " \"page\": 1\n", " }\n", - " ] \n", + " ]\n", "}\n", - " \n" + "\n" ], "cell_type": "code", "outputs": [], @@ -467,12 +471,27 @@ { "metadata": {}, "source": [ - "## Text layer url is required for uploading entity annotations\n", + "Passing a `text_layer_url` is no longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", + "\n", + "However, it's important to note that Google Document AI imposes specific restrictions on document size:\n", + "- The document must have no more than 15 pages.\n", + "- The file size should not exceed 20 MB.\n", + "\n", + "Furthermore, Google Document AI optimizes documents before OCR processing. This optimization might include rotating images or pages to ensure that text appears horizontally. Consequently, token coordinates are calculated based on the rotated/optimized images, resulting in potential discrepancies with the original PDF document.\n", + "\n", + "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n", + "\n", + "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ "global_key = \"0801.3483.pdf\"\n", "img_url = {\n", " \"row_data\": {\n", - " \"pdf_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n", - " \"text_layer_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json\"\n", + " \"pdf_url\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", " },\n", " \"global_key\": global_key\n", "}\n", @@ -503,9 +522,9 @@ "\n", "ontology_builder = lb.OntologyBuilder(\n", " classifications=[ # List of Classification objects\n", - " lb.Classification( \n", + " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\", \n", + " name=\"radio_question\",\n", " scope = lb.Classification.Scope.GLOBAL,\n", " options=[\n", " lb.Option(value=\"first_radio_answer\"),\n", @@ -514,13 +533,13 @@ " ),\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\", \n", + " name=\"checklist_question\",\n", " scope = lb.Classification.Scope.GLOBAL,\n", " options=[\n", " lb.Option(value=\"first_checklist_answer\"),\n", " lb.Option(value=\"second_checklist_answer\")\n", " ]\n", - " ), \n", + " ),\n", " lb.Classification(\n", " class_type=lb.Classification.Type.TEXT,\n", " name=\"free_text\",\n", @@ -550,15 +569,15 @@ " options=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\", \n", + " name=\"sub_checklist_question\",\n", " options=[lb.Option(\"first_sub_checklist_answer\")]\n", " )\n", " ])\n", " ]\n", - " ), \n", + " ),\n", " ],\n", " tools=[ # List of Tool objects\n", - 
" lb.Tool( tool=lb.Tool.Type.BBOX,name=\"bounding_box\"), \n", + " lb.Tool( tool=lb.Tool.Type.BBOX,name=\"bounding_box\"),\n", " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", " lb.Tool(tool=lb.Tool.Type.NER,\n", " name=\"ner_with_checklist_subclass\",\n", @@ -610,7 +629,7 @@ "metadata": {}, "source": [ "# create Model\n", - "model = client.create_model(name=\"PDF_model_run_\"+ str(uuid.uuid4()), \n", + "model = client.create_model(name=\"PDF_model_run_\"+ str(uuid.uuid4()),\n", " ontology_id=ontology.uid)\n", "# create Model Run\n", "model_run = model.create_model_run(\"iteration 1\")" @@ -650,19 +669,38 @@ { "metadata": {}, "source": [ - "First, we need to populate the text selections for Entity annotations\n", - "To learn how to generate a text layer for your documents please refer to the following repositories/files: \n", - "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/gcloud/gcp-vision-to-lb-text-layer.py\n", - "https://github.com/Labelbox/PDF-OCR-Transform-CLI/blob/main/src/scripts/adobe/adobe-ocr-to-lb-text-layer.py" + "To import NER annotations, you must pass a `text_layer_url`. Labelbox automatically generates a `text_layer_url` after importing a PDF asset that doesn't include one." ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "import requests\n", - "import json\n", + "To extract the generated text layer URL, we first need to export the data row." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "client.enable_experimental = True\n", + "task = lb.DataRow.export(client=client,global_keys=[global_key])\n", + "task.wait_till_done()\n", + "stream = task.get_stream()\n", "\n", + "text_layer = \"\"\n", + "for output in stream:\n", + " output_json = json.loads(output.json_str)\n", + " text_layer = output_json['media_attributes']['text_layer_url']\n", + "print(text_layer)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + 
"source": [ "# Helper method\n", "def update_text_selections(annotation, group_id, list_tokens, page):\n", " return annotation.update({\n", @@ -674,15 +712,14 @@ " }\n", " ]\n", " })\n", - " \n", "\n", - "text_layer = \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json\"\n", "\n", "# Fetch the content of the text layer\n", - "res = requests.get(text_layer) \n", + "res = requests.get(text_layer)\n", "\n", "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = [\"Metal-insulator (MI) transitions have been one of the\", \"T. Sasaki,* N. Yoneyama, and N. Kobayashi\"]\n", + "content_phrases = [\"Metal-insulator (MI) transitions have been one of the\",\n", + " \"T. Sasaki, N. Yoneyama, and N. Kobayashi\"]\n", "\n", "# Parse the text layer\n", "text_selections = []\n", @@ -698,7 +735,7 @@ " text_selections.append(document_text_selection)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=entities_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=list_tokens, # ids representing individual words from the group\n", " page=1)\n", " if group[\"content\"] == content_phrases[1]:\n", @@ -708,17 +745,17 @@ " text_selections_ner.append(ner_text_selection)\n", " # build text selection for the NDJson annotations\n", " update_text_selections(annotation=ner_with_checklist_subclass_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words \n", + " group_id=group[\"id\"], # id representing group of words\n", " list_tokens=list_tokens_2, # ids representing individual words from the group\n", " page=1)\n", "\n", "\n", - "#re-write the entity annotation with text selections \n", + "#re-write the entity annotation with text selections\n", "entities_prediction_document_entity = 
lb_types.DocumentEntity(name=\"named_entity\",confidence=0.5, textSelections = text_selections)\n", "entities_prediction = lb_types.ObjectAnnotation(name=\"named_entity\",value=entities_prediction_document_entity)\n", "\n", "\n", - "# re-write the entity annotation + subclassification with text selections \n", + "# re-write the entity annotation + subclassification with text selections\n", "classifications = [\n", " lb_types.ClassificationAnnotation(\n", " name=\"sub_checklist_question\",\n", @@ -728,12 +765,12 @@ " )\n", " ]\n", "ner_annotation_with_subclass = lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",confidence=0.5, textSelections= text_selections_ner)\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(name=\"ner_with_checklist_subclass\", \n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(name=\"ner_with_checklist_subclass\",\n", " confidence=0.5,\n", - " value=ner_annotation_with_subclass, \n", + " value=ner_annotation_with_subclass,\n", " classifications=classifications)\n", "\n", - "# Final NDJSON and python annotations \n", + "# Final NDJSON and python annotations\n", "print(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\n", "print(f\"entities_annotation={entities_prediction}\")\n", "print(f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\")\n", @@ -762,7 +799,7 @@ " annotations = [\n", " entities_prediction,\n", " checklist_prediction,\n", - " nested_checklist_prediction, \n", + " nested_checklist_prediction,\n", " text_prediction,\n", " radio_prediction,\n", " nested_radio_prediction,\n", @@ -851,7 +888,7 @@ { "metadata": {}, "source": [ - "project = client.create_project(name=\"Document Prediction Import Demo\", \n", + "project = client.create_project(name=\"Document Prediction Import Demo\",\n", " media_type=lb.MediaType.Document)\n", "project.setup_editor(ontology)" ], @@ -899,7 +936,7 @@ "\n", "radio_annotation = 
lb_types.ClassificationAnnotation(\n", " name=\"radio_question\",\n", - " value=lb_types.Radio(answer = \n", + " value=lb_types.Radio(answer =\n", " lb_types.ClassificationAnswer(name = \"first_radio_answer\")\n", " )\n", ")\n", @@ -908,7 +945,7 @@ " name=\"checklist_question\",\n", " value=lb_types.Checklist(answer = [\n", " lb_types.ClassificationAnswer(name = \"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name = \"second_checklist_answer\"), \n", + " lb_types.ClassificationAnswer(name = \"second_checklist_answer\"),\n", " ])\n", " )\n", "\n", @@ -921,7 +958,7 @@ "bbox_annotation = lb_types.ObjectAnnotation(\n", " name=\"bounding_box\", # must match your ontology feature\"s name\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim_1[\"left\"], y=bbox_dim_1[\"top\"]), # x = left, y = top \n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"], y=bbox_dim_1[\"top\"]), # x = left, y = top\n", " end=lb_types.Point(x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"], y=bbox_dim_1[\"top\"]+ bbox_dim_1[\"height\"]), # x= left + width , y = top + height\n", " page=0,\n", " unit=lb_types.RectangleUnit.POINTS\n", @@ -966,7 +1003,7 @@ ")\n", "\n", "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", \n", + " name=\"free_text\",\n", " value=lb_types.Text(answer=\"sample text\")\n", ")\n", "\n", @@ -979,9 +1016,9 @@ "\n", "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", " name=\"bbox_with_radio_subclass\",\n", - " \n", + "\n", " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"], y=bbox_dim[\"top\"]), # x = left, y = top \n", + " start=lb_types.Point(x=bbox_dim[\"left\"], y=bbox_dim[\"top\"]), # x = left, y = top\n", " end=lb_types.Point(x=bbox_dim[\"left\"] + bbox_dim[\"width\"], y=bbox_dim[\"top\"] + bbox_dim[\"height\"]), # x= left + width , y = top + height\n", " unit=lb_types.RectangleUnit.POINTS,\n", " page=1\n", @@ -992,11 +1029,11 @@ " 
\t\tvalue=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"first_sub_radio_answer\",\n", - " \n", + "\n", " classifications=[\n", " lb_types.ClassificationAnnotation(\n", " name=\"second_sub_radio_question\",\n", - " \n", + "\n", " value=lb_types.Radio(\n", " answer=lb_types.ClassificationAnswer(\n", " name=\"second_sub_radio_answer\"\n", @@ -1049,7 +1086,7 @@ " annotations = [\n", " entities_annotation,\n", " checklist_annotation,\n", - " nested_checklist_annotation, \n", + " nested_checklist_annotation,\n", " text_annotation,\n", " radio_annotation,\n", " nested_radio_annotation,\n", From 127531cd4d118afba6aea27f93194843bd8ed48f Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Thu, 14 Mar 2024 13:10:08 -0400 Subject: [PATCH 7/9] Update prediction pdf notebook --- examples/prediction_upload/pdf_predictions.ipynb | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/examples/prediction_upload/pdf_predictions.ipynb b/examples/prediction_upload/pdf_predictions.ipynb index d2e9ea660..744bb10fa 100644 --- a/examples/prediction_upload/pdf_predictions.ipynb +++ b/examples/prediction_upload/pdf_predictions.ipynb @@ -500,8 +500,15 @@ "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", "task = dataset.create_data_rows([img_url])\n", "task.wait_till_done()\n", - "print(\"Errors:\",task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if 'Duplicate global key' in error['message'] and dataset.row_count == 0:\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" ], "cell_type": "code", "outputs": [], From 51bc2fe927acb4cb05628bb10d697ffe30251c45 Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Mon, 
18 Mar 2024 09:35:48 -0400 Subject: [PATCH 8/9] latest_updates --- examples/basics/data_rows.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index 45b3cf6d0..abe02fd86 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -47,7 +47,6 @@ " * HTML \n", " * DICOM \n", " * Conversational\n", - "* A data row is a member of a dataset \n", "* A data row cannot exist without belonging to a dataset.\n", "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects" ], From 9e09c38067418b32d84343c12b7025b495556309 Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Mon, 18 Mar 2024 15:49:25 -0400 Subject: [PATCH 9/9] updated readme --- examples/README.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/examples/README.md b/examples/README.md index eda753467..b10d7dde5 100644 --- a/examples/README.md +++ b/examples/README.md @@ -18,20 +18,19 @@ | Ontologies | [Github](basics/ontologies.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/ontologies.ipynb) | | Projects | [Github](basics/projects.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/projects.ipynb) | | User Management | [Github](basics/user_management.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/user_management.ipynb) | +| Export V1 to V2 migration | [Github](basics/Export_V1_to_V2_migration_support.ipynb) | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/Export_V1_to_V2_migration_support.ipynb) | --- +## [Exports](exports) -## [Model Training](https://docs.labelbox.com/docs/integration-with-model-training-service) - -Train a model using data annotated on Labelbox - -| Notebook | Github | Google Colab | -| ------------------------------- | ----------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Object Detection (Detectron2) | [Github](integrations/detectron2/coco_object.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/integrations/detectron2/coco_object.ipynb) | -| Panoptic Detection (Detectron2) | [Github](integrations/detectron2/coco_panoptic.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/integrations/detectron2/coco_panoptic.ipynb) | +| Notebook | Github | Google Colab | Learn more | +| ------------------------------------- | ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | +| Export data | [Github](exports/export_data.ipynb) | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/exports/export_data.ipynb) | +| Composite mask export | [Github](exports/composite_mask_export.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/exports/composite_mask_export.ipynb) | --- + ## [Annotation Import (Ground Truth & MAL)](annotation_import) | Notebook | Github | Google Colab | Learn more |