diff --git a/examples/annotation_import/conversational.ipynb b/examples/annotation_import/conversational.ipynb index 1b9cb9c77..4520fca4a 100644 --- a/examples/annotation_import/conversational.ipynb +++ b/examples/annotation_import/conversational.ipynb @@ -295,7 +295,8 @@ "cell_type": "markdown", "source": [ "## Step 2: Create/select an ontology\n", - "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool and classification names should match the `name` field in your annotations to ensure the correct feature schemas are matched.\n", + "\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the bounding box annotation [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1), we provided the `name` as `text_convo`. Now, when we setup our ontology, we must ensure that the name of my bounding box tool is also `checklist_convo`. The same alignment must hold true for the other tools and classifications we create in our ontology." ], @@ -322,11 +323,11 @@ " Classification( # Text classification given the name \"text\"\n", " class_type=Classification.Type.TEXT,\n", " scope=Classification.Scope.INDEX, \n", - " name=\"text_convo\"), \n", + " instructions=\"text_convo\"), \n", " Classification( # Checklist classification given the name \"text\" with two options: \"first_checklist_answer\" and \"second_checklist_answer\"\n", " class_type=Classification.Type.CHECKLIST, \n", " scope=Classification.Scope.INDEX, \n", - " name=\"checklist_convo\", \n", + " instructions=\"checklist_convo\", \n", " options=[\n", " Option(value=\"first_checklist_answer\"),\n", " Option(value=\"second_checklist_answer\") \n", @@ -334,7 +335,7 @@ " ), \n", " Classification( # Radio classification given the name \"text\" with two options: \"first_radio_answer\" and \"second_radio_answer\"\n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_convo\", \n", + " instructions=\"radio_convo\", \n", " scope=Classification.Scope.INDEX, \n", " options=[\n", " Option(value=\"first_radio_answer\"),\n", diff --git a/examples/annotation_import/image.ipynb b/examples/annotation_import/image.ipynb index b43098879..7e3a7d179 100644 --- a/examples/annotation_import/image.ipynb +++ b/examples/annotation_import/image.ipynb @@ -587,7 +587,7 @@ "cell_type": "markdown", "source": [ "## Step 2: Create/select an Ontology\n", - "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool and classification names should match the `name` field in your annotations to ensure the correct feature schemas are matched.\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the bounding box annotation above, we provided the `name` as `polyline`. Now, when we setup our ontology, we must ensure that the name of my bounding box tool is also `polyline`. 
The same alignment must hold true for the other tools and classifications we create in our ontology.\n", "\n", @@ -606,7 +606,7 @@ " classifications=[ # List of Classification objects\n", " Classification( # Radio classification given the name \"text\" with two options: \"first_radio_answer\" and \"second_radio_answer\"\n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question\", \n", + " instructions=\"radio_question\", \n", " options=[\n", " Option(value=\"first_radio_answer\"),\n", " Option(value=\"second_radio_answer\")\n", @@ -614,7 +614,7 @@ " ),\n", " Classification( # Checklist classification given the name \"text\" with two options: \"first_checklist_answer\" and \"second_checklist_answer\"\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"checklist_question\", \n", + " instructions=\"checklist_question\", \n", " options=[\n", " Option(value=\"first_checklist_answer\"),\n", " Option(value=\"second_checklist_answer\") \n", @@ -622,17 +622,17 @@ " ), \n", " Classification( # Text classification given the name \"text\"\n", " class_type=Classification.Type.TEXT,\n", - " name=\"free_text\"\n", + " instructions=\"free_text\"\n", " ),\n", " Classification(\n", " class_type=Classification.Type.RADIO, \n", - " name=\"nested_radio_question\",\n", + " instructions=\"nested_radio_question\",\n", " options=[\n", " Option(\"first_radio_answer\",\n", " options=[\n", " Classification(\n", " class_type=Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " instructions=\"sub_radio_question\",\n", " options=[Option(\"first_sub_radio_answer\")]\n", " )\n", " ]\n", @@ -651,7 +651,7 @@ " classifications=[\n", " Classification(\n", " class_type=Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " instructions=\"sub_radio_question\",\n", " options=[\n", " Option(value=\"first_sub_radio_answer\")\n", " ]\n", diff --git a/examples/annotation_import/text.ipynb b/examples/annotation_import/text.ipynb index 407f8dc4b..1ffa020b5 100644 --- a/examples/annotation_import/text.ipynb +++ b/examples/annotation_import/text.ipynb @@ -366,7 +366,7 @@ "cell_type": "markdown", "source": [ "### Step 2: Create/select an ontology\n", - "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool and classification names should match the `name` field in your annotations to ensure the correct feature schemas are matched.\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the checklist annotation above, we provided the `name` as `checklist_question`. Now, when we setup our ontology, we must ensure that the name of my classification tool is also `checklist_question`. 
The same alignment must hold true for the other tools and classifications we create in our ontology.\n", "\n", @@ -386,18 +386,18 @@ " classifications=[ # List of Classification objects\n", " Classification( \n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question\", \n", + " instructions=\"radio_question\", \n", " options=[Option(value=\"first_radio_answer\")]\n", " ),\n", " Classification( \n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question_sub\", \n", + " instructions=\"radio_question_sub\", \n", " options=[\n", " Option(value=\"first_radio_answer\",\n", " options=[\n", " Classification(\n", " class_type=Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " instructions=\"sub_radio_question\",\n", " options=[\n", " Option(value=\"first_sub_radio_answer\")\n", " ]\n", @@ -408,7 +408,7 @@ " ),\n", " Classification( \n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"checklist_question\", \n", + " instructions=\"checklist_question\", \n", " options=[\n", " Option(value=\"first_checklist_answer\"),\n", " Option(value=\"second_checklist_answer\"), \n", @@ -417,7 +417,7 @@ " ), \n", " Classification( # Text classification given the name \"text\"\n", " class_type=Classification.Type.TEXT,\n", - " name=\"free_text\"\n", + " instructions=\"free_text\"\n", " )\n", " ],\n", " tools=[ # List of Tool objects\n", diff --git a/examples/annotation_import/tiled.ipynb b/examples/annotation_import/tiled.ipynb index b2853ff82..dcc2e6a4f 100644 --- a/examples/annotation_import/tiled.ipynb +++ b/examples/annotation_import/tiled.ipynb @@ -616,13 +616,14 @@ }, { "cell_type": "markdown", + "source": [ + "### Step 2: Create/select an ontology\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched." + ], "metadata": { "id": "y_tWMvxilTq9" }, - "source": [ - "### Step 2: Create/select an ontology\n", - "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool and classification names should match the `name` field in your annotations to ensure the correct feature schemas are matched." 
- ] + "id": "y_tWMvxilTq9" }, { "cell_type": "code", @@ -646,7 +647,7 @@ " classifications=[\n", " Classification(\n", " class_type=Classification.Type.CHECKLIST,\n", - " name=\"checklist_class_name\",\n", + " instructions=\"checklist_class_name\",\n", " options=[\n", " Option(value=\"first_checklist_answer\")\n", " ]\n", @@ -659,7 +660,7 @@ " classifications=[\n", " Classification(\n", " class_type=Classification.Type.TEXT,\n", - " name=\"free_text_geo\"\n", + " instructions=\"free_text_geo\"\n", " ),\n", " ]\n", " ) \n", @@ -667,7 +668,7 @@ " classifications = [\n", " Classification(\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"checklist_question_geo\",\n", + " instructions=\"checklist_question_geo\",\n", " options=[\n", " Option(value=\"first_checklist_answer\"),\n", " Option(value=\"second_checklist_answer\"), \n", @@ -676,7 +677,7 @@ " ), \n", " Classification(\n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question_geo\",\n", + " instructions=\"radio_question_geo\",\n", " options=[\n", " Option(value=\"first_radio_answer\")\n", " ]\n", diff --git a/examples/annotation_import/video.ipynb b/examples/annotation_import/video.ipynb index 8afc3624f..7a76fc678 100644 --- a/examples/annotation_import/video.ipynb +++ b/examples/annotation_import/video.ipynb @@ -504,7 +504,7 @@ }, "source": [ "### Step 2: Create/select an ontology\n", - "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool and classification names should match the `name` field in your annotations to ensure the correct feature schemas are matched.\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the bounding box annotation above, we provided the `name` as `bbox_video`. Now, when we setup our ontology, we must ensure that the name of my bounding box tool is also `bbox_video`. 
The same alignment must hold true for the other tools and classifications we create in our ontology.\n", "\n", @@ -531,7 +531,7 @@ " classifications=[\n", " Classification(\n", " class_type=Classification.Type.RADIO, \n", - " name=\"bbox_radio\", \n", + " instructions=\"bbox_radio\", \n", " scope = Classification.Scope.INDEX,\n", " options=[\n", " Option(value=\"bbox_radio_answer_1\"),\n", @@ -545,7 +545,7 @@ " classifications=[ \n", " Classification(\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"checklist_class\",\n", + " instructions=\"checklist_class\",\n", " scope = Classification.Scope.INDEX, ## Need to defined scope for frame classifications \n", " options=[ \n", " Option(value=\"first_checklist_answer\"),\n", @@ -554,7 +554,7 @@ " ),\n", " Classification(\n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_class_global\",\n", + " instructions=\"radio_class_global\",\n", " options=[ \n", " Option(value=\"first_radio_answer\"),\n", " Option(value=\"second_radio_answer\")\n", @@ -562,13 +562,13 @@ " ),\n", " Classification(\n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question_nested\",\n", + " instructions=\"radio_question_nested\",\n", " options=[\n", " Option(\"first_radio_question\",\n", " options=[\n", " Classification(\n", " class_type=Classification.Type.RADIO,\n", - " name=\"sub_question_radio\",\n", + " instructions=\"sub_question_radio\",\n", " options=[Option(\"sub_answer\")]\n", " )\n", " ]\n", diff --git a/examples/annotation_types/basics.ipynb b/examples/annotation_types/basics.ipynb index 80d8a0231..fcc0c28cc 100644 --- a/examples/annotation_types/basics.ipynb +++ b/examples/annotation_types/basics.ipynb @@ -1168,7 +1168,7 @@ " name=\"deer_nose\",\n", " classifications=[\n", " Classification(class_type=Classification.Type.RADIO,\n", - " name=\"description\",\n", + " instructions=\"description\",\n", " options=[Option(value=\"wet\")])\n", " ]),\n", " Tool(tool=Tool.Type.SEGMENTATION, name=\"deer_eyes\")\n", @@ -1176,7 +1176,7 @@ " classifications=[\n", " Classification(\n", " Classification.Type.CHECKLIST,\n", - " name=\"image_description\",\n", + " instructions=\"image_description\",\n", " options=[\n", " Option(value=\"bright\"),\n", " Option(value=\"not_blurry\"),\n", diff --git a/examples/annotation_types/label_containers.ipynb b/examples/annotation_types/label_containers.ipynb index cef1f71ab..69c48aaa7 100644 --- a/examples/annotation_types/label_containers.ipynb +++ b/examples/annotation_types/label_containers.ipynb @@ -207,7 +207,7 @@ " name=\"deer_nose\",\n", " classifications=[\n", " Classification(class_type=Classification.Type.RADIO,\n", - " name=\"nose_description\",\n", + " instructions=\"nose_description\",\n", " options=[Option(value=\"wet\")])\n", " ]),\n", " Tool(tool=Tool.Type.SEGMENTATION, name=\"deer_eyes\")\n", @@ -215,7 +215,7 @@ " classifications=[\n", " Classification(\n", " Classification.Type.CHECKLIST,\n", - " name=\"image_description\",\n", + " instructions=\"image_description\",\n", " options=[\n", " Option(value=\"bright\"),\n", " Option(value=\"not_blurry\"),\n", diff --git a/examples/basics/ontologies.ipynb b/examples/basics/ontologies.ipynb index 2602e8267..eba7459bb 100644 --- a/examples/basics/ontologies.ipynb +++ b/examples/basics/ontologies.ipynb @@ -355,13 +355,13 @@ "execution_count": 13, "source": [ "text_classification = Classification(class_type=Classification.Type.TEXT,\n", - " name=\"dog_name\")\n", + " instructions=\"dog_name\")\n", "radio_classification = 
Classification(class_type=Classification.Type.RADIO,\n", - " name=\"dog_breed\",\n", + " instructions=\"dog_breed\",\n", " options=[Option(\"poodle\")])\n", "checklist_classification = Classification(\n", " class_type=Classification.Type.CHECKLIST,\n", - " name=\"background\",\n", + " instructions=\"background\",\n", " options=[Option(\"at_park\"), Option(\"has_leash\")])" ], "outputs": [], diff --git a/examples/integrations/databricks/labelbox_databricks_example.ipynb b/examples/integrations/databricks/labelbox_databricks_example.ipynb index 07af6130b..7327f339b 100644 --- a/examples/integrations/databricks/labelbox_databricks_example.ipynb +++ b/examples/integrations/databricks/labelbox_databricks_example.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bd1df061-02be-4232-8016-4aeb27fd7691","showTitle":false,"title":""}},"source":["# Labelbox Connector for Databricks Tutorial Notebook"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"b3da60c1-5b0d-45f4-a56b-b001dd1b0ed6","showTitle":false,"title":""}},"source":["#### Pre-requisites\n","1. This tutorial notebook requires a Lablbox API Key. Please login to your [Labelbox Account](app.labelbox.com) and generate an [API Key](https://app.labelbox.com/account/api-keys)\n","2. A few cells below will install the Labelbox SDK and Connector Library. This install is notebook-scoped and will not affect the rest of your cluster. \n","3. Please make sure you are running at least the latest LTS version of Databricks. \n","\n","#### Notebook Preview\n","This notebook will guide you through these steps: \n","1. Connect to Labelbox via the SDK \n","2. Create a labeling dataset from a table of unstructured data in Databricks\n","3. Programmatically set up an ontology and labeling project in Labelbox\n","4. Load Bronze and Silver annotation tables from an example labeled project \n","5. Additional cells describe how to handle video annotations and use Labelbox Diagnostics and Catalog \n","\n","Additional documentation links are provided at the end of the notebook."]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"889e8c34-5b3d-4063-bbc9-0099251817ca","showTitle":false,"title":""}},"source":["Thanks for trying out the Databricks and Labelbox Connector! You or someone from your organization signed up for a Labelbox trial through Databricks Partner Connect. This notebook was loaded into your Shared directory to help illustrate how Labelbox and Databricks can be used together to power unstructured data workflows. \n","\n","Labelbox can be used to rapidly annotate a variety of unstructured data from your Data Lake ([images](https://labelbox.com/product/image), [video](https://labelbox.com/product/video), [text](https://labelbox.com/product/text), and [geospatial tiled imagery](https://docs.labelbox.com/docs/tiled-imagery-editor)) and the Labelbox Connector for Databricks makes it easy to bring the annotations back into your Lakehouse environment for AI/ML and analytical workflows. \n","\n","If you would like to watch a video of the workflow, check out our [Data & AI Summit Demo](https://databricks.com/session_na21/productionizing-unstructured-data-for-ai-and-analytics). \n","\n","\n","\"example-workflow\"\n","\n","
Questions or comments? Reach out to us at [ecosystem+databricks@labelbox.com](mailto:ecosystem+databricks@labelbox.com)"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"20ce88db-4682-4881-a5b6-2260a00203f3","showTitle":true,"title":"Install Labelbox Library & Labelbox Connector for Databricks"}},"outputs":[],"source":["%pip install labelbox labelspark"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bffc0447-1897-4dcb-869b-49ffa996e673","showTitle":false,"title":""}},"outputs":[],"source":["#This will import Koalas or Pandas-on-Spark based on your DBR version. \n","from pyspark import SparkContext\n","from packaging import version\n","sc = SparkContext.getOrCreate()\n","if version.parse(sc.version) < version.parse(\"3.2.0\"):\n"," import databricks.koalas as pd \n"," needs_koalas = True \n","else:\n"," import pyspark.pandas as pd\n"," needs_koalas = False"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"06ff0dac-d4ab-4182-bd6a-735781250779","showTitle":false,"title":""}},"source":["## Configure the SDK\n","\n","Now that Labelbox and the Databricks libraries have been installed, you will need to configure the SDK. You will need an API key that you can create through the app [here](https://app.labelbox.com/account/api-keys). You can also store the key using Databricks Secrets API. The SDK will attempt to use the env var `LABELBOX_API_KEY`"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"acdfe8c6-1d76-46af-a945-162f5c8a1e26","showTitle":false,"title":""}},"outputs":[],"source":["from labelbox import Client, Dataset\n","from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n","from labelbox.schema.media_type import MediaType\n","import labelspark\n","\n","API_KEY = \"\" \n","\n","if not(API_KEY):\n"," raise ValueError(\"Go to Labelbox to get an API key\")\n"," \n","client = Client(API_KEY)"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"a9d30902-fce5-494a-99d2-8b6c4780a8c9","showTitle":false,"title":""}},"source":["## Create seed data\n","\n","Next we'll load a demo dataset into a Spark table so you can see how to easily load assets into Labelbox via URLs with the Labelbox Connector for Databricks. \n","\n","Also, Labelbox has native support for AWS, Azure, and GCP cloud storage. You can connect Labelbox to your storage via [Delegated Access](https://docs.labelbox.com/docs/iam-delegated-access) and easily load those assets for annotation. For more information, you can watch this [video](https://youtu.be/wlWo6EmPDV4).\n","\n","You can also add data to Labelbox [using the Labelbox SDK directly](https://docs.labelbox.com/docs/datasets-datarows). We recommend using the SDK if you have complicated dataset creation requirements (e.g. 
including metadata with your dataset) which aren't handled by the Labelbox Connector for Databricks."]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"4ffa24ed-d12c-46ac-92be-159bf5e09762","showTitle":false,"title":""}},"outputs":[],"source":["sample_dataset_dict = {\"external_id\":[\"sample1.jpg\",\n"," \"sample2.jpg\",\n"," \"sample3.jpg\",\n"," \"sample4.jpg\",\n"," \"sample5.jpg\",\n"," \"sample6.jpg\",\n"," \"sample7.jpg\",\n"," \"sample8.jpg\",\n"," \"sample9.jpg\",\n"," \"sample10.jpg\"],\n"," \"row_data\":[\"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000247422.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000484849.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000215782.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_val2014_000000312024.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000486139.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000302713.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000523272.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000094514.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_val2014_000000050578.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000073727.jpg\"]}\n","\n","df = pd.DataFrame.from_dict(sample_dataset_dict).to_spark() #produces our demo Spark table of datarows for Labelbox"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bfaaf86c-3497-4096-a04e-95fa1bb3a576","showTitle":false,"title":""}},"outputs":[],"source":["# can parse the directory and make a Spark table of image URLs\n","SAMPLE_TABLE = \"sample_unstructured_data\"\n","\n","tblList = spark.catalog.listTables()\n","\n","if not any([table.name == SAMPLE_TABLE for table in tblList]):\n"," df.createOrReplaceTempView(SAMPLE_TABLE)\n"," print(f\"Registered table: {SAMPLE_TABLE}\")"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"4709566b-9d52-4e47-beaa-449071928b24","showTitle":false,"title":""}},"source":["You should now have a temporary table \"sample_unstructured_data\" which includes the file names and URLs for some demo images. We're going to use this table with Labelbox using the Labelbox Connector for Databricks!"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"9b2f6cc7-b8c8-4a42-a634-78d1ab9c7984","showTitle":false,"title":""}},"outputs":[],"source":["display(sqlContext.sql(f\"select * from {SAMPLE_TABLE} LIMIT 5\"))"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"b186a2ad-0640-435e-88e8-a94b7439b3c3","showTitle":false,"title":""}},"source":["## Create a Labeling Project\n","\n","Projects are where teams create labels. 
A project is requires a dataset of assets to be labeled and an ontology to configure the labeling interface.\n","\n","### Step 1: Create a dataaset\n","\n","The [Labelbox Connector for Databricks](https://pypi.org/project/labelspark/) expects a spark table with two columns; the first column \"external_id\" and second column \"row_data\"\n","\n","external_id is a filename, like \"birds.jpg\" or \"my_video.mp4\"\n","\n","row_data is the URL path to the file. Labelbox renders assets locally on your users' machines when they label, so your labeler will need permission to access that asset. \n","\n","Example: \n","\n","| external_id | row_data |\n","|-------------|--------------------------------------|\n","| image1.jpg | https://url_to_your_asset/image1.jpg |\n","| image2.jpg | https://url_to_your_asset/image2.jpg |\n","| image3.jpg | https://url_to_your_asset/image3.jpg |"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"e711f5c6-c99f-449d-83ef-4de53b5c11d4","showTitle":false,"title":""}},"outputs":[],"source":["unstructured_data = spark.table(SAMPLE_TABLE)\n","\n","demo_dataset = labelspark.create_dataset(client, unstructured_data, name = \"Databricks Demo Dataset\")"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cfea193f-fe59-4413-9bba-4bd37272bc4d","showTitle":false,"title":""}},"outputs":[],"source":["print(\"Open the dataset in the App\")\n","print(f\"https://app.labelbox.com/data/{demo_dataset.uid}\")"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"d15d1331-c969-4b8f-bc8c-04c0756fb2d2","showTitle":false,"title":""}},"source":["### Step 2: Create a project\n","\n","You can use the labelbox SDK to build your ontology (we'll do that next) You can also set your project up entirely through our website at app.labelbox.com.\n","\n","Check out our [ontology creation documentation.](https://docs.labelbox.com/docs/configure-ontology)"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"a0eca58e-f98e-429e-95d9-74f67cf1464d","showTitle":false,"title":""}},"outputs":[],"source":["# Create a new project\n","project_demo = client.create_project(name=\"Labelbox and Databricks Example\", media_type=MediaType.Image)\n","project_demo.datasets.connect(demo_dataset) # add the dataset to the queue\n","\n","ontology = OntologyBuilder()\n","\n","tools = [\n"," Tool(tool=Tool.Type.BBOX, name=\"Car\"),\n"," Tool(tool=Tool.Type.BBOX, name=\"Flower\"),\n"," Tool(tool=Tool.Type.BBOX, name=\"Fruit\"),\n"," Tool(tool=Tool.Type.BBOX, name=\"Plant\"),\n"," Tool(tool=Tool.Type.SEGMENTATION, name=\"Bird\"),\n"," Tool(tool=Tool.Type.SEGMENTATION, name=\"Person\"),\n"," Tool(tool=Tool.Type.SEGMENTATION, name=\"Dog\"),\n"," Tool(tool=Tool.Type.SEGMENTATION, name=\"Gemstone\"),\n","]\n","for tool in tools: \n"," ontology.add_tool(tool)\n","\n","conditions = [\"clear\", \"overcast\", \"rain\", \"other\"]\n","\n","weather_classification = Classification(\n"," class_type=Classification.Type.RADIO,\n"," name=\"what is the weather?\", \n"," options=[Option(value=c) for c in conditions]\n",") \n","ontology.add_classification(weather_classification)\n","\n","\n","# Setup editor\n","for editor in client.get_labeling_frontends():\n"," if editor.name == 'Editor':\n"," project_demo.setup(editor, ontology.asdict()) \n","\n","print(\"Project Setup is 
complete.\")"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"0f22df34-988f-4048-9fa7-d7a0ebc6173c","showTitle":false,"title":""}},"source":["### Step 3: Go label data"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"9fcc8ca1-af39-4bb9-9e1e-fd5202a5e46c","showTitle":false,"title":""}},"outputs":[],"source":["print(\"Open the project to start labeling\")\n","print(f\"https://app.labelbox.com/projects/{project_demo.uid}/overview\")"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"d01c2981-ab23-44df-a70c-9dcc617d5b6b","showTitle":false,"title":""},"collapsed":false,"pycharm":{"name":"#%%\n"}},"outputs":[],"source":["raise ValueError(\"Go label some data before continuing\")"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"19634126-4ccd-4372-a459-9db511785a22","showTitle":false,"title":""}},"source":["## Exporting labels/annotations\n","\n","After creating labels in Labelbox you can export them to use in Databricks for model training and analysis."]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"96031aa4-b947-4c9b-ad5c-9a6422fa3ec0","showTitle":false,"title":""}},"outputs":[],"source":["LABEL_TABLE = \"exported_labels\""]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"09bab9cb-9271-4029-af20-df9bab287c72","showTitle":false,"title":""}},"outputs":[],"source":["labels_table = labelspark.get_annotations(client, project_demo.uid, spark, sc)\n","labels_table.createOrReplaceTempView(LABEL_TABLE)\n","display(labels_table)"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"7f88d026-d4f3-4efc-929a-39f2d5393448","showTitle":false,"title":""}},"source":["## Other features of Labelbox\n","\n","[Model Assisted Labeling](https://docs.labelbox.com/docs/model-assisted-labeling)\n","
Once you train a model on your initial set of unstructured data, you can plug that model into Labelbox to support a Model Assisted Labeling workflow. Review the outputs of your model, make corrections, and retrain with ease! You can reduce future labeling costs by >50% by leveraging model assisted labeling.\n","\n","\"MAL\"\n","\n","[Catalog](https://docs.labelbox.com/docs/catalog)\n","
Once you've created datasets and annotations in Labelbox, you can easily browse your datasets and curate new ones in Catalog. Use your model embeddings to find images by similarity search. \n","\n","\"Catalog\"\n","\n","[Model Diagnostics](https://labelbox.com/product/model-diagnostics)\n","
Labelbox complements your MLFlow experiment tracking with the ability to easily visualize experiment predictions at scale. Model Diagnostics helps you quickly identify areas where your model is weak so you can collect the right data and refine the next model iteration. \n","\n","\"Diagnostics\""]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"8eaf3897-b0a8-44e9-aea4-fe58d5ee246f","showTitle":true,"title":"More Info"}},"source":["While using the Labelbox Connector for Databricks, you will likely use the Labelbox SDK (e.g. for programmatic ontology creation). These resources will help familiarize you with the Labelbox Python SDK: \n","* [Visit our docs](https://labelbox.com/docs/python-api) to learn how the SDK works\n","* Checkout our [notebook examples](https://github.com/Labelbox/labelspark/tree/master/notebooks) to follow along with interactive tutorials\n","* view our [API reference](https://labelbox.com/docs/python-api/api-reference).\n","\n","Questions or comments? Reach out to us at [ecosystem+databricks@labelbox.com](mailto:ecosystem+databricks@labelbox.com)"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"1e446cb2-0338-4e5a-9d03-fc24901b3109","showTitle":false,"title":""}},"source":["Copyright Labelbox, Inc. 2022. The source in this notebook is provided subject to the [Labelbox Terms of Service](https://docs.labelbox.com/page/terms-of-service). All included or referenced third party libraries are subject to the licenses set forth below.\n","\n","|Library Name|Library license | Library License URL | Library Source URL |\n","|---|---|---|---|\n","|Labelbox Python SDK|Apache-2.0 License |https://github.com/Labelbox/labelbox-python/blob/develop/LICENSE|https://github.com/Labelbox/labelbox-python\n","|Labelbox Connector for Databricks|Apache-2.0 License |https://github.com/Labelbox/labelspark/blob/master/LICENSE|https://github.com/Labelbox/labelspark\n","|Python|Python Software Foundation (PSF) |https://github.com/python/cpython/blob/master/LICENSE|https://github.com/python/cpython|\n","|Apache Spark|Apache-2.0 License |https://github.com/apache/spark/blob/master/LICENSE|https://github.com/apache/spark|"]}],"metadata":{"application/vnd.databricks.v1+notebook":{"dashboards":[],"language":"python","notebookMetadata":{"pythonIndentUnit":2},"notebookName":"labelbox_databricks_example","notebookOrigID":2233145370303228,"widgets":{}},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0} +{"cells":[{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bd1df061-02be-4232-8016-4aeb27fd7691","showTitle":false,"title":""}},"source":["# Labelbox Connector for Databricks Tutorial Notebook"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"b3da60c1-5b0d-45f4-a56b-b001dd1b0ed6","showTitle":false,"title":""}},"source":["#### Pre-requisites\n","1. This tutorial notebook requires a Lablbox API Key. Please login to your [Labelbox Account](app.labelbox.com) and generate an [API Key](https://app.labelbox.com/account/api-keys)\n","2. A few cells below will install the Labelbox SDK and Connector Library. This install is notebook-scoped and will not affect the rest of your cluster. \n","3. Please make sure you are running at least the latest LTS version of Databricks. \n","\n","#### Notebook Preview\n","This notebook will guide you through these steps: \n","1. 
Connect to Labelbox via the SDK \n","2. Create a labeling dataset from a table of unstructured data in Databricks\n","3. Programmatically set up an ontology and labeling project in Labelbox\n","4. Load Bronze and Silver annotation tables from an example labeled project \n","5. Additional cells describe how to handle video annotations and use Labelbox Diagnostics and Catalog \n","\n","Additional documentation links are provided at the end of the notebook."]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"889e8c34-5b3d-4063-bbc9-0099251817ca","showTitle":false,"title":""}},"source":["Thanks for trying out the Databricks and Labelbox Connector! You or someone from your organization signed up for a Labelbox trial through Databricks Partner Connect. This notebook was loaded into your Shared directory to help illustrate how Labelbox and Databricks can be used together to power unstructured data workflows. \n","\n","Labelbox can be used to rapidly annotate a variety of unstructured data from your Data Lake ([images](https://labelbox.com/product/image), [video](https://labelbox.com/product/video), [text](https://labelbox.com/product/text), and [geospatial tiled imagery](https://docs.labelbox.com/docs/tiled-imagery-editor)) and the Labelbox Connector for Databricks makes it easy to bring the annotations back into your Lakehouse environment for AI/ML and analytical workflows. \n","\n","If you would like to watch a video of the workflow, check out our [Data & AI Summit Demo](https://databricks.com/session_na21/productionizing-unstructured-data-for-ai-and-analytics). \n","\n","\n","\"example-workflow\"\n","\n","
Questions or comments? Reach out to us at [ecosystem+databricks@labelbox.com](mailto:ecosystem+databricks@labelbox.com)"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"20ce88db-4682-4881-a5b6-2260a00203f3","showTitle":true,"title":"Install Labelbox Library & Labelbox Connector for Databricks"}},"outputs":[],"source":["%pip install labelbox labelspark"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bffc0447-1897-4dcb-869b-49ffa996e673","showTitle":false,"title":""}},"outputs":[],"source":["#This will import Koalas or Pandas-on-Spark based on your DBR version. \n","from pyspark import SparkContext\n","from packaging import version\n","sc = SparkContext.getOrCreate()\n","if version.parse(sc.version) < version.parse(\"3.2.0\"):\n"," import databricks.koalas as pd \n"," needs_koalas = True \n","else:\n"," import pyspark.pandas as pd\n"," needs_koalas = False"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"06ff0dac-d4ab-4182-bd6a-735781250779","showTitle":false,"title":""}},"source":["## Configure the SDK\n","\n","Now that Labelbox and the Databricks libraries have been installed, you will need to configure the SDK. You will need an API key that you can create through the app [here](https://app.labelbox.com/account/api-keys). You can also store the key using Databricks Secrets API. The SDK will attempt to use the env var `LABELBOX_API_KEY`"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"acdfe8c6-1d76-46af-a945-162f5c8a1e26","showTitle":false,"title":""}},"outputs":[],"source":["from labelbox import Client, Dataset\n","from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n","from labelbox.schema.media_type import MediaType\n","import labelspark\n","\n","API_KEY = \"\" \n","\n","if not(API_KEY):\n"," raise ValueError(\"Go to Labelbox to get an API key\")\n"," \n","client = Client(API_KEY)"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"a9d30902-fce5-494a-99d2-8b6c4780a8c9","showTitle":false,"title":""}},"source":["## Create seed data\n","\n","Next we'll load a demo dataset into a Spark table so you can see how to easily load assets into Labelbox via URLs with the Labelbox Connector for Databricks. \n","\n","Also, Labelbox has native support for AWS, Azure, and GCP cloud storage. You can connect Labelbox to your storage via [Delegated Access](https://docs.labelbox.com/docs/iam-delegated-access) and easily load those assets for annotation. For more information, you can watch this [video](https://youtu.be/wlWo6EmPDV4).\n","\n","You can also add data to Labelbox [using the Labelbox SDK directly](https://docs.labelbox.com/docs/datasets-datarows). We recommend using the SDK if you have complicated dataset creation requirements (e.g. 
including metadata with your dataset) which aren't handled by the Labelbox Connector for Databricks."]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"4ffa24ed-d12c-46ac-92be-159bf5e09762","showTitle":false,"title":""}},"outputs":[],"source":["sample_dataset_dict = {\"external_id\":[\"sample1.jpg\",\n"," \"sample2.jpg\",\n"," \"sample3.jpg\",\n"," \"sample4.jpg\",\n"," \"sample5.jpg\",\n"," \"sample6.jpg\",\n"," \"sample7.jpg\",\n"," \"sample8.jpg\",\n"," \"sample9.jpg\",\n"," \"sample10.jpg\"],\n"," \"row_data\":[\"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000247422.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000484849.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000215782.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_val2014_000000312024.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000486139.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000302713.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000523272.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000094514.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_val2014_000000050578.jpg\",\n"," \"https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000073727.jpg\"]}\n","\n","df = pd.DataFrame.from_dict(sample_dataset_dict).to_spark() #produces our demo Spark table of datarows for Labelbox"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bfaaf86c-3497-4096-a04e-95fa1bb3a576","showTitle":false,"title":""}},"outputs":[],"source":["# can parse the directory and make a Spark table of image URLs\n","SAMPLE_TABLE = \"sample_unstructured_data\"\n","\n","tblList = spark.catalog.listTables()\n","\n","if not any([table.name == SAMPLE_TABLE for table in tblList]):\n"," df.createOrReplaceTempView(SAMPLE_TABLE)\n"," print(f\"Registered table: {SAMPLE_TABLE}\")"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"4709566b-9d52-4e47-beaa-449071928b24","showTitle":false,"title":""}},"source":["You should now have a temporary table \"sample_unstructured_data\" which includes the file names and URLs for some demo images. We're going to use this table with Labelbox using the Labelbox Connector for Databricks!"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"9b2f6cc7-b8c8-4a42-a634-78d1ab9c7984","showTitle":false,"title":""}},"outputs":[],"source":["display(sqlContext.sql(f\"select * from {SAMPLE_TABLE} LIMIT 5\"))"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"b186a2ad-0640-435e-88e8-a94b7439b3c3","showTitle":false,"title":""}},"source":["## Create a Labeling Project\n","\n","Projects are where teams create labels. 
A project is requires a dataset of assets to be labeled and an ontology to configure the labeling interface.\n","\n","### Step 1: Create a dataaset\n","\n","The [Labelbox Connector for Databricks](https://pypi.org/project/labelspark/) expects a spark table with two columns; the first column \"external_id\" and second column \"row_data\"\n","\n","external_id is a filename, like \"birds.jpg\" or \"my_video.mp4\"\n","\n","row_data is the URL path to the file. Labelbox renders assets locally on your users' machines when they label, so your labeler will need permission to access that asset. \n","\n","Example: \n","\n","| external_id | row_data |\n","|-------------|--------------------------------------|\n","| image1.jpg | https://url_to_your_asset/image1.jpg |\n","| image2.jpg | https://url_to_your_asset/image2.jpg |\n","| image3.jpg | https://url_to_your_asset/image3.jpg |"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"e711f5c6-c99f-449d-83ef-4de53b5c11d4","showTitle":false,"title":""}},"outputs":[],"source":["unstructured_data = spark.table(SAMPLE_TABLE)\n","\n","demo_dataset = labelspark.create_dataset(client, unstructured_data, name = \"Databricks Demo Dataset\")"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cfea193f-fe59-4413-9bba-4bd37272bc4d","showTitle":false,"title":""}},"outputs":[],"source":["print(\"Open the dataset in the App\")\n","print(f\"https://app.labelbox.com/data/{demo_dataset.uid}\")"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"d15d1331-c969-4b8f-bc8c-04c0756fb2d2","showTitle":false,"title":""}},"source":["### Step 2: Create a project\n","\n","You can use the labelbox SDK to build your ontology (we'll do that next) You can also set your project up entirely through our website at app.labelbox.com.\n","\n","Check out our [ontology creation documentation.](https://docs.labelbox.com/docs/configure-ontology)"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"a0eca58e-f98e-429e-95d9-74f67cf1464d","showTitle":false,"title":""}},"outputs":[],"source":["# Create a new project\n","project_demo = client.create_project(name=\"Labelbox and Databricks Example\", media_type=MediaType.Image)\n","project_demo.datasets.connect(demo_dataset) # add the dataset to the queue\n","\n","ontology = OntologyBuilder()\n","\n","tools = [\n"," Tool(tool=Tool.Type.BBOX, name=\"Car\"),\n"," Tool(tool=Tool.Type.BBOX, name=\"Flower\"),\n"," Tool(tool=Tool.Type.BBOX, name=\"Fruit\"),\n"," Tool(tool=Tool.Type.BBOX, name=\"Plant\"),\n"," Tool(tool=Tool.Type.SEGMENTATION, name=\"Bird\"),\n"," Tool(tool=Tool.Type.SEGMENTATION, name=\"Person\"),\n"," Tool(tool=Tool.Type.SEGMENTATION, name=\"Dog\"),\n"," Tool(tool=Tool.Type.SEGMENTATION, name=\"Gemstone\"),\n","]\n","for tool in tools: \n"," ontology.add_tool(tool)\n","\n","conditions = [\"clear\", \"overcast\", \"rain\", \"other\"]\n","\n","weather_classification = Classification(\n"," class_type=Classification.Type.RADIO,\n"," instructions=\"what is the weather?\", \n"," options=[Option(value=c) for c in conditions]\n",") \n","ontology.add_classification(weather_classification)\n","\n","\n","# Setup editor\n","for editor in client.get_labeling_frontends():\n"," if editor.name == 'Editor':\n"," project_demo.setup(editor, ontology.asdict()) \n","\n","print(\"Project Setup is 
complete.\")"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"0f22df34-988f-4048-9fa7-d7a0ebc6173c","showTitle":false,"title":""}},"source":["### Step 3: Go label data"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"9fcc8ca1-af39-4bb9-9e1e-fd5202a5e46c","showTitle":false,"title":""}},"outputs":[],"source":["print(\"Open the project to start labeling\")\n","print(f\"https://app.labelbox.com/projects/{project_demo.uid}/overview\")"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"d01c2981-ab23-44df-a70c-9dcc617d5b6b","showTitle":false,"title":""},"collapsed":false,"pycharm":{"name":"#%%\n"}},"outputs":[],"source":["raise ValueError(\"Go label some data before continuing\")"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"19634126-4ccd-4372-a459-9db511785a22","showTitle":false,"title":""}},"source":["## Exporting labels/annotations\n","\n","After creating labels in Labelbox you can export them to use in Databricks for model training and analysis."]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"96031aa4-b947-4c9b-ad5c-9a6422fa3ec0","showTitle":false,"title":""}},"outputs":[],"source":["LABEL_TABLE = \"exported_labels\""]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"09bab9cb-9271-4029-af20-df9bab287c72","showTitle":false,"title":""}},"outputs":[],"source":["labels_table = labelspark.get_annotations(client, project_demo.uid, spark, sc)\n","labels_table.createOrReplaceTempView(LABEL_TABLE)\n","display(labels_table)"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"7f88d026-d4f3-4efc-929a-39f2d5393448","showTitle":false,"title":""}},"source":["## Other features of Labelbox\n","\n","[Model Assisted Labeling](https://docs.labelbox.com/docs/model-assisted-labeling)\n","
Once you train a model on your initial set of unstructured data, you can plug that model into Labelbox to support a Model Assisted Labeling workflow. Review the outputs of your model, make corrections, and retrain with ease! You can reduce future labeling costs by >50% by leveraging model assisted labeling.\n","\n","\"MAL\"\n","\n","[Catalog](https://docs.labelbox.com/docs/catalog)\n","
Once you've created datasets and annotations in Labelbox, you can easily browse your datasets and curate new ones in Catalog. Use your model embeddings to find images by similarity search. \n","\n","\"Catalog\"\n","\n","[Model Diagnostics](https://labelbox.com/product/model-diagnostics)\n","
Labelbox complements your MLFlow experiment tracking with the ability to easily visualize experiment predictions at scale. Model Diagnostics helps you quickly identify areas where your model is weak so you can collect the right data and refine the next model iteration. \n","\n","\"Diagnostics\""]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"8eaf3897-b0a8-44e9-aea4-fe58d5ee246f","showTitle":true,"title":"More Info"}},"source":["While using the Labelbox Connector for Databricks, you will likely use the Labelbox SDK (e.g. for programmatic ontology creation). These resources will help familiarize you with the Labelbox Python SDK: \n","* [Visit our docs](https://labelbox.com/docs/python-api) to learn how the SDK works\n","* Checkout our [notebook examples](https://github.com/Labelbox/labelspark/tree/master/notebooks) to follow along with interactive tutorials\n","* view our [API reference](https://labelbox.com/docs/python-api/api-reference).\n","\n","Questions or comments? Reach out to us at [ecosystem+databricks@labelbox.com](mailto:ecosystem+databricks@labelbox.com)"]},{"cell_type":"markdown","metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"1e446cb2-0338-4e5a-9d03-fc24901b3109","showTitle":false,"title":""}},"source":["Copyright Labelbox, Inc. 2022. The source in this notebook is provided subject to the [Labelbox Terms of Service](https://docs.labelbox.com/page/terms-of-service). All included or referenced third party libraries are subject to the licenses set forth below.\n","\n","|Library Name|Library license | Library License URL | Library Source URL |\n","|---|---|---|---|\n","|Labelbox Python SDK|Apache-2.0 License |https://github.com/Labelbox/labelbox-python/blob/develop/LICENSE|https://github.com/Labelbox/labelbox-python\n","|Labelbox Connector for Databricks|Apache-2.0 License |https://github.com/Labelbox/labelspark/blob/master/LICENSE|https://github.com/Labelbox/labelspark\n","|Python|Python Software Foundation (PSF) |https://github.com/python/cpython/blob/master/LICENSE|https://github.com/python/cpython|\n","|Apache Spark|Apache-2.0 License |https://github.com/apache/spark/blob/master/LICENSE|https://github.com/apache/spark|"]}],"metadata":{"application/vnd.databricks.v1+notebook":{"dashboards":[],"language":"python","notebookMetadata":{"pythonIndentUnit":2},"notebookName":"labelbox_databricks_example","notebookOrigID":2233145370303228,"widgets":{}},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0} diff --git a/examples/integrations/databricks/labelbox_databricks_example.py b/examples/integrations/databricks/labelbox_databricks_example.py index b6ac98ecd..1ea86bc26 100644 --- a/examples/integrations/databricks/labelbox_databricks_example.py +++ b/examples/integrations/databricks/labelbox_databricks_example.py @@ -201,7 +201,7 @@ weather_classification = Classification( class_type=Classification.Type.RADIO, - name="what is the weather?", + instructions="what is the weather?", options=[Option(value=c) for c in conditions]) ontology.add_classification(weather_classification) diff --git a/examples/prediction_upload/html_predictions.ipynb b/examples/prediction_upload/html_predictions.ipynb index c5480a5b0..9fa0478ba 100644 --- a/examples/prediction_upload/html_predictions.ipynb +++ b/examples/prediction_upload/html_predictions.ipynb @@ -351,7 +351,7 @@ "cell_type": "markdown", "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", - "Your project should have the 
correct ontology setup with all the tools and classifications supported for your annotations, and the tool and classification names should match the `name` field in your annotations to ensure the correct feature schemas are matched.\n" + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" ], "metadata": { "id": "oy0umzuNIceP" @@ -366,18 +366,18 @@ " classifications=[ # List of Classification objects\n", " Classification( \n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question\", \n", + " instructions=\"radio_question\", \n", " options=[Option(value=\"first_radio_answer\")]\n", " ),\n", " Classification( \n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question_sub\", \n", + " instructions=\"radio_question_sub\", \n", " options=[\n", " Option(value=\"first_radio_answer\",\n", " options=[\n", " Classification(\n", " class_type=Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " instructions=\"sub_radio_question\",\n", " options=[\n", " Option(value=\"first_sub_radio_answer\")\n", " ]\n", @@ -388,7 +388,7 @@ " ),\n", " Classification( \n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"checklist_question\", \n", + " instructions=\"checklist_question\", \n", " options=[\n", " Option(value=\"first_checklist_answer\"),\n", " Option(value=\"second_checklist_answer\"), \n", @@ -397,17 +397,17 @@ " ), \n", " Classification( \n", " class_type=Classification.Type.TEXT,\n", - " name=\"free_text\"\n", + " instructions=\"free_text\"\n", " ),\n", " Classification(\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"nested_checklist_question\",\n", + " instructions=\"nested_checklist_question\",\n", " options=[\n", " Option(\"first_checklist_answer\",\n", " options=[\n", " Classification(\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"sub_checklist_question\", \n", + " instructions=\"sub_checklist_question\", \n", " options=[Option(\"first_sub_checklist_answer\")]\n", " )\n", " ]\n", diff --git a/examples/prediction_upload/image_predictions.ipynb b/examples/prediction_upload/image_predictions.ipynb index a490e7705..25c1beba9 100644 --- a/examples/prediction_upload/image_predictions.ipynb +++ b/examples/prediction_upload/image_predictions.ipynb @@ -584,7 +584,7 @@ "cell_type": "markdown", "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", - "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool and classification names should match the `name` field in your annotations to ensure the correct feature schemas are matched.\n" + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" ], "metadata": { "id": "oy0umzuNIceP" @@ -597,7 +597,7 @@ " classifications=[ # List of Classification objects\n", " Classification( # Radio classification given the name \"text\" with two options: \"first_radio_answer\" and \"second_radio_answer\"\n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question\", \n", + " 
instructions=\"radio_question\", \n", " options=[\n", " Option(value=\"first_radio_answer\"),\n", " Option(value=\"second_radio_answer\")\n", @@ -605,7 +605,7 @@ " ),\n", " Classification( # Checklist classification given the name \"text\" with two options: \"first_checklist_answer\" and \"second_checklist_answer\"\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"checklist_question\", \n", + " instructions=\"checklist_question\", \n", " options=[\n", " Option(value=\"first_checklist_answer\"),\n", " Option(value=\"second_checklist_answer\") \n", @@ -613,17 +613,17 @@ " ), \n", " Classification( # Text classification given the name \"text\"\n", " class_type=Classification.Type.TEXT,\n", - " name=\"free_text\"\n", + " instructions=\"free_text\"\n", " ),\n", " Classification(\n", " class_type=Classification.Type.RADIO, \n", - " name=\"nested_radio_question\",\n", + " instructions=\"nested_radio_question\",\n", " options=[\n", " Option(\"first_radio_answer\",\n", " options=[\n", " Classification(\n", " class_type=Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " instructions=\"sub_radio_question\",\n", " options=[Option(\"first_sub_radio_answer\")]\n", " )\n", " ]\n", @@ -632,13 +632,13 @@ " ),\n", " Classification(\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"nested_checklist_question\",\n", + " instructions=\"nested_checklist_question\",\n", " options=[\n", " Option(\"first_checklist_answer\",\n", " options=[\n", " Classification(\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"sub_checklist_question\", \n", + " instructions=\"sub_checklist_question\", \n", " options=[Option(\"first_sub_checklist_answer\")]\n", " )\n", " ]\n", @@ -656,7 +656,7 @@ " classifications=[\n", " Classification(\n", " class_type=Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " instructions=\"sub_radio_question\",\n", " options=[\n", " Option(value=\"first_sub_radio_answer\")\n", " ]\n", diff --git a/examples/prediction_upload/text_predictions.ipynb b/examples/prediction_upload/text_predictions.ipynb index aa1763c75..6db5b16d4 100644 --- a/examples/prediction_upload/text_predictions.ipynb +++ b/examples/prediction_upload/text_predictions.ipynb @@ -370,8 +370,11 @@ "cell_type": "markdown", "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", - "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool and classification names should match the `name` field in your annotations to ensure the correct feature schemas are matched.\n" - ] + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" + ], + "metadata": { + "id": "oy0umzuNIceP" + } }, { "cell_type": "code", @@ -382,18 +385,18 @@ " classifications=[ # List of Classification objects\n", " Classification( \n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question\", \n", + " instructions=\"radio_question\", \n", " options=[Option(value=\"first_radio_answer\")]\n", " ),\n", " Classification( \n", " class_type=Classification.Type.RADIO, \n", - " name=\"radio_question_sub\", \n", + " instructions=\"radio_question_sub\", \n", " options=[\n", " Option(value=\"first_radio_answer\",\n", " options=[\n", " Classification(\n", " 
class_type=Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " instructions=\"sub_radio_question\",\n", " options=[\n", " Option(value=\"first_sub_radio_answer\")\n", " ]\n", @@ -404,7 +407,7 @@ " ),\n", " Classification( \n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"checklist_question\", \n", + " instructions=\"checklist_question\", \n", " options=[\n", " Option(value=\"first_checklist_answer\"),\n", " Option(value=\"second_checklist_answer\"), \n", @@ -413,17 +416,17 @@ " ), \n", " Classification( \n", " class_type=Classification.Type.TEXT,\n", - " name=\"free_text\"\n", + " instructions=\"free_text\"\n", " ),\n", " Classification(\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"nested_checklist_question\",\n", + " instructions=\"nested_checklist_question\",\n", " options=[\n", " Option(\"first_checklist_answer\",\n", " options=[\n", " Classification(\n", " class_type=Classification.Type.CHECKLIST, \n", - " name=\"sub_checklist_question\", \n", + " instructions=\"sub_checklist_question\", \n", " options=[Option(\"first_sub_checklist_answer\")]\n", " )\n", " ]\n", diff --git a/examples/prediction_upload/video_predictions.ipynb b/examples/prediction_upload/video_predictions.ipynb index a4fa167de..7c433c9a5 100644 --- a/examples/prediction_upload/video_predictions.ipynb +++ b/examples/prediction_upload/video_predictions.ipynb @@ -556,8 +556,11 @@ "cell_type": "markdown", "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", - "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool and classification names should match the `name` field in your annotations to ensure the correct feature schemas are matched.\n" - ] + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" + ], + "metadata": { + "id": "oy0umzuNIceP" + } }, { "cell_type": "code", @@ -566,7 +569,7 @@ " classifications=[ # List of Classification objects\n", " lb.Classification( # Radio classification given the name \"text\" with two options: \"first_radio_answer\" and \"second_radio_answer\"\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\", \n", + " instructions=\"radio_question\", \n", " options=[\n", " lb.Option(value=\"first_radio_answer\"),\n", " lb.Option(value=\"second_radio_answer\")\n", @@ -574,7 +577,7 @@ " ),\n", " lb.Classification( # Checklist classification given the name \"text\" with two options: \"first_checklist_answer\" and \"second_checklist_answer\"\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\", \n", + " instructions=\"checklist_question\", \n", " options=[\n", " lb.Option(value=\"first_checklist_answer\"),\n", " lb.Option(value=\"second_checklist_answer\")\n", @@ -582,13 +585,13 @@ " ), \n", " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", + " instructions=\"nested_radio_question\",\n", " options=[\n", " lb.Option(\"first_radio_answer\",\n", " options=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " instructions=\"sub_radio_question\",\n", " options=[lb.Option(\"first_sub_radio_answer\")]\n", " )\n", " ]\n", @@ -597,13 
+600,13 @@ " ),\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", + " instructions=\"nested_checklist_question\",\n", " options=[\n", " lb.Option(value=\"first_checklist_answer\",\n", " options=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\", \n", + " instructions=\"sub_checklist_question\", \n", " options=[lb.Option(\"first_sub_checklist_answer\")]\n", " )\n", " ]\n", @@ -621,7 +624,7 @@ " classifications=[\n", " lb.Classification(\n", " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", + " instructions=\"sub_radio_question\",\n", " options=[\n", " lb.Option(value=\"first_sub_radio_answer\")\n", " ]\n",
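# --- Illustrative sketch (not part of the diff above) ---------------------------
# Every hunk in this change makes the same substitution: in the SDK release these
# notebooks target, Classification takes `instructions=` for its display name,
# while Tool keeps `name=`. The value passed to `instructions` still has to match
# the `name` field used in imported annotations/predictions so the correct feature
# schemas are matched. Below is a minimal, hedged example of that pairing, using
# only constructs that appear in the notebooks; the annotation payload is an
# illustrative NDJSON-style dict, not a complete import job, and the data row id
# is a hypothetical placeholder.

from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option

ontology_builder = OntologyBuilder(
    tools=[
        # Tools are still keyed by `name`; annotations reference this exact string.
        Tool(tool=Tool.Type.BBOX, name="bbox_video"),
    ],
    classifications=[
        Classification(
            class_type=Classification.Type.RADIO,
            instructions="radio_question",  # was `name=` before this change
            options=[
                Option(value="first_radio_answer"),
                Option(value="second_radio_answer"),
            ],
        ),
    ],
)

# An annotation targeting the classification above refers to it by the same
# string, but under a `name` key:
radio_annotation = {
    "name": "radio_question",                  # matches instructions="radio_question"
    "answer": {"name": "first_radio_answer"},
    "dataRow": {"id": "<data_row_id>"},         # hypothetical placeholder
}

# The generated feature schemas carry these matching names/instructions.
print(ontology_builder.asdict())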