From 655e91b5ff69a80d56302c25bae133bf7cfaf07b Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Fri, 28 Oct 2022 10:03:45 -0400
Subject: [PATCH 1/2] Update batches / new paradigm
---
examples/basics/batches.ipynb | 778 +++++++++++++++++++---------------
1 file changed, 429 insertions(+), 349 deletions(-)
diff --git a/examples/basics/batches.ipynb b/examples/basics/batches.ipynb
index 805ad2e9f..d89f41aea 100644
--- a/examples/basics/batches.ipynb
+++ b/examples/basics/batches.ipynb
@@ -1,354 +1,434 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "db768cda",
- "metadata": {
- "id": "db768cda"
- },
- "source": [
- "
\n",
- " \n",
- " | "
- ]
- },
- {
- "cell_type": "markdown",
- "id": "cb5611d0",
- "metadata": {
- "id": "cb5611d0"
- },
- "source": [
- "\n",
- " \n",
- " | \n",
- "\n",
- "\n",
- " \n",
- " | "
- ]
- },
- {
- "cell_type": "markdown",
- "id": "Lup2QNWjaxKg",
- "metadata": {
- "id": "Lup2QNWjaxKg"
- },
- "source": [
- "## Batches (*Currently in Public Beta*)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "KONWmRQkadPf",
- "metadata": {
- "id": "KONWmRQkadPf"
- },
- "source": [
- "* A Batch is collection of datarows picked out of a Data Set.\n",
- "* A Datarow cannot be part of more than one batch in a project.\n",
- "* Batches work for all data types, but there should only be one data type per batch.\n",
- "* Batches may not be shared between projects.\n",
- "* Batches may have Datarows from multiple Datasets.\n",
- "* Datarows can only be attached to a Project as part of a single Batch.\n",
- "* You can set priority for each Batch."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "HoW5ypnyzpqb",
- "metadata": {
- "id": "HoW5ypnyzpqb"
- },
- "outputs": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "db768cda",
+ "metadata": {
+ "id": "db768cda"
+ },
+ "source": [
+ "\n",
+ " \n",
+ " | "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cb5611d0",
+ "metadata": {
+ "id": "cb5611d0"
+ },
+ "source": [
+ "\n",
+ " \n",
+ " | \n",
+ "\n",
+ "\n",
+ " \n",
+ " | "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "Lup2QNWjaxKg",
+ "metadata": {
+ "id": "Lup2QNWjaxKg"
+ },
+ "source": [
+ "## Batches (*Currently in Public Beta*)\n",
+ "https://docs.labelbox.com/docs/batches"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "KONWmRQkadPf",
+ "metadata": {
+ "id": "KONWmRQkadPf"
+ },
+ "source": [
+ "* A Batch is a collection of data rows picked out of a Dataset.\n",
+ "* A Datarow cannot be part of more than one batch in a project.\n",
+ "* Batches work for all data types, but there should only be one data type per batch.\n",
+ "* Batches may not be shared between projects.\n",
+ "* Batches may have Datarows from multiple Datasets.\n",
+ "* Datarows can only be attached to a Project as part of a single Batch.\n",
+ "* Currently, only benchmark quality settings are supported in batch projects.\n",
+ "* You can set priority for each Batch."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "HoW5ypnyzpqb",
+ "metadata": {
+ "id": "HoW5ypnyzpqb"
+ },
+ "outputs": [],
+ "source": [
+ "%pip install \"labelbox[data]\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6-Us9Gj1zpqc",
+ "metadata": {
+ "id": "6-Us9Gj1zpqc"
+ },
+ "outputs": [],
+ "source": [
+ "from labelbox import DataRow, Client\n",
+ "from labelbox.schema.queue_mode import QueueMode\n",
+ "from labelbox.schema.media_type import MediaType\n",
+ "import random\n",
+ "import uuid"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b0b09aee",
+ "metadata": {
+ "id": "b0b09aee"
+ },
+ "source": [
+ "# API Key and Client\n",
+ "Provide a valid API key below in order to connect to the Labelbox Client."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "Ge-dfNh-zpqe",
+ "metadata": {
+ "id": "Ge-dfNh-zpqe"
+ },
+ "outputs": [],
+ "source": [
+ "# Add your api key\n",
+ "API_KEY = None\n",
+ "client = Client(api_key=API_KEY)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "nMVtBYQmzpqe",
+ "metadata": {
+ "id": "nMVtBYQmzpqe",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "a694b297-8d35-4448-c369-921dee2be94d"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "WARNING:labelbox.schema.task:There are errors present. Please look at `task.errors` for more details\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "ERRORS: []\n",
+ "RESULT URL: https://storage.labelbox.com/cl3ahv73w1891087qbwzs3edd%2Fdata-row-imports-results%2Fcl94vbi4g4ijw07y07shadc7k_cl94vbjcv1dh707y2f2g4cwh4.json?Expires=1665619363366&KeyName=labelbox-assets-key-3&Signature=VJOqZZUjnnT4s45on3zzYdcagOs\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Create a dataset\n",
+ "dataset = client.create_dataset(name=\"Demo-Batches-Colab\")\n",
+ "\n",
+ "uploads = []\n",
+ "# Generate eight image data rows, each tagged with a unique global key\n",
+ "for i in range(1,9):\n",
+ " uploads.append({\n",
+ " 'row_data': f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n",
+ " \"global_key\": f\"TEST-ID-{uuid.uuid1()}\",\n",
+ " })\n",
+ "\n",
+ "upload_task = dataset.create_data_rows(uploads)\n",
+ "upload_task.wait_till_done()\n",
+ "print(\"ERRORS: \" , upload_task.errors)\n",
+ "print(\"RESULT URL: \", upload_task.result_url)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "61CvCD3C7qv6",
+ "metadata": {
+ "id": "61CvCD3C7qv6"
+ },
+ "source": [
+ "# Setup batch project"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "tqtT4q31787T",
+ "metadata": {
+ "id": "tqtT4q31787T",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "d933d0eb-ed71-4aaa-a78d-a8ce6c69b33f"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Project Name: Demo-Batches-Project Project Id: cl94vbpr849gg08ytd6rd423x\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Project defaults to batch mode with benchmark quality settings if the queue mode argument is not provided\n",
+ "# Queue mode will be deprecated once dataset mode is deprecated \n",
+ "\n",
+ "# Create a batch project with benchmark quality control. Consensus is currently not supported with Batches\n",
+ "project = client.create_project( name=\"Demo-Batches-Project\", \n",
+ " queue_mode=QueueMode.Batch,\n",
+ " auto_audit_percentage=1,\n",
+ " auto_audit_number_of_labels=1,\n",
+ " media_type=MediaType.Image\n",
+ " )\n",
+ "print(\"Project Name:\", project.name ,\n",
+ " \" Project Id:\", project.uid )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9JVLsXdevywS",
+ "metadata": {
+ "id": "9JVLsXdevywS"
+ },
+ "source": [
+ "### Select all data rows from the dataset created earlier that will be added to the batch.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "U4C1ZyJ2EgTS",
+ "metadata": {
+ "id": "U4C1ZyJ2EgTS",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "21ee01a3-c984-4426-b0ed-0b6f47454d8e"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Number of data row ids: 8\n"
+ ]
+ }
+ ],
+ "source": [
+ "data_row_ids = [dr.uid for dr in dataset.export_data_rows()]\n",
+ "print(\"Number of data row ids:\", len(data_row_ids))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Select a random sample\n",
+ "This method is useful if you have large datasets and only want to work with a handful of data rows"
+ ],
+ "metadata": {
+ "id": "pKqURMFUaURa"
+ },
+ "id": "pKqURMFUaURa"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "WJAXBf1bV-td",
+ "metadata": {
+ "id": "WJAXBf1bV-td"
+ },
+ "outputs": [],
+ "source": [
+ "sample = random.sample(data_row_ids, 4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "UPdaTqkgYyvt",
+ "metadata": {
+ "id": "UPdaTqkgYyvt"
+ },
+ "source": [
+ "# Batch Manipulation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "Al-K1lBBEjtb",
+ "metadata": {
+ "id": "Al-K1lBBEjtb"
+ },
+ "source": [
+ "### Create a Batch:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "resH3xqeErVv",
+ "metadata": {
+ "id": "resH3xqeErVv",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "dd178a9b-1544-4361-d750-34d693512b9a"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Number of data rows in batch: 4\n"
+ ]
+ }
+ ],
+ "source": [
+ "batch = project.create_batch(\n",
+ " \"Demo-First-Batch\", # Each batch in a project must have a unique name\n",
+ " sample, # A list of data rows or data row ids\n",
+ " 5 # priority between 1(Highest) - 5(lowest)\n",
+ ")\n",
+ "# number of data rows in the batch\n",
+ "print(\"Number of data rows in batch: \", batch.size)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8Cj64Isxzpqe",
+ "metadata": {
+ "id": "8Cj64Isxzpqe"
+ },
+ "source": [
+ "### Manage Batches\n",
+ "Note: You can view your batch data through the *Data Rows tab*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0a7d1d3e",
+ "metadata": {
+ "id": "0a7d1d3e",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "8e7e2fa3-704d-4107-a393-1eddf02266d9"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Data Rows in Batch: [, , , ]\n",
+ "Batch Name: Demo-First-Batch Batch ID: 39f3fb00-49c1-11ed-ad8c-4b0085ccfe8b\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Export the data rows that belong to the batch\n",
+ "data_rows = list(batch.export_data_rows())\n",
+ "print(\"Data Rows in Batch: \", data_rows)\n",
+ "\n",
+ "# List the batches in your project (separate loop variable so `batch` is not rebound)\n",
+ "for project_batch in project.batches():\n",
+ " print(\"Batch Name: \", project_batch.name , \" Batch ID:\", project_batch.uid)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Archive Batch"
+ ],
+ "metadata": {
+ "id": "OHMvoxRFhF9Z"
+ },
+ "id": "OHMvoxRFhF9Z"
+ },
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "zsh:1: no matches found: labelbox[data]\r\n"
- ]
+ "cell_type": "code",
+ "source": [
+ "# archiving batch removes all queued data rows from the project\n",
+ "batch.remove_queued_data_rows()"
+ ],
+ "metadata": {
+ "id": "IMleblJnhFbs"
+ },
+ "id": "IMleblJnhFbs",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Clean up \n",
+ "Uncomment and run the cell below to delete the batch, dataset and/or project created in this demo"
+ ],
+ "metadata": {
+ "id": "3aI_0KfZeYEf"
+ },
+ "id": "3aI_0KfZeYEf"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Delete Batch\n",
+ "#batch.delete()\n",
+ "\n",
+ "# Delete Project\n",
+ "#project.delete()\n",
+ "\n",
+ "# Delete DataSet\n",
+ "#dataset.delete()"
+ ],
+ "metadata": {
+ "id": "ev_vlMh6ehH3"
+ },
+ "id": "ev_vlMh6ehH3",
+ "execution_count": null,
+ "outputs": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
}
- ],
- "source": [
- "!pip install labelbox[data]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6-Us9Gj1zpqc",
- "metadata": {
- "id": "6-Us9Gj1zpqc"
- },
- "outputs": [],
- "source": [
- "from labelbox import DataRow, Client\n",
- "from labelbox.schema.queue_mode import QueueMode\n",
- "from labelbox.schema.media_type import MediaType\n",
- "import random"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "qQiozm-dzpqd",
- "metadata": {
- "id": "qQiozm-dzpqd"
- },
- "source": [
- "Set the following cell with your data to run this notebook:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "84Zna5c0zpqd",
- "metadata": {
- "id": "84Zna5c0zpqd"
- },
- "outputs": [],
- "source": [
- "PROJECT_NAME = \"Batch Queue Demo\" #text project\n",
- "DATASET_NAME = \"Batch Queue Demo Data\""
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b0b09aee",
- "metadata": {
- "id": "b0b09aee"
- },
- "source": [
- "# API Key and Client\n",
- "Provide a valid api key below in order to properly connect to the Labelbox Client."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "Ge-dfNh-zpqe",
- "metadata": {
- "id": "Ge-dfNh-zpqe"
- },
- "outputs": [],
- "source": [
- "# Add your api key\n",
- "API_KEY = None\n",
- "client = Client(api_key=API_KEY)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "nMVtBYQmzpqe",
- "metadata": {
- "id": "nMVtBYQmzpqe"
- },
- "outputs": [],
- "source": [
- "dataset = client.create_dataset(name=DATASET_NAME)\n",
- "\n",
- "uploads = []\n",
- "for i in range(10):\n",
- " uploads.append({\n",
- " 'external_id': i,\n",
- " 'row_data': 'https://picsum.photos/200/300'\n",
- " })\n",
- "dataset.create_data_rows(uploads)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "61CvCD3C7qv6",
- "metadata": {
- "id": "61CvCD3C7qv6"
- },
- "source": [
- "# Ensure project is in batch mode:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "tqtT4q31787T",
- "metadata": {
- "id": "tqtT4q31787T"
- },
- "outputs": [],
- "source": [
- "project = client.create_project(name=PROJECT_NAME, queue_mode=QueueMode.Batch)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "Xti9AoZWELrq",
- "metadata": {
- "id": "Xti9AoZWELrq"
- },
- "source": [
- "# Collect Datarow id's:"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "9JVLsXdevywS",
- "metadata": {
- "id": "9JVLsXdevywS"
- },
- "source": [
- "### Select All Data Rows from dataset."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "U4C1ZyJ2EgTS",
- "metadata": {
- "id": "U4C1ZyJ2EgTS"
- },
- "outputs": [],
- "source": [
- "data_row_ids = [dr.uid for dr in dataset.export_data_rows()]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "6699941a",
- "metadata": {},
- "source": []
- },
- {
- "cell_type": "markdown",
- "id": "B0UqO_O1V8ei",
- "metadata": {
- "id": "B0UqO_O1V8ei"
- },
- "source": [
- "### Randomly sample\n",
- "\n",
- "Rather than selecting all of the data we sample 5 data rows at random"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "WJAXBf1bV-td",
- "metadata": {
- "id": "WJAXBf1bV-td"
- },
- "outputs": [],
- "source": [
- "sample = random.sample(data_row_ids, 5)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "UPdaTqkgYyvt",
- "metadata": {
- "id": "UPdaTqkgYyvt"
- },
- "source": [
- "# Batch Manipulation"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "Al-K1lBBEjtb",
- "metadata": {
- "id": "Al-K1lBBEjtb"
- },
- "source": [
- "### Create a Batch:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "resH3xqeErVv",
- "metadata": {
- "id": "resH3xqeErVv"
- },
- "outputs": [],
- "source": [
- "batch = project.create_batch(\n",
- " \"first batch\", # Each batch in a project must have a unique name\n",
- " sample, # A list of data rows or data row ids\n",
- " 5 # priority between 1(Highest) - 5(lowest)\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "gFio7ONOWYdJ",
- "metadata": {
- "id": "gFio7ONOWYdJ"
- },
- "outputs": [],
- "source": [
- "# number of data rows in the batch\n",
- "batch.size"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "8Cj64Isxzpqe",
- "metadata": {
- "id": "8Cj64Isxzpqe"
- },
- "source": [
- "### List DataRows in a Batch\n",
- "Note: You can view your batch through in the *Data Row tab* of the project"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "0a7d1d3e",
- "metadata": {},
- "outputs": [],
- "source": [
- "data_rows = [dr for dr in batch.export_data_rows()]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "rU7iddSQzpqg",
- "metadata": {
- "id": "rU7iddSQzpqg"
- },
- "source": [
- "### Remove queued data rows by batch (Not supported yet)\n",
- "Note: You can do this through the batch management pane on the data rows tab of the project"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "collapsed_sections": [],
- "name": "Batches.ipynb",
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
+ "nbformat": 4,
+ "nbformat_minor": 5
}
From 555d7a72074fc2428c4e25ad5bc48d121c0d369c Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Wed, 2 Nov 2022 15:44:44 -0400
Subject: [PATCH 2/2] Remove beta from title
---
examples/basics/batches.ipynb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/basics/batches.ipynb b/examples/basics/batches.ipynb
index d89f41aea..0ac5823e7 100644
--- a/examples/basics/batches.ipynb
+++ b/examples/basics/batches.ipynb
@@ -37,7 +37,7 @@
"id": "Lup2QNWjaxKg"
},
"source": [
- "## Batches (*Currently in Public Beta*)\n",
+ "## Batches\n",
"https://docs.labelbox.com/docs/batches"
]
},