From 1250ed0774c1561b527c0bada770c93d38331e92 Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Wed, 30 Aug 2023 12:02:18 -0400 Subject: [PATCH 1/4] fixed broken notebook --- examples/basics/data_rows.ipynb | 277 ++++++++++++++++++-------------- 1 file changed, 157 insertions(+), 120 deletions(-) diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index bdd2db290..d61dfd612 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -37,25 +37,26 @@ { "metadata": {}, "source": [ - "* Data rows are the items that are actually being labeled. We currently support the following:\n", + "* Data rows are the assets that are being labeled. We currently support the following asset types:\n", " * Image\n", " * Text\n", " * Video\n", " * Geospatial / Tiled Imagery\n", " * Audio\n", - " * Documents (Beta)\n", - " * HTML (Beta)\n", - " * DICOM (Beta)\n", + " * Documents \n", + " * HTML \n", + " * DICOM \n", + " * Conversational\n", "* A data row is a member of a dataset \n", "* A data row cannot exist without belonging to a dataset.\n", - "* DataRows are added to labeling tasks by first attaching them to datasets and then attaching datasets to projects." + "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects with data rows to projects " ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "!pip install labelbox" + "!pip install labelbox -q" ], "cell_type": "code", "outputs": [], @@ -75,16 +76,17 @@ { "metadata": {}, "source": [ - "* Set the following cell with your data to run this notebook" + "# API Key and Client\n", + "Provide a valid api key below in order to properly connect to the Labelbox Client." 
], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "# Pick a project that has a dataset attached, data has external ids, and there are some labels\n", - "# This will modify the project so just pick a dummy one that you don't care about\n", - "PROJECT_ID = \"\"" + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" ], "cell_type": "code", "outputs": [], @@ -93,27 +95,19 @@ { "metadata": {}, "source": [ - "# API Key and Client\n", - "Provide a valid api key below in order to properly connect to the Labelbox Client." + "### Get data rows from projects" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "# Add your api key\n", - "API_KEY = None\n", - "client = lb.Client(api_key=API_KEY)" - ], - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ + "# Get project batches\n", + "# Pick a project that with batches that have data rows with global keys\n", + "PROJECT_ID = \"\"\n", "project = client.get_project(PROJECT_ID)\n", - "dataset = next(project.datasets())\n", + "batches = next(project.batches())\n", + "print(batches)\n", "# This is the same as\n", "# -> dataset = client.get_dataset(dataset_id)" ], @@ -124,27 +118,34 @@ { "metadata": {}, "source": [ - "### Read" + "* Fetch data rows from project's batches" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "data_rows = dataset.data_rows()\n", - "data_row = next(data_rows)" + "data_rows = batches.export_data_rows()\n", + "data_row = next(data_rows)\n", + "print(data_row)" ], "cell_type": "code", "outputs": [], "execution_count": null }, + { + "metadata": {}, + "source": [ + "* Get labels from the data row" + ], + "cell_type": "markdown" + }, { "metadata": {}, "source": [ "# Url\n", - "print(\"Associated dataset\", data_row.dataset())\n", - "print(\"Associated label(s)\", next(data_row.labels()))\n", - "print(\"External id\", data_row.external_id)" + "print(\"Associated label(s)\", 
list(data_row.labels()))\n", + "print(\"Global key\", data_row.global_key)" ], "cell_type": "code", "outputs": [], @@ -152,17 +153,17 @@ }, { "metadata": {}, - "source": [], - "cell_type": "code", - "outputs": [], - "execution_count": null + "source": [ + "* Get data row ids by using global keys" + ], + "cell_type": "markdown" }, { "metadata": {}, "source": [ - "# External ids can be a reference to your internal datasets\n", - "data_row = dataset.data_row_for_external_id(data_row.external_id)\n", - "print(data_row)" + "global_key = \"\"\n", + "task = client.get_data_row_ids_for_global_keys([global_key])\n", + "print(f\"Data row id: {task['results']}\")" ], "cell_type": "code", "outputs": [], @@ -172,25 +173,23 @@ "metadata": {}, "source": [ "### Create\n", - "* Create a single data row at a time" + "* Create a single data row with and without metadata" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "dataset = client.create_dataset(name=\"testing-dataset\")\n", - "dataset.create_data_row(row_data=\"https://picsum.photos/200/300\")\n", + "dataset = client.create_dataset(name=\"data_rows_demo_dataset\")\n", "\n", - "# It is reccomended that you use external ids but optional.\n", - "# These are useful for users to maintain references to a data_row.\n", - "dataset.create_data_row(row_data=\"https://picsum.photos/200/300\",\n", - " external_id=str(uuid.uuid4()))\n", + "# It is recommended that you add global keys to your data rows.\n", + "dataset.create_data_row(row_data=\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0002.jpeg\",\n", + " global_key=str(uuid.uuid4()))\n", "\n", - "# You can also upload metadata along with your data_row\n", + "# You can also upload metadata along with your data row\n", "mdo = client.get_data_row_metadata_ontology()\n", - "dataset.create_data_row(row_data=\"https://picsum.photos/200/300\",\n", - " external_id=str(uuid.uuid4()),\n", + 
"dataset.create_data_row(row_data=\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\",\n", + " global_key=str(uuid.uuid4()),\n", " metadata_fields=[\n", " lb.DataRowMetadataField(\n", " schema_id=mdo.reserved_by_name[\"tag\"].uid, # specify the schema id\n", @@ -206,18 +205,35 @@ { "metadata": {}, "source": [ - "* Bulk create data rows (This is much faster than creating individual data rows)" + "* [Recommended] Bulk create data rows (This is much faster than creating individual data rows)" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "task1 = dataset.create_data_rows([{\n", - " lb.DataRow.row_data: \"https://picsum.photos/200/300\"\n", - "}, {\n", - " lb.DataRow.row_data: \"https://picsum.photos/200/300\"\n", - "}])" + "# Create a dataset\n", + "dataset = client.create_dataset(name=\"data_rows_demo_dataset_2\")\n", + "\n", + "uploads = []\n", + "# Generate data rows\n", + "for i in range(1,9):\n", + " uploads.append({\n", + " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", + " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1(),\n", + " ## add metadata (optional)\n", + " \"metadata_fields\": [\n", + " lb.DataRowMetadataField(\n", + " schema_id=mdo.reserved_by_name[\"tag\"].uid, # specify the schema id\n", + " value=\"tag_string\", # typed inputs\n", + " ),\n", + " ]\n", + " })\n", + "\n", + "task1 = dataset.create_data_rows(uploads)\n", + "task1.wait_till_done()\n", + "print(\"ERRORS: \" , task1.errors)\n", + "print(\"RESULTS:\" , task1.result)" ], "cell_type": "code", "outputs": [], @@ -226,24 +242,45 @@ { "metadata": {}, "source": [ - "# Local paths\n", - "local_data_path = '/tmp/test_data_row.txt'\n", - "with open(local_data_path, 'w') as file:\n", - " file.write(\"sample data\")\n", - "\n", - "task2 = dataset.create_data_rows([local_data_path])" + "* Create data rows with attachments" ], - 
"cell_type": "code", - "outputs": [], - "execution_count": null + "cell_type": "markdown" }, { "metadata": {}, "source": [ - "# You can mix local files with urls\n", - "task3 = dataset.create_data_rows([{\n", - " lb.DataRow.row_data: \"https://picsum.photos/200/300\"\n", - "}, local_data_path])" + "task4 = dataset.create_data_rows([{\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n", + " \"global_key\": str(uuid.uuid4()),\n", + " \"attachments\": [\n", + " {\n", + " \"type\": \"IMAGE_OVERLAY\",\n", + " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n", + " },\n", + " {\n", + " \"type\": \"RAW_TEXT\",\n", + " \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\"\n", + " },\n", + " {\n", + " \"type\": \"TEXT_URL\",\n", + " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\"\n", + " },\n", + " {\n", + " \"type\": \"IMAGE\",\n", + " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n", + " },\n", + " {\n", + " \"type\": \"VIDEO\",\n", + " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\"\n", + " },\n", + " {\n", + " \"type\": \"HTML\",\n", + " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\"\n", + " }\n", + " ]\n", + " }])\n", + "print(\"ERRORS: \" , task4.errors)\n", + "print(\"RESULTS:\" , task4.result)" ], "cell_type": "code", "outputs": [], @@ -252,32 +289,21 @@ { "metadata": {}, "source": [ - "# Note that you cannot set external_ids at this time when uploading from local files.\n", - "# To do this you have to first\n", - "item_url = client.upload_file(local_data_path)\n", - "task4 = dataset.create_data_rows([{\n", - " lb.DataRow.row_data: item_url,\n", - " lb.DataRow.external_id: str(uuid.uuid4())\n", - "}])" + "* Create data rows using data in your 
local path" ], - "cell_type": "code", - "outputs": [], - "execution_count": null + "cell_type": "markdown" }, { "metadata": {}, "source": [ - "# You can bulk upload Data Rows with metadata\n", - "task5 = dataset.create_data_rows([{\n", - " lb.DataRow.row_data: \"https://picsum.photos/200/300\",\n", - " lb.DataRow.external_id: str(uuid.uuid4()),\n", - " \"metadata_fields\": [\n", - " lb.DataRowMetadataField(\n", - " schema_id=mdo.reserved_by_name[\"tag\"].uid, # specify the schema id\n", - " value=\"tag_string\", # typed inputs\n", - " ),\n", - " ], \n", - "}])" + "# Local paths\n", + "local_data_path = \"/tmp/test_data_row.txt\"\n", + "with open(local_data_path, 'w') as file:\n", + " file.write(\"sample data\")\n", + "\n", + "task2 = dataset.create_data_rows([local_data_path])\n", + "print(\"ERRORS: \" , task2.errors)\n", + "print(\"RESULTS:\" , task2.result)" ], "cell_type": "code", "outputs": [], @@ -286,14 +312,16 @@ { "metadata": {}, "source": [ - "# Blocking wait until complete\n", - "task1.wait_till_done()\n", - "task2.wait_till_done()\n", - "task3.wait_till_done()\n", - "task4.wait_till_done()\n", - "task5.wait_till_done()\n", - "\n", - "print(task1.status, task2.status, task3.status, task4.status, task5.status)" + "# You can mix local files with urls\n", + "task3 = dataset.create_data_rows([{\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\",\n", + " \"global_key\": str(uuid.uuid4())\n", + " }, {\n", + " \"row_data\": local_data_path,\n", + " \"global_key\": str(uuid.uuid4())\n", + " }])\n", + "print(\"ERRORS: \" , task3.errors)\n", + "print(\"RESULTS:\" , task3.result)" ], "cell_type": "code", "outputs": [], @@ -302,17 +330,20 @@ { "metadata": {}, "source": [ - "### Update" + "### Update\n", + "Only two fields can be updated after a data row is created\n", + "1. Global keys \n", + "2. 
Row data\n" ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "# Useful for resigning urls\n", + "data_row = client.get_data_row(\"\")\n", "new_id = str(uuid.uuid4())\n", - "data_row.update(external_id=new_id)\n", - "print(data_row.external_id, new_id)" + "data_row.update(global_key=new_id, row_data=\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0005.jpeg\")\n", + "print(data_row)" ], "cell_type": "code", "outputs": [], @@ -321,27 +352,19 @@ { "metadata": {}, "source": [ - "# We can also create attachments\n", - "# Attachments are visible for all projects connected to the data_row\n", + "* Create a single attachment on an existing data row" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# You can only create one attachment at the time. \n", "data_row.create_attachment(attachment_type=\"TEXT\",\n", - " attachment_value=\"LABELERS WILL SEE THIS \")\n", - "# See more information here:\n", - "# https://docs.labelbox.com/reference/type-image\n", - "# Note that attachment_value must always be a string (url to a video/image or a text value to display)" + " attachment_value=\"LABELERS WILL SEE THIS \")" ], "cell_type": "code", - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "execution_count": null }, { @@ -354,8 +377,15 @@ { "metadata": {}, "source": [ - "data_row.delete()\n", - "# Will remove from the dataset too" + "* Delete a single data row" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "data_row = client.get_data_row(\"\")\n", + "data_row.delete()" ], "cell_type": "code", "outputs": [], @@ -364,7 +394,14 @@ { "metadata": {}, "source": [ - "# Bulk delete a list of data_rows (in this case all of them we just uploaded)\n", + "* Bulk delete data row objects" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# 
Bulk delete a list of data_rows ( limit: 4K data rows per call)\n", "lb.DataRow.bulk_delete(list(dataset.data_rows()))" ], "cell_type": "code", From 84fbf468686745e96e222d118fbf9f1f92fd7f8d Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Wed, 30 Aug 2023 12:07:36 -0400 Subject: [PATCH 2/4] fixed broken notebook --- examples/basics/data_rows.ipynb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index d61dfd612..7c2e9df0d 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -49,7 +49,7 @@ " * Conversational\n", "* A data row is a member of a dataset \n", "* A data row cannot exist without belonging to a dataset.\n", - "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects with data rows to projects " + "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects with the data rows" ], "cell_type": "markdown" }, @@ -102,7 +102,6 @@ { "metadata": {}, "source": [ - "# Get project batches\n", "# Pick a project that with batches that have data rows with global keys\n", "PROJECT_ID = \"\"\n", "project = client.get_project(PROJECT_ID)\n", @@ -143,7 +142,6 @@ { "metadata": {}, "source": [ - "# Url\n", "print(\"Associated label(s)\", list(data_row.labels()))\n", "print(\"Global key\", data_row.global_key)" ], @@ -312,7 +310,7 @@ { "metadata": {}, "source": [ - "# You can mix local files with urls\n", + "# You can mix local files with urls when creating data rows\n", "task3 = dataset.create_data_rows([{\n", " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\",\n", " \"global_key\": str(uuid.uuid4())\n", From d23879601990405ebd8c416b9b89b2dbd45d84fb Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Wed, 30 Aug 2023 12:09:57 -0400 Subject: [PATCH 3/4] fixed broken notebook 
--- examples/basics/data_rows.ipynb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index 7c2e9df0d..3415ce63d 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -49,7 +49,7 @@ " * Conversational\n", "* A data row is a member of a dataset \n", "* A data row cannot exist without belonging to a dataset.\n", - "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects with the data rows" + "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects" ], "cell_type": "markdown" }, @@ -102,7 +102,7 @@ { "metadata": {}, "source": [ - "# Pick a project that with batches that have data rows with global keys\n", + "# Pick a project with batches that have data rows with global keys\n", "PROJECT_ID = \"\"\n", "project = client.get_project(PROJECT_ID)\n", "batches = next(project.batches())\n", @@ -247,7 +247,7 @@ { "metadata": {}, "source": [ - "task4 = dataset.create_data_rows([{\n", + "task2 = dataset.create_data_rows([{\n", " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n", " \"global_key\": str(uuid.uuid4()),\n", " \"attachments\": [\n", @@ -277,8 +277,8 @@ " }\n", " ]\n", " }])\n", - "print(\"ERRORS: \" , task4.errors)\n", - "print(\"RESULTS:\" , task4.result)" + "print(\"ERRORS: \" , task2.errors)\n", + "print(\"RESULTS:\" , task2.result)" ], "cell_type": "code", "outputs": [], @@ -299,9 +299,9 @@ "with open(local_data_path, 'w') as file:\n", " file.write(\"sample data\")\n", "\n", - "task2 = dataset.create_data_rows([local_data_path])\n", - "print(\"ERRORS: \" , task2.errors)\n", - "print(\"RESULTS:\" , task2.result)" + "task3 = dataset.create_data_rows([local_data_path])\n", + "print(\"ERRORS: \" , task3.errors)\n", + 
"print(\"RESULTS:\" , task3.result)" ], "cell_type": "code", "outputs": [], @@ -311,15 +311,15 @@ "metadata": {}, "source": [ "# You can mix local files with urls when creating data rows\n", - "task3 = dataset.create_data_rows([{\n", + "task4 = dataset.create_data_rows([{\n", " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\",\n", " \"global_key\": str(uuid.uuid4())\n", " }, {\n", " \"row_data\": local_data_path,\n", " \"global_key\": str(uuid.uuid4())\n", " }])\n", - "print(\"ERRORS: \" , task3.errors)\n", - "print(\"RESULTS:\" , task3.result)" + "print(\"ERRORS: \" , task4.errors)\n", + "print(\"RESULTS:\" , task4.result)" ], "cell_type": "code", "outputs": [], From 13d7f01b4547f9fc4cc8cfd7a5b8054761a2e6d3 Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Wed, 30 Aug 2023 14:16:18 -0400 Subject: [PATCH 4/4] fixed broken notebook --- examples/basics/data_rows.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index 3415ce63d..1c9f8a1bb 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -358,7 +358,7 @@ "metadata": {}, "source": [ "# You can only create one attachment at the time. \n", - "data_row.create_attachment(attachment_type=\"TEXT\",\n", + "data_row.create_attachment(attachment_type=\"RAW_TEXT\",\n", " attachment_value=\"LABELERS WILL SEE THIS \")" ], "cell_type": "code",