From 31337f2e3b96c418faa51cd7cb1cced1dd701c1d Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Wed, 12 Jul 2023 17:02:37 -0400 Subject: [PATCH 1/2] latest updates --- examples/basics/custom_embeddings.ipynb | 66 ++++++++++++++----------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/examples/basics/custom_embeddings.ipynb b/examples/basics/custom_embeddings.ipynb index 1a7c57ad8..984b75920 100644 --- a/examples/basics/custom_embeddings.ipynb +++ b/examples/basics/custom_embeddings.ipynb @@ -31,7 +31,7 @@ "metadata": {}, "source": [ "# Documentation\n", - "Please read this document before getting started. \n", + "Please read this document before getting started.\n", "https://docs.google.com/document/d/1C_zZFGNjXq10P1MvEX6MM0TC7HHrkFOp9BB0P_S_2MQ" ], "cell_type": "markdown" @@ -46,7 +46,6 @@ { "metadata": {}, "source": [ - "# labelbox\n", "!pip3 install -q \"labelbox[data]\"" ], "cell_type": "code", @@ -56,7 +55,7 @@ { "metadata": {}, "source": [ - "import labelbox as lb \n", + "import labelbox as lb\n", "import numpy as np\n", "import json" ], @@ -74,7 +73,6 @@ { "metadata": {}, "source": [ - "# for custom embeddings\n", "!pip3 install -q 'git+https://github.com/Labelbox/advlib.git'" ], "cell_type": "code", @@ -115,9 +113,10 @@ "source": [ "# get images from a Labelbox dataset\n", "# Our systems start to process data after 1000 embeddings of each type, for this demo make sure your dataset is over 1000 data rows\n", - "dataset = client.get_dataset(\"\")\n", + "dataset = client.get_dataset(\"\") \n", "drs = list(dataset.export_data_rows(timeout_seconds=9999))\n", - "data_row_ids = [dr.uid for dr in drs]" + "data_row_ids = [dr.uid for dr in drs]\n", + "data_row_ids = data_row_ids[:1000] # keep the first 1000 examples for the sake of this demo" ], "cell_type": "code", "outputs": [], @@ -127,19 +126,19 @@ "metadata": {}, "source": [ "# Create the payload for custom embeddings\n", - "It should be a .ndjson file\n", - "It does not have to be created through 
python." + "It should be a .ndjson file. \n", + "Every line is a JSON object that ends with a \\n character. \n", + "It does not have to be created through python. " ], "cell_type": "markdown" }, { "metadata": {}, "source": [ - "\n", - "\n", "nb_data_rows = len(data_row_ids)\n", - "# generate 1000 custom embedding vectors, of dimension 2048 each\n", - "# Labelbox supports custom embeddings of dimension up to 2048\n", + "print(\"Number of data rows: \", nb_data_rows)\n", + "# generate random vectors, of dimension 2048 each\n", + "# Labelbox supports custom embedding vectors of dimension up to 2048\n", "custom_embeddings = [list(np.random.random(2048)) for _ in range(nb_data_rows)]" ], "cell_type": "code", @@ -149,7 +148,7 @@ { "metadata": {}, "source": [ - "# create the ndjson payload for custom embeddings\n", + "# create the payload for custom embeddings\n", "payload = []\n", "for data_row_id,custom_embedding in zip(data_row_ids,custom_embeddings):\n", " payload.append({\"id\": data_row_id, \"vector\": custom_embedding})\n", @@ -163,19 +162,28 @@ { "metadata": {}, "source": [ - "# convert payload to ndjson file\n", + "# delete any pre-existing file\n", + "import os\n", + "if os.path.exists(\"payload.ndjson\"):\n", + " os.remove(\"payload.ndjson\")\n", "\n", + "# write the payload to an ndjson file (one JSON object per line)\n", "with open('payload.ndjson', 'w') as f:\n", - " sanity_check_payload = json.dump(payload, f)\n", - "\n", - "\n", + " for p in payload:\n", + " f.write(json.dumps(p) + \"\\n\")\n", + " # sanity_check_payload = json.dump(payload, f)" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ "# sanity check that you can read/load the file and the payload is correct\n", "with open('payload.ndjson') as f:\n", - " sanity_check_payload = json.load(f)\n", - " \n", - "\n", - "print(\"Nb of custom embedding vectors in sanity_check_payload: \", len(sanity_check_payload))\n", - "# print(\"sanity_check_payload: \", 
sanity_check_payload)" + " sanity_check_payload = [json.loads(l) for l in f.readlines()]\n", + "print(\"Nb of custom embedding vectors in sanity_check_payload: \", len(sanity_check_payload))" ], "cell_type": "code", "outputs": [], @@ -184,7 +192,7 @@ { "metadata": {}, "source": [ - "# See all custom embeddings available\n", + "# See all custom embeddings available in your Labelbox workspace\n", "!advtool embeddings list" ], "cell_type": "code", @@ -194,9 +202,9 @@ { "metadata": {}, "source": [ - "# # Create a new custom embedding\n", + "# # Create a new custom embedding, unless you want to re-use one\n", "!advtool embeddings create my_custom_embedding_2048_dimensions 2048\n", - "# will return the ID of the newly created embedding, e.g. cgbjjt5ra07710005liytdf19" + "# this command will return the ID of the newly created embedding, e.g. ciqtgd94607290000ljx4dvh2" ], "cell_type": "code", "outputs": [], @@ -206,7 +214,7 @@ "metadata": {}, "source": [ "# # Delete a custom embedding\n", - "# !advtool embeddings delete cj7j0ukre0771000blj4qnxgn" + "# !advtool embeddings delete ciqtgd94607290000ljx4dvh2" ], "cell_type": "code", "outputs": [], @@ -222,8 +230,8 @@ { "metadata": {}, "source": [ - "# Upload the payload to Labelbox \n", - "!advtool embeddings import cj7j0ukre0771000blj4qnxgn ./payload.ndjson" + "# Upload the payload to Labelbox\n", + "!advtool embeddings import c933bviqn0756000elk07et77 ./payload.ndjson" ], "cell_type": "code", "outputs": [], @@ -240,7 +248,7 @@ "metadata": {}, "source": [ "# count how many data rows have a specific custom embedding (This can take a couple of minutes)\n", - "!advtool embeddings count cj7j0ukre0771000blj4qnxgn" + "!advtool embeddings count c933bviqn0756000elk07et77" ], "cell_type": "code", "outputs": [], From a4e974c05b6473dafe5c99f63f937b844eed35bc Mon Sep 17 00:00:00 2001 From: ovalle15 Date: Fri, 14 Jul 2023 14:35:02 -0400 Subject: [PATCH 2/2] latest update --- examples/basics/custom_embeddings.ipynb | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/examples/basics/custom_embeddings.ipynb b/examples/basics/custom_embeddings.ipynb index 984b75920..892e77a41 100644 --- a/examples/basics/custom_embeddings.ipynb +++ b/examples/basics/custom_embeddings.ipynb @@ -230,7 +230,7 @@ { "metadata": {}, "source": [ - "# Upload the payload to Labelbox\n", + "# Upload the payload to Labelbox. Replace the id below with the id returned by `advtool embeddings create` above, or with any existing custom embedding id.\n", "!advtool embeddings import c933bviqn0756000elk07et77 ./payload.ndjson" ], "cell_type": "code",