Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 38 additions & 104 deletions examples/basics/custom_embeddings.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -47,25 +47,21 @@
"metadata": {},
"source": [
"# labelbox\n",
"!pip3 install -q labelbox[data]\n",
"import labelbox as lb\n",
"#ndjson\n",
"!pip3 install -q ndjson\n",
"import ndjson"
"!pip3 install -q \"labelbox[data]\""
],
"cell_type": "code",
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m189.2/189.2 KB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m52.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for pygeotile (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
]
}
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"import labelbox as lb \n",
"import numpy as np\n",
"import json"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
Expand All @@ -82,19 +78,7 @@
"!pip3 install -q 'git+https://github.com/Labelbox/advlib.git'"
],
"cell_type": "code",
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m62.8/62.8 KB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Building wheel for advlib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
]
}
],
"outputs": [],
"execution_count": null
},
{
Expand Down Expand Up @@ -151,7 +135,7 @@
{
"metadata": {},
"source": [
"import numpy as np\n",
"\n",
"\n",
"nb_data_rows = len(data_row_ids)\n",
"# generate 1000 custom embedding vectors, of dimension 2048 each\n",
Expand Down Expand Up @@ -180,80 +164,49 @@
"metadata": {},
"source": [
"# convert payload to ndjson file\n",
"\n",
"with open('payload.ndjson', 'w') as f:\n",
" ndjson.dump(payload, f)\n",
" json.dump(payload, f)\n",
"\n",
"\n",
"# sanity check that you can read/load the file and the payload is correct\n",
"with open('payload.ndjson') as f:\n",
" sanity_check_payload = ndjson.load(f)\n",
" sanity_check_payload = json.load(f)\n",
" \n",
"\n",
"print(\"Nb of custom embedding vectors in sanity_check_payload: \", len(sanity_check_payload))\n",
"# print(\"sanity_check_payload: \", sanity_check_payload)"
],
"cell_type": "code",
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Nb of custom embedding vectors in sanity_check_payload: 1000\n"
]
}
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# Pick an existing custom embedding, or create a custom embedding"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# See all custom embeddings available\n",
"!advtool embeddings list"
],
"cell_type": "code",
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"00000000-0000-0000-0000-000000000000 - Image Embedding (CLIP ViT-B/32) - dims: 512 \n",
"00000000-0000-0000-0000-000000000001 - Text embedding (All-MPNet-base-v2) - dims: 768 \n",
"45cafc7a-5314-462a-8afc-7a5314062a3b - my_custom_embedding_2048_dimensions - dims: 2048 \n",
"7d3a6118-589d-4b6c-ba61-18589dbb6ccf - ResNet50_2048_dimensions - dims: 2048 \n"
]
}
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# Create a new custom embedding\n",
"!advtool embeddings create my_custom_embedding_2048_dimensions_v2 2048\n",
"# will return the ID of the newly created embedding, e.g. 0ddc5d5c-0963-41ad-9c5d-5c0963a1ad98"
"!advtool embeddings create my_custom_embedding_2048_dimensions 2048\n",
"# will return the ID of the newly created embedding, e.g. cgbjjt5ra07710005liytdf19"
],
"cell_type": "code",
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Embedding type created id=da5d4b0f-e440-4e2e-9d4b-0fe4400e2e8d\n"
]
}
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# # Delete a custom embedding\n",
"# !advtool embeddings delete 2e122b85-7def-44fb-922b-857defe4fb8a"
"# !advtool embeddings delete cj7j0ukre0771000blj4qnxgn"
],
"cell_type": "code",
"outputs": [],
Expand All @@ -270,38 +223,27 @@
"metadata": {},
"source": [
"# Upload the payload to Labelbox \n",
"!advtool embeddings import da5d4b0f-e440-4e2e-9d4b-0fe4400e2e8d ./payload.ndjson"
"!advtool embeddings import cj7j0ukre0771000blj4qnxgn ./payload.ndjson"
],
"cell_type": "code",
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Uploading file: ./payload.ndjson \n",
"Progress: 100.0%\n",
"Check 'advtool embeddings count <embedding id>' for total searchable embeddings\n"
]
}
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# Count the data rows that have a specific custom embedding"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# count how many data rows have a specific custom embedding (This can take a couple of minutes)\n",
"!advtool embeddings count da5d4b0f-e440-4e2e-9d4b-0fe4400e2e8d"
"!advtool embeddings count cj7j0ukre0771000blj4qnxgn"
],
"cell_type": "code",
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1000\n"
]
}
],
"outputs": [],
"execution_count": null
},
{
Expand All @@ -310,16 +252,8 @@
"print(len(payload))"
],
"cell_type": "code",
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1000\n"
]
}
],
"outputs": [],
"execution_count": null
}
]
}
}