diff --git a/examples/basics/custom_embeddings.ipynb b/examples/basics/custom_embeddings.ipynb index 8caffd10d..8519cd63b 100644 --- a/examples/basics/custom_embeddings.ipynb +++ b/examples/basics/custom_embeddings.ipynb @@ -50,11 +50,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { - "id": "wRIdzkYf7T18" + "id": "wRIdzkYf7T18", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "13ee42f0-4206-493f-f402-ac7c84916e5e" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m189.2/189.2 KB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m52.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for pygeotile (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], "source": [ "# labelbox\n", "!pip3 install -q labelbox[data]\n", @@ -75,13 +90,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9k82ueIu8Dy1", - "outputId": "cc728790-fc62-4d8d-a3e7-94739ffec809" + "outputId": "5323157e-872b-4bf2-a65c-0fa9662cdbe8" }, "outputs": [ { @@ -90,7 +105,9 @@ "text": [ " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n" + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 KB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for advlib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n" ] } ], @@ -136,13 +153,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "id": "tOIyo5pC7PTz" }, "outputs": [], "source": [ "# get images from a Labelbox dataset\n", + "# Our systems start to process data after 1000 embeddings of each type; for this demo, make sure your dataset has over 1000 data rows\n", "dataset = client.get_dataset(\"\")\n", "drs = list(dataset.export_data_rows(timeout_seconds=9999))\n", "data_row_ids = [dr.uid for dr in drs]" @@ -161,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": { "id": "iJFGf0w7swnW" }, @@ -193,37 +211,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": { "id": "u0ZgybLK67n0", "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "d644f81a-014e-4de9-913a-74211972e9b2" + "outputId": "7e1991a0-8e0a-4e63-e8d8-dfdcf095a625" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Nb of custom embedding vectors in sanity_check_payload: 1000\n", - "sanity_check_payload: " - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "IOPub data rate exceeded.\n", - "The notebook server will temporarily stop sending output\n", - "to the client in order to avoid crashing it.\n", - "To change this limit, set the config variable\n", - "`--NotebookApp.iopub_data_rate_limit`.\n", - "\n", - "Current values:\n", - "NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n", - "NotebookApp.rate_limit_window=3.0 (secs)\n", - "\n" + "Nb of custom embedding vectors in sanity_check_payload: 1000\n" ] } ], @@ -251,13 +252,13 @@ }, { "cell_type": "code", - "execution_count": null, 
+ "execution_count": 46, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YQeCS_U98BD2", - "outputId": "178dc3be-6e89-4df8-ec3d-2fa6dacc0be0" + "outputId": "cdeb0bc7-9ea1-4864-88ff-122f6dab8af4" }, "outputs": [ { @@ -266,9 +267,8 @@ "text": [ "00000000-0000-0000-0000-000000000000 - Image Embedding (CLIP ViT-B/32) - dims: 512 \n", "00000000-0000-0000-0000-000000000001 - Text embedding (All-MPNet-base-v2) - dims: 768 \n", - "521eadfe-f8e9-4135-9ead-fef8e9713546 - my_custom_embedding_2048_dimensions - dims: 2048 \n", - "a03948c1-151a-4a1a-b948-c1151a6a1a1d - ResNet50_2048_dimensions - dims: 2048 \n", - "baf8856a-e5f7-4781-b885-6ae5f7b78192 - my_custom_embedding - dims: 8 \n" + "45cafc7a-5314-462a-8afc-7a5314062a3b - my_custom_embedding_2048_dimensions - dims: 2048 \n", + "7d3a6118-589d-4b6c-ba61-18589dbb6ccf - ResNet50_2048_dimensions - dims: 2048 \n" ] } ], @@ -279,26 +279,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "spyHzkLP67dI", - "outputId": "21b6fda1-7a38-4bd5-d244-dfc90b8a5090" + "outputId": "8a046562-5eb1-4fb1-8f23-6958f8b58e1f" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Embedding type created id=521eadfe-f8e9-4135-9ead-fef8e9713546\n" + "Embedding type created id=da5d4b0f-e440-4e2e-9d4b-0fe4400e2e8d\n" ] } ], "source": [ "# # Create a new custom embedding\n", - "!advtool embeddings create my_custom_embedding_2048_dimensions 2048\n", + "!advtool embeddings create my_custom_embedding_2048_dimensions_v2 2048\n", "# will return the ID of the newly created embedding, e.g. 
0ddc5d5c-0963-41ad-9c5d-5c0963a1ad98" ] }, @@ -306,7 +306,7 @@ "cell_type": "code", "source": [ "# # Delete a custom embedding\n", - "# !advtool embeddings delete 521eadfe-f8e9-4135-9ead-fef8e9713546" + "# !advtool embeddings delete 2e122b85-7def-44fb-922b-857defe4fb8a" ], "metadata": { "id": "MafxKyncxyvR" @@ -325,13 +325,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "twDd5XNM67Zo", - "outputId": "a7715fe7-3fc3-43d0-8316-bbc45a7dee60" + "outputId": "cb4573de-5417-449b-b560-9f99cfe6eda4" }, "outputs": [ { @@ -345,43 +345,43 @@ } ], "source": [ - "# Upload the payload to Labelbox\n", - "!advtool embeddings import 521eadfe-f8e9-4135-9ead-fef8e9713546 ./payload.ndjson" + "# Upload the payload to Labelbox \n", + "!advtool embeddings import da5d4b0f-e440-4e2e-9d4b-0fe4400e2e8d ./payload.ndjson" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wC0eeEPM9aAM", - "outputId": "5889b2f8-1a07-4748-b3bf-efab545f1417" + "outputId": "55932ddf-2dde-48c1-fd90-29532b8cfdf2" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "0\n" + "1000\n" ] } ], "source": [ - "# count how many data rows have a specific custom embedding\n", - "!advtool embeddings count 521eadfe-f8e9-4135-9ead-fef8e9713546" + "# count how many data rows have a specific custom embedding (This can take a couple of minutes)\n", + "!advtool embeddings count da5d4b0f-e440-4e2e-9d4b-0fe4400e2e8d" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 64, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5AKDMJfO9Z51", - "outputId": "b3b6e7ca-1e99-4563-d8fe-038375008b69" + "outputId": "207f251f-0350-451e-ffcb-661cafb0529f" }, "outputs": [ {