<td>
   <a target="_blank" href="https://labelbox.com"><img src="https://labelbox.com/blog/content/images/2021/02/logo-v4.svg" width="256" /></a>
</td>

<td>
<a href="https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/custom_embeddings.ipynb" target="_blank"><img
src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
</td>

<td>
<a href="https://github.com/Labelbox/labelbox-python/tree/master/examples/basics/custom_embeddings.ipynb" target="_blank"><img
src="https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white" alt="GitHub"></a>
</td>

# Documentation
Please read this document before getting started. 
https://docs.google.com/document/d/1C_zZFGNjXq10P1MvEX6MM0TC7HHrkFOp9BB0P_S_2MQ

# Imports

In [1]:
# Install the Labelbox Python SDK (with its optional `data` extras) and import it as `lb`
!pip3 install -q labelbox[data]
import labelbox as lb
# Install and import ndjson, used later to write and re-read the payload.ndjson file
!pip3 install -q ndjson
import ndjson

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.2/189.2 KB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m52.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pygeotile (setup.py) ... [?25l[?25hdone


# Install the advlib package from GitHub

In [2]:
# Install advlib directly from its GitHub repository; it is needed for custom embeddings
# (presumably it provides the `advtool` command used in later cells — confirm)
!pip3 install -q 'git+https://github.com/Labelbox/advlib.git'

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 KB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for advlib (pyproject.toml) ... [?25l[?25hdone


# Labelbox Credentials

In [None]:
import os
from getpass import getpass

# Never hardcode the API key in the notebook: reuse LABELBOX_API_KEY if it is
# already exported, otherwise prompt for it without echoing the input.
API_KEY = os.environ.get("LABELBOX_API_KEY") or getpass("Labelbox API key: ")
client = lb.Client(API_KEY)

# Export the key through the process environment so that shell commands
# launched from this notebook (the `!advtool ...` cells) inherit it.
# NOTE(review): assumes advtool reads LABELBOX_API_KEY — confirm.
os.environ["LABELBOX_API_KEY"] = API_KEY

# Sanity check that the variable is set, without printing the secret itself
# (echoing it would store the key in the saved notebook output).
print("LABELBOX_API_KEY is set:", bool(os.environ.get("LABELBOX_API_KEY")))

# Select data rows in Labelbox for custom embeddings

In [32]:
# Look up the Labelbox dataset by its ID and export all of its data rows
# (the export call is made with timeout_seconds=9999).
dataset = client.get_dataset("<DATASET-ID>")
drs = [*dataset.export_data_rows(timeout_seconds=9999)]
# Keep only the unique ID (`uid`) of each exported data row.
data_row_ids = list(map(lambda row: row.uid, drs))

# Create the payload for custom embeddings
The payload must be an `.ndjson` (newline-delimited JSON) file.
It does not have to be created with Python.

In [33]:
import numpy as np

# Labelbox supports custom embeddings of dimension up to 2048
EMBEDDING_DIM = 2048
# Fixed seed so that re-running the notebook produces the same demo vectors
SEED = 42

nb_data_rows = len(data_row_ids)
# Generate one random embedding vector (values in [0, 1)) per selected data row.
# `.tolist()` converts the array into nested lists of plain Python floats,
# which serialize to JSON without relying on NumPy scalar types.
rng = np.random.default_rng(SEED)
custom_embeddings = rng.random((nb_data_rows, EMBEDDING_DIM)).tolist()

In [None]:
# Assemble the custom-embedding payload: one record per data row, pairing
# each data row ID with the embedding vector at the same position.
payload = [
    {"id": row_id, "vector": vector}
    for row_id, vector in zip(data_row_ids, custom_embeddings)
]

print('payload', len(payload),payload[:1])

In [35]:
# Write the payload to disk as an ndjson file
with open('payload.ndjson', mode='w') as out_file:
    ndjson.dump(payload, out_file)

# Read the file back to confirm it loads and to count the records it holds
with open('payload.ndjson', mode='r') as in_file:
    sanity_check_payload = ndjson.load(in_file)

print("Nb of custom embedding vectors in sanity_check_payload: ", len(sanity_check_payload))
# Uncomment to inspect the full reloaded payload:
# print("sanity_check_payload: ", sanity_check_payload)

Nb of custom embedding vectors in sanity_check_payload:  1000


# Pick an existing custom embedding, or create a custom embedding

In [36]:
# List all embeddings available (built-in and custom); each output line shows
# the embedding ID, its name and its number of dimensions
!advtool embeddings list

00000000-0000-0000-0000-000000000000 - Image Embedding (CLIP ViT-B/32)          - dims: 512  
00000000-0000-0000-0000-000000000001 - Text embedding (All-MPNet-base-v2)       - dims: 768  
2e122b85-7def-44fb-922b-857defe4fb8a - my_custom_embedding_2048_dimensions_2    - dims: 2048 
45cafc7a-5314-462a-8afc-7a5314062a3b - my_custom_embedding_2048_dimensions      - dims: 2048 
7d3a6118-589d-4b6c-ba61-18589dbb6ccf - ResNet50_2048_dimensions                 - dims: 2048 


In [37]:
# Create a new custom embedding (arguments appear to be: name, number of dimensions)
!advtool embeddings create my_custom_embedding_2048_dimensions_3 2048
# The command prints the ID of the newly created embedding, e.g. 0ddc5d5c-0963-41ad-9c5d-5c0963a1ad98.
# Use that ID in the `import` and `count` commands further down.

Embedding type created id=7bcf4f71-4dcb-432b-8f4f-714dcbd32b59


In [None]:
# Delete a custom embedding by ID (uncomment the line below and substitute the ID to run it)
# !advtool embeddings delete 521eadfe-f8e9-4135-9ead-fef8e9713546

# Upload the payload to Labelbox

In [38]:
# Upload the payload file to Labelbox for the given embedding ID.
# NOTE: the ID below is the one printed by the `advtool embeddings create` run recorded
# in this notebook; replace it with your own embedding ID before running.
!advtool embeddings import 7bcf4f71-4dcb-432b-8f4f-714dcbd32b59 ./payload.ndjson

Uploading file: ./payload.ndjson 
Progress: 100.0%
Check 'advtool embeddings count <embedding id>' for total searchable embeddings


In [41]:
# Count how many data rows have a specific custom embedding (this can take a couple of minutes).
# NOTE: use the same embedding ID that was passed to `advtool embeddings import`.
!advtool embeddings count 7bcf4f71-4dcb-432b-8f4f-714dcbd32b59

1000


In [42]:
# Number of vectors in the local payload; it should match the count reported by `advtool embeddings count`
print(len(payload))

1000
