# Documentation
Please read this document before getting started. 
https://docs.google.com/document/d/1C_zZFGNjXq10P1MvEX6MM0TC7HHrkFOp9BB0P_S_2MQ

# Imports

In [None]:
# labelbox
!pip3 install -q labelbox[data]
import labelbox as lb
#ndjson
!pip3 install -q ndjson
import ndjson

# Install the wheel from GitHub

In [None]:
# for custom embeddings: installs the `advlib` package directly from the Labelbox
# GitHub repository. The `advtool` commands used in later cells presumably come
# from this package — TODO confirm.
!pip3 install -q 'git+https://github.com/Labelbox/advlib.git'

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


# Labelbox Credentials

In [None]:
import os
from getpass import getpass

# Never store the key in the notebook source: take it from the environment when
# it is already set, otherwise prompt for it (the input is not echoed).
API_KEY = os.environ.get("LABELBOX_API_KEY") or getpass("Labelbox API key: ")
if not API_KEY:
    raise ValueError("A Labelbox API key is required to continue.")

client = lb.Client(API_KEY)

# Export the key as LABELBOX_API_KEY so shell commands started with `!` inherit it
# (presumably this is where `advtool` looks for the key — confirm against advlib docs).
os.environ["LABELBOX_API_KEY"] = API_KEY

# sanity check it worked, without printing the secret into the saved cell output
print("LABELBOX_API_KEY is set:", bool(os.environ.get("LABELBOX_API_KEY")))

# Select data rows in Labelbox for custom embeddings

In [None]:
# Look up the Labelbox dataset whose data rows will receive custom embeddings
dataset = client.get_dataset("<ADD YOUR DATASET ID>")

# Collect every exported data row into a list
drs = [*dataset.export_data_rows(timeout_seconds=9999)]

# Keep only the data row IDs, in export order
data_row_ids = [row.uid for row in drs]

# Create the payload for custom embeddings
The payload must be an `.ndjson` file.

It does not have to be created with Python.

In [None]:
import numpy as np

# Labelbox supports custom embeddings of dimension up to 2048
EMBEDDING_DIM = 2048
# Fixed seed so that Restart & Run All regenerates the same placeholder vectors
EMBEDDING_SEED = 0

nb_data_rows = len(data_row_ids)

# Generate one random placeholder vector of EMBEDDING_DIM values per data row
# (values are uniform in [0, 1)); swap in real model embeddings here.
# `.tolist()` produces nested lists of plain Python floats, which serialise to
# JSON without relying on NumPy scalar types.
embedding_rng = np.random.default_rng(EMBEDDING_SEED)
custom_embeddings = embedding_rng.random((nb_data_rows, EMBEDDING_DIM)).tolist()

In [None]:
# create the ndjson payload for custom embeddings:
# one {"id": <data row id>, "vector": <embedding>} record per data row

# zip() silently stops at the shorter input, so a length mismatch would drop
# records without any error; fail loudly instead.
if len(data_row_ids) != len(custom_embeddings):
    raise ValueError(
        f"{len(data_row_ids)} data row ids but {len(custom_embeddings)} embeddings"
    )

payload = [
    {"id": data_row_id, "vector": custom_embedding}
    for data_row_id, custom_embedding in zip(data_row_ids, custom_embeddings)
]

# Report the size plus a short preview; printing a complete vector floods the cell output
print('payload', len(payload))
if payload:
    first_record = payload[0]
    print('first record:', {"id": first_record["id"], "vector (first 5 values)": first_record["vector"][:5]})

In [None]:
# Serialise the payload to an ndjson file in the working directory
with open('payload.ndjson', 'w') as out_file:
    ndjson.dump(payload, out_file)

# Read the file back to confirm that it loads and to see how many records it holds
with open('payload.ndjson') as in_file:
    sanity_check_payload = ndjson.load(in_file)

# Only the record count is printed: dumping sanity_check_payload itself is large
# enough to trip the notebook's IOPub output rate limit.
print("Nb of custom embedding vectors in sanity_check_payload: ", len(sanity_check_payload))

Nb of custom embedding vectors in sanity_check_payload:  1000
sanity_check_payload:  

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



# Pick an existing custom embedding, or create a custom embedding

In [None]:
# See all custom embeddings available.
# Each output line shows an embedding's ID, its name, and its number of dimensions.
!advtool embeddings list

00000000-0000-0000-0000-000000000000 - Image Embedding (CLIP ViT-B/32)          - dims: 512  
00000000-0000-0000-0000-000000000001 - Text embedding (All-MPNet-base-v2)       - dims: 768  
521eadfe-f8e9-4135-9ead-fef8e9713546 - my_custom_embedding_2048_dimensions      - dims: 2048 
a03948c1-151a-4a1a-b948-c1151a6a1a1d - ResNet50_2048_dimensions                 - dims: 2048 
baf8856a-e5f7-4781-b885-6ae5f7b78192 - my_custom_embedding                      - dims: 8    


In [None]:
# Create a new custom embedding named `my_custom_embedding_2048_dimensions` with 2048 dimensions
!advtool embeddings create my_custom_embedding_2048_dimensions 2048
# will return the ID of the newly created embedding, e.g. 0ddc5d5c-0963-41ad-9c5d-5c0963a1ad98
# (use the returned ID in place of the hard-coded ID in the import and count cells)

Embedding type created id=521eadfe-f8e9-4135-9ead-fef8e9713546


In [None]:
# Delete a custom embedding: uncomment the next line and substitute the ID of the embedding to remove
# !advtool embeddings delete 521eadfe-f8e9-4135-9ead-fef8e9713546

# Upload the payload to Labelbox

In [None]:
# Upload the payload to Labelbox
# Arguments: the ID of the target custom embedding, then the path to the ndjson payload.
# NOTE: the ID below is the one created in this notebook's saved run; replace it
# with the ID of your own custom embedding.
!advtool embeddings import 521eadfe-f8e9-4135-9ead-fef8e9713546 ./payload.ndjson

Uploading file: ./payload.ndjson 
Progress: 100.0%
Check 'advtool embeddings count <embedding id>' for total searchable embeddings


In [None]:
# count how many data rows have a specific custom embedding
# (replace the ID below with the ID of your own custom embedding)
# NOTE(review): the saved output shows 0 immediately after a 1000-vector upload;
# presumably the count only covers embeddings that are already searchable —
# confirm, and re-run this cell later if it still reports 0.
!advtool embeddings count 521eadfe-f8e9-4135-9ead-fef8e9713546

0


In [None]:
# Number of records in the local payload, for comparison with the count reported by advtool
print(len(payload))

1000
