# Embedder API example: term embedding

In [1]:
import requests

In [2]:
# Base URL of the Embedder API; every request in this notebook is built from it.
ENDPOINT = "http://bmo-linking-bbp-dke-staging.ocp.bbp.epfl.ch"

## Get all the models in the catalogue

In [3]:
# Ask the service for the full model catalogue.
catalogue_url = f'{ENDPOINT}/models/'
r = requests.get(catalogue_url)
print(r)
r.json()

<Response [200]>


{'models': {'BMO-linking': {'store_id': None,
   'name': 'BMO-linking',
   'description': None,
   'prefLabel': None,
   'contribution': None,
   'wasAssociatedWith': None,
   'distribution': 'downloads/BMO-linking.zip',
   'created': 'Thu Jan 20 08:12:25 2022',
   'modified': 'Wed Sep  1 12:43:47 2021'}}}

## Get a model by name

In [4]:
# Name of the catalogue model targeted by all per-model requests in this notebook.
MODEL_NAME = "BMO-linking"

In [5]:
# Fetch the catalogue entry of a single model, addressed by its name.
model_url = f'{ENDPOINT}/models/{MODEL_NAME}'
r = requests.get(model_url)
print(r)
r.json()

<Response [200]>


{'store_id': None,
 'name': 'BMO-linking',
 'description': None,
 'prefLabel': None,
 'contribution': None,
 'wasAssociatedWith': None,
 'distribution': 'downloads/BMO-linking.zip',
 'created': 'Thu Jan 20 08:12:25 2022',
 'modified': 'Wed Sep  1 12:43:47 2021'}

## Get details on different model components

In [6]:
# Inspect the configuration of the model's preprocessor component.
preprocessor_url = f'{ENDPOINT}/models/{MODEL_NAME}/preprocessor/'
r = requests.get(preprocessor_url)
print(r)
r.json()

<Response [200]>


{'analyzer': 'char',
 'binary': 'False',
 'decode_error': 'strict',
 'dtype': "<class 'numpy.float32'>",
 'encoding': 'utf-8',
 'input': 'content',
 'lowercase': 'True',
 'max_df': '1.0',
 'max_features': '1024',
 'min_df': '0.0001',
 'ngram_range': '(3, 3)',
 'norm': 'l2',
 'preprocessor': 'None',
 'smooth_idf': 'True',
 'stop_words': 'None',
 'strip_accents': 'None',
 'sublinear_tf': 'False',
 'token_pattern': '(?u)\\b\\w\\w+\\b',
 'tokenizer': 'None',
 'use_idf': 'True',
 'vocabulary': 'None',
 'interface': 'TfIdfEncoder'}

In [7]:
# Inspect the model's embedder component.
# In the recorded run this returned 404 with the message
# 'Model does not contain an embedder' for this model.
embedder_url = f'{ENDPOINT}/models/{MODEL_NAME}/embedder/'
r = requests.get(embedder_url)
print(r)
r.json()

<Response [404]>


{'success': False, 'message': 'Model does not contain an embedder'}

In [8]:
# Inspect the configuration of the model's similarity processor component.
similarity_url = f'{ENDPOINT}/models/{MODEL_NAME}/similarity-processor/'
r = requests.get(similarity_url)
print(r)
r.json()

<Response [200]>


{'similarity': 'euclidean',
 'dimension': '1024',
 'segmented': 'True',
 'interface': 'SimilarityProcessor'}

In [9]:
# Terms sent as `resource_ids` to the embedding and neighbors endpoints.
# "lalala not in index" is a deliberate miss: the recorded neighbors output
# maps it to None.
existing_terms = [
    "l5_lbc",
    "layer 5 bipolar cell",
    "burst non-accommodating electrical type",
    "lalala not in index",
    "emodel building workflow"
]

# Free-text terms sent as `data` to the embedding endpoint; the resulting
# vectors are then used to query for similar indexed terms.
terms_to_link = [
    "electrophysiological recording",
    "bipolar cell",
    "burst non-accommodating neuron",
    "mariotti cell",
    "e-model reconstruction workflow",
    "burst electrical type"
]

## Get resource embeddings

In [10]:
%%time
r = requests.get(
    f'{ENDPOINT}/models/{MODEL_NAME}/embedding',
    params={
        "resource_ids": existing_terms
    })
print(r)
r.json()

<Response [200]>
CPU times: user 8.82 ms, sys: 2.52 ms, total: 11.3 ms
Wall time: 60 ms


{'vectors': {'l5_lbc': [0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   

Alternatively, to retrieve embedding vectors for a large number of resources, a POST request can be sent to the same endpoint with the resource IDs in the request body.

In [11]:
%%time
r = requests.post(
    f'{ENDPOINT}/models/{MODEL_NAME}/embedding/',
    json={
        "resource_ids": existing_terms
    })
print(r)
r.json()

<Response [200]>
CPU times: user 5.93 ms, sys: 2.13 ms, total: 8.06 ms
Wall time: 36.9 ms


{'vectors': {'l5_lbc': [0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   

## Get nearest neighbors

In [12]:
%%time
r = requests.get(
    f'{ENDPOINT}/models/{MODEL_NAME}/neighbors/',
    params={
        "resource_ids": existing_terms,
        "k": 5
    })
print(r)
r.json()

<Response [200]>
CPU times: user 4.5 ms, sys: 1.49 ms, total: 5.99 ms
Wall time: 235 ms


{'neighbors': {'l5_lbc': ['l5_lbc', 'l6_lbc', 'l4_lbc', 'l23_lbc', 'l5_nbc'],
  'layer 5 bipolar cell': ['layer 5 bipolar cell',
   'layer 6 bipolar cell',
   'layer 4 bipolar cell',
   'layer 2/3 bipolar cell'],
  'burst non-accommodating electrical type': ['burst non-accommodating electrical type',
   'burst accommodating electrical type',
   'delayed non-accommodating electrical type',
   'continuous non-accommodating electrical type',
   'continuous accommodating electrical type'],
  'lalala not in index': None,
  'emodel building workflow': ['emodel building workflow',
   'model building workflow',
   'cell building workflow',
   'neuron building workflow',
   'model reaction building workflow']}}

In [13]:
%%time
r = requests.get(
    f'{ENDPOINT}/models/{MODEL_NAME}/neighbors/',
    params={
        "resource_ids": existing_terms,
        "k": 5,
        "values": True
    })
print(r)
r.json()

<Response [200]>
CPU times: user 4.72 ms, sys: 1.76 ms, total: 6.48 ms
Wall time: 233 ms


{'neighbors': {'l5_lbc': {'l5_lbc': 0.0,
   'l6_lbc': 0.4827069640159607,
   'l4_lbc': 0.7783000469207764,
   'l23_lbc': 0.8714874982833862,
   'l5_nbc': 1.5904083251953125},
  'layer 5 bipolar cell': {'layer 5 bipolar cell': 0.0,
   'layer 6 bipolar cell': 0.5063445568084717,
   'layer 4 bipolar cell': 0.5264204740524292,
   'layer 2/3 bipolar cell': 0.6154482960700989},
  'burst non-accommodating electrical type': {'burst non-accommodating electrical type': 0.0,
   'burst accommodating electrical type': 0.35445094108581543,
   'delayed non-accommodating electrical type': 0.3727114796638489,
   'continuous non-accommodating electrical type': 0.4227624535560608,
   'continuous accommodating electrical type': 0.681501030921936},
  'lalala not in index': None,
  'emodel building workflow': {'emodel building workflow': 0.0,
   'model building workflow': 0.06410790234804153,
   'cell building workflow': 0.29888251423835754,
   'neuron building workflow': 0.5157147645950317,
   'model react

Alternatively, to get nearest neighbors for a large number of resources, a POST request can be sent to the same endpoint with the resource IDs in the request body.

In [14]:
%%time
r = requests.post(
    f'{ENDPOINT}/models/{MODEL_NAME}/neighbors/',
    params={"k": 5, "values": True},
    json={
        "resource_ids": existing_terms,
    })
print(r)
r.json()

<Response [200]>
CPU times: user 5.54 ms, sys: 2.16 ms, total: 7.7 ms
Wall time: 223 ms


{'neighbors': {'l5_lbc': {'l5_lbc': 0.0,
   'l6_lbc': 0.4827069640159607,
   'l4_lbc': 0.7783000469207764,
   'l23_lbc': 0.8714874982833862,
   'l5_nbc': 1.5904083251953125},
  'layer 5 bipolar cell': {'layer 5 bipolar cell': 0.0,
   'layer 6 bipolar cell': 0.5063445568084717,
   'layer 4 bipolar cell': 0.5264204740524292,
   'layer 2/3 bipolar cell': 0.6154482960700989},
  'burst non-accommodating electrical type': {'burst non-accommodating electrical type': 0.0,
   'burst accommodating electrical type': 0.35445094108581543,
   'delayed non-accommodating electrical type': 0.3727114796638489,
   'continuous non-accommodating electrical type': 0.4227624535560608,
   'continuous accommodating electrical type': 0.681501030921936},
  'lalala not in index': None,
  'emodel building workflow': {'emodel building workflow': 0.0,
   'model building workflow': 0.06410790234804153,
   'cell building workflow': 0.29888251423835754,
   'neuron building workflow': 0.5157147645950317,
   'model react

## Predict embeddings for unseen points

In [15]:
%%time
r = requests.post(
    f'{ENDPOINT}/models/{MODEL_NAME}/embedding/',
    json={
        "data": terms_to_link,
    })
print(r)
vectors = r.json()["vectors"]

<Response [200]>
CPU times: user 5.57 ms, sys: 2.07 ms, total: 7.64 ms
Wall time: 67.6 ms


In [16]:
%%time
r = requests.post(
    f'{ENDPOINT}/models/{MODEL_NAME}/neighbors/',
    params={
        "k": 20,
        "values": True
    },
    json={
        "vectors": vectors
    })
print(r)
result = r.json()

<Response [200]>
CPU times: user 6.76 ms, sys: 2.24 ms, total: 9 ms
Wall time: 243 ms


In [17]:
# Report, for each query term, the neighbors returned at the same position in
# result["neighbors"]; a None entry is reported as "not in index".
for position, term in enumerate(terms_to_link):
    print(f"Similar terms to '{term}': ")
    matches = result["neighbors"][position]
    if matches is None:
        print(f"\t {term} is not in index")
    else:
        for match in matches:
            print(f"\t- {match}")
    print()

Similar terms to 'electrophysiological recording': 
	- neuron electrophysiological feature
	- electrophysiological feature extraction workflow

Similar terms to 'bipolar cell': 
	- layer 6 bipolar cell
	- layer 5 bipolar cell
	- layer 4 bipolar cell
	- layer 2/3 bipolar cell

Similar terms to 'burst non-accommodating neuron': 
	- burst non-accommodating electrical type
	- delayed non-accommodating electrical type
	- continuous non-accommodating electrical type
	- burst accommodating electrical type
	- continuous accommodating electrical type

Similar terms to 'mariotti cell': 
	- layer 6 martinotti cell
	- layer 2 martinotti cell
	- layer 5 martinotti cell
	- layer 4 martinotti cell
	- layer 3 martinotti cell
	- layer 2/3 martinotti cell

Similar terms to 'e-model reconstruction workflow': 
	- model reaction simulation workflow
	- emodel simulation workflow
	- emodel validation threshold

Similar terms to 'burst electrical type': 
	- burst adapting electrical type

