# Retrieve embedding vectors for SEU morphologies

In [1]:
import requests

In [2]:
# Base URL of the embedder REST service; every request in this notebook is built on top of it.
ENDPOINT = "http://embedder-bbp-dke-dev.ocp.bbp.epfl.ch"

## Get all the models in the catalogue

In [3]:
# Ask the service for its full model catalogue.
# The explicit timeout keeps the cell from blocking indefinitely when the
# service is unreachable (requests applies no timeout by default).
r = requests.get(f'{ENDPOINT}/models/', timeout=10)
print(r)
r.json()

<Response [200]>


{'models': {'SEU_morph_attri2vec_euclidean': {'id': 'SEU_morph_attri2vec_euclidean',
   'name': 'SEU_morph_attri2vec_euclidean',
   'description': 'SEU_morph_attri2vec_euclidean',
   'filename': 'downloads/SEU_morph_attri2vec_euclidean.zip',
   'created': 'Tue Jul  6 14:18:16 2021',
   'modified': 'Mon Jul  5 10:41:44 2021'}}}

## Get a model by name

In [4]:
# Identifier of the model queried by the requests below; it matches the 'id' listed in the catalogue.
MODEL_NAME = "SEU_morph_attri2vec_euclidean"

In [5]:
# Fetch the catalogue entry of a single model, addressed by its name.
# The explicit timeout keeps the cell from blocking indefinitely when the
# service is unreachable (requests applies no timeout by default).
r = requests.get(f'{ENDPOINT}/models/{MODEL_NAME}', timeout=10)
print(r)
r.json()

<Response [200]>


{'id': 'SEU_morph_attri2vec_euclidean',
 'name': 'SEU_morph_attri2vec_euclidean',
 'description': 'SEU_morph_attri2vec_euclidean',
 'filename': 'downloads/SEU_morph_attri2vec_euclidean.zip',
 'created': 'Tue Jul  6 14:18:16 2021',
 'modified': 'Mon Jul  5 10:41:44 2021'}

## Get details on different model components

In [6]:
# Inspect the configuration of the model's preprocessor component.
# The explicit timeout keeps the cell from blocking indefinitely when the
# service is unreachable (requests applies no timeout by default).
r = requests.get(f'{ENDPOINT}/models/{MODEL_NAME}/preprocessor/', timeout=10)
print(r)
r.json()

<Response [200]>


{'heterogeneous': 'False',
 'drop_types': 'False',
 'encode_types': 'False',
 'edge_features': 'False',
 'categorical_encoding': 'multibin',
 'text_encoding': 'tfidf',
 'text_encoding_max_dimension': '128',
 'missing_numeric': 'impute',
 'imputation_strategy': 'mean',
 'standardize_numeric': 'True',
 'node_properties': "['brainLocation_brainRegion_id', 'brainLocation_coordinatesInBrainAtlas_valueX_value', 'brainLocation_coordinatesInBrainAtlas_valueY_value', 'brainLocation_coordinatesInBrainAtlas_valueZ_value', 'somaNumberOfPoints_value', 'subject_name', 'has_Axon_feature', 'has_BasalDendrite_feature', 'has_ApicalDendrite_feature', 'BasalDendrite_cumulatedLength', 'BasalDendrite_longestBranchLength', 'BasalDendrite_longestBranchNumberOfNodes', 'BasalDendrite_numberOfProjections', 'BasalDendrite_projectionBrainRegion', 'BasalDendrite_traversedBrainRegion', 'Axon_cumulatedLength', 'Axon_longestBranchLength', 'Axon_longestBranchNumberOfNodes', 'Axon_numberOfProjections', 'Axon_projectionB

In [7]:
# Inspect the configuration of the model's embedder component.
# The explicit timeout keeps the cell from blocking indefinitely when the
# service is unreachable (requests applies no timeout by default).
r = requests.get(f'{ENDPOINT}/models/{MODEL_NAME}/embedder/', timeout=10)
print(r)
r.json()

<Response [200]>


{'interface': 'StellarGraphNodeEmbedder',
 'model_type': 'inductive',
 'trained': 'True',
 'model_name': 'attri2vec',
 'model_params': "{'length': 5, 'number_of_walks': 10, 'epochs': 10, 'embedding_dimension': 128, 'batch_size': 20, 'negative_samples': 10, 'num_samples': [10, 5], 'random_walk_p': 0.5, 'random_walk_q': 2.0, 'clusters': 2, 'clusters_q': 1}",
 'graph_configs': "{'directed': True, 'include_type': False, 'feature_props': None, 'feature_vector_prop': 'features', 'edge_weight': 'frequency'}"}

In [8]:
# Inspect the configuration of the model's similarity-processor component.
# The explicit timeout keeps the cell from blocking indefinitely when the
# service is unreachable (requests applies no timeout by default).
r = requests.get(f'{ENDPOINT}/models/{MODEL_NAME}/similarity-processor/', timeout=10)
print(r)
r.json()

<Response [200]>


{'similarity': 'euclidean',
 'dimension': '128',
 'segmented': 'False',
 'interface': 'SimilarityProcessor'}

## Get resource embeddings

In [11]:
%%time
# Look up the stored embedding vectors of known resources by their ids.
# The last id is deliberately bogus, to show what the service does with an
# id it has never seen.
# NOTE(review): this path has no trailing slash while the POST call to the
# same route uses '/embedding/' -- confirm which form the service expects.
r = requests.get(
    f'{ENDPOINT}/models/{MODEL_NAME}/embedding',
    params={
        "resource_ids": [
            "https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/608c996a-15a3-4d8a-aa4a-827fa6946f9b",
            "https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/e1977f18-059c-4039-b1b9-e982f0c2bdb8",
            "lalala not in the index"
        ]
    },
    # Without a timeout the cell would block indefinitely if the service is unreachable.
    timeout=10)
print(r)
r.json()

<Response [200]>
CPU times: user 9.34 ms, sys: 4.33 ms, total: 13.7 ms
Wall time: 47 ms


{'vectors': {'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/608c996a-15a3-4d8a-aa4a-827fa6946f9b': [0.9938040971755981,
   0.11907371878623962,
   0.9720897674560547,
   0.9913535118103027,
   0.03903073072433472,
   0.05238097906112671,
   0.9858897924423218,
   0.12588229775428772,
   0.9556014537811279,
   0.8814420104026794,
   0.0419333279132843,
   0.9818362593650818,
   0.9761927127838135,
   0.9870108366012573,
   0.20886778831481934,
   0.053264766931533813,
   0.22237610816955566,
   0.20299917459487915,
   0.565372884273529,
   0.02974003553390503,
   0.1516365110874176,
   0.9195360541343689,
   0.751937747001648,
   0.9866510033607483,
   0.05084711313247681,
   0.323513925075531,
   0.05458638072013855,
   0.9942514300346375,
   0.662847638130188,
   0.9609318971633911,
   0.9484144449234009,
   0.8712941408157349,
   0.9422887563705444,
   0.9529879093170166,
   0.016747087240219116,
   0.28841525316238403,
   0.9781495332717896,
   0.7823991179466248,
  

## Get nearest neighbors

In [12]:
%%time
# Retrieve the k nearest neighbours of each listed resource.
# The last id is deliberately bogus, to show what the service does with an
# id it has never seen.
r = requests.get(
    f'{ENDPOINT}/models/{MODEL_NAME}/neighbors/',
    params={
        "resource_ids": [
            "https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/608c996a-15a3-4d8a-aa4a-827fa6946f9b",
            "https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/e1977f18-059c-4039-b1b9-e982f0c2bdb8",
            "lalal not in index"
        ],
        "k": 20
    },
    # Without a timeout the cell would block indefinitely if the service is unreachable.
    timeout=10)
print(r)
r.json()

<Response [200]>
CPU times: user 4.67 ms, sys: 2.21 ms, total: 6.89 ms
Wall time: 64.4 ms


{'neighbors': {'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/608c996a-15a3-4d8a-aa4a-827fa6946f9b': ['https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/608c996a-15a3-4d8a-aa4a-827fa6946f9b',
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/93c7344a-5a7e-49be-967f-ce34051fdff1',
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/eb0289d3-de39-4864-982a-81e288e6dd60',
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/f9d77a6b-5f9a-41c4-9669-6c437b91f149',
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/ac512d1e-6aa2-43e9-8a15-84bffd81d688',
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/ae900192-e4cd-46f7-a0d9-6950632c427c',
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/d779e2cb-90e2-4f63-8b69-c863b0658ef2',
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/4d53dd8c-7ee1-41e1-b32e-13c6c129721d',
   'https://bbp.epfl.ch/neurosciencegraph/dat

In [15]:
%%time
# Same neighbour query as before, but "values": True makes the service return
# the distance to each neighbour instead of a bare list of ids.
# The similarity index is built on Euclidean distance: the smaller the value, the closer the neighbor
r = requests.get(
    f'{ENDPOINT}/models/{MODEL_NAME}/neighbors/',
    params={
        "resource_ids": [
            "https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/608c996a-15a3-4d8a-aa4a-827fa6946f9b",
            "https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/e1977f18-059c-4039-b1b9-e982f0c2bdb8",
            "lalal not in index"
        ],
        "k": 20,
        "values": True
    },
    # Without a timeout the cell would block indefinitely if the service is unreachable.
    timeout=10)
print(r)
r.json()

<Response [200]>
CPU times: user 8 ms, sys: 3.34 ms, total: 11.3 ms
Wall time: 54 ms


{'neighbors': {'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/608c996a-15a3-4d8a-aa4a-827fa6946f9b': {'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/608c996a-15a3-4d8a-aa4a-827fa6946f9b': 0.0,
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/93c7344a-5a7e-49be-967f-ce34051fdff1': 0.42103111743927,
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/eb0289d3-de39-4864-982a-81e288e6dd60': 0.7957563400268555,
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/f9d77a6b-5f9a-41c4-9669-6c437b91f149': 0.8210515975952148,
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/ac512d1e-6aa2-43e9-8a15-84bffd81d688': 0.8502457737922668,
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/ae900192-e4cd-46f7-a0d9-6950632c427c': 0.8575608134269714,
   'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/d779e2cb-90e2-4f63-8b69-c863b0658ef2': 0.8788225054740906,
   'https://bbp.epfl.ch/neu

## Predict embeddings for unseen points

In [15]:
%%time
# Ask the model to embed inputs that are not part of its index.
# NOTE(review): the payload is plain text and the neighbours recorded further
# down ('shell', 'protein ...') look like they come from a text-entity model,
# not from SEU morphologies -- confirm the payload expected by MODEL_NAME and
# re-run this cell on a fresh kernel.
r = requests.post(
    f'{ENDPOINT}/models/{MODEL_NAME}/embedding/',
    json={
        "data": ["hello world", "protein", "coronavirus"],
    },
    # Without a timeout the cell would block indefinitely if the service is unreachable.
    timeout=10)
print(r)
# Fail here with the HTTP error rather than with a confusing KeyError on
# "vectors" when the service answers with an error payload.
r.raise_for_status()
# Kept in a variable because the next cell posts these vectors to the neighbours endpoint.
vectors = r.json()["vectors"]

<Response [200]>
CPU times: user 3.44 ms, sys: 1.42 ms, total: 4.86 ms
Wall time: 8.38 ms


In [16]:
%%time
# Find the k nearest indexed neighbours of raw embedding vectors.
# Relies on `vectors` produced by the embedding-prediction cell just above;
# run that cell first.
r = requests.post(
    f'{ENDPOINT}/models/{MODEL_NAME}/neighbors/',
    params={
        "k": 20,
        # Return the distance to each neighbour alongside its key.
        "values": True
    },
    json={
        "vectors": vectors
    },
    # Without a timeout the cell would block indefinitely if the service is unreachable.
    timeout=10)
print(r)
r.json()

<Response [200]>
CPU times: user 3.96 ms, sys: 1.55 ms, total: 5.51 ms
Wall time: 14.2 ms


{'neighbors': [{'shell': 0.8483226299285889,
   'shellfish': 0.8483226299285889,
   "st. john's wort": 0.8498218655586243,
   'how worried': 0.8998590707778931,
   'got worse': 0.9149569272994995,
   'frizzled-2': 1.0,
   '3p21.3-p21.1': 1.0,
   '9p13': 1.0,
   '1p13.3-p13.1': 1.0,
   '16p': 1.0,
   'unlikely': 1.0,
   '19: 56026591-56020357': 1.0,
   '21q21.3': 1.0,
   '3: 52419049-52410067': 1.0,
   '2q37': 1.0,
   'cdc73 np_078805.3:p.m1v': 1.0,
   '10: 135229746-135241501': 1.0,
   '1q22-q23': 1.0,
   '13q34': 1.0,
   '10: 115793796-115795518': 1.0},
  {'glycoprotein': 0.0,
   'protein': 0.0,
   'protein xrp2': 0.13581442832946777,
   'protein btg1': 0.13581442832946777,
   'protein nlrc5': 0.13581442832946777,
   'protein wnt-10b': 0.13581442832946777,
   'protein wnt-2b': 0.13581442832946777,
   'protein emsy': 0.13581442832946777,
   'protein wnt-16': 0.13581442832946777,
   'protein hira': 0.13581442832946777,
   'protein wnt-5a': 0.13581442832946777,
   'protein znf365': 0.135