# Embedding service API

To run the service, first export your Nexus staging token with `export NEXUS_TOKEN=<your_token>`, then start it with `flask run`.

In [13]:
import requests

## Get all the models in the catalogue

In [24]:
# List every model registered in the catalogue.
# The timeout stops the cell from hanging forever when the local service is
# not running; raise_for_status() reports HTTP errors before the body is
# parsed, instead of failing later with an unrelated JSON decoding error.
r = requests.get(
    'http://127.0.0.1:5000/models/',
    timeout=30)
print(r)
r.raise_for_status()
r.json()

<Response [200]>


{'models': {'Test attri2vec model': {'id': 'https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/embedder_catalog/_/0fb24112-8b91-48fc-979d-f47e422fd35f',
   'name': 'Test attri2vec model',
   'description': 'Node embedding model built on the Covid dataset',
   'filename': 'attri2vec_test_model.zip',
   'created': '2021-03-05T14:09:28.593865Z',
   'modified': '2021-03-05T14:09:28.593865Z'}}}

## Get details on different model components

In [25]:
# Fetch the preprocessor configuration of the named model.
# The timeout stops the cell from hanging forever when the local service is
# not running; raise_for_status() reports HTTP errors before the body is
# parsed, instead of failing later with an unrelated JSON decoding error.
r = requests.get(
    'http://127.0.0.1:5000/model/details/preprocessor/',
    params={
        "name": "Test attri2vec model"
    },
    timeout=30)
print(r)
r.raise_for_status()
r.json()

<Response [200]>


{'heterogeneous': False,
 'drop_types': False,
 'encode_types': False,
 'edge_features': False,
 'categorical_encoding': 'multibin',
 'text_encoding': 'tfidf',
 'text_encoding_max_dimension': 512,
 'missing_numeric': 'drop',
 'imputation_strategy': 'mean',
 'standardize_numeric': True,
 'node_properties': ['definition'],
 'edge_properties': None,
 'interface': 'ScikitLearnPGEncoder'}

In [18]:
# Fetch the embedder configuration of the named model.
# The timeout stops the cell from hanging forever when the local service is
# not running; raise_for_status() reports HTTP errors before the body is
# parsed, instead of failing later with an unrelated JSON decoding error.
r = requests.get(
    'http://127.0.0.1:5000/model/details/embedder/',
    params={
        "name": "Test attri2vec model"
    },
    timeout=30)
print(r)
r.raise_for_status()
r.json()

<Response [200]>


{'interface': 'StellarGraphNodeEmbedder',
 'model_type': 'inductive',
 'trained': 'True',
 'model_name': 'attri2vec',
 'model_params': {'length': 5,
  'number_of_walks': 10,
  'epochs': 5,
  'embedding_dimension': 128,
  'batch_size': 20,
  'negative_samples': 10,
  'num_samples': [10, 5],
  'random_walk_p': 0.5,
  'random_walk_q': 2.0,
  'clusters': 2,
  'clusters_q': 1},
 'graph_configs': {'directed': True,
  'include_type': False,
  'feature_props': None,
  'feature_vector_prop': 'features',
  'edge_weight': 'npmi'}}

In [19]:
# Fetch the similarity-processor configuration of the named model.
# The timeout stops the cell from hanging forever when the local service is
# not running; raise_for_status() reports HTTP errors before the body is
# parsed, instead of failing later with an unrelated JSON decoding error.
r = requests.get(
    'http://127.0.0.1:5000/model/details/similarity-processor/',
    params={
        "name": "Test attri2vec model"
    },
    timeout=30)
print(r)
r.raise_for_status()
r.json()

<Response [200]>


{'similarity': 'cosine',
 'dimension': 128,
 'segmented': False,
 'interface': 'SimilarityProcessor'}

## Get resource embeddings

In [20]:
%%time
# Request the embedding vectors of the listed resources from the named model.
# The timeout stops the cell from hanging forever when the local service is
# not running; raise_for_status() reports HTTP errors before the body is
# parsed, instead of failing later with an unrelated JSON decoding error.
r = requests.get(
    'http://127.0.0.1:5000/model/embeddings/',
    params={
        "name": "Test attri2vec model",
        "resource_ids": ["dna replication", "glucose"]
    },
    timeout=30)
print(r)
r.raise_for_status()
r.json()

<Response [200]>
CPU times: user 4.31 ms, sys: 1.99 ms, total: 6.29 ms
Wall time: 16.6 ms


{'embeddings': {'dna replication': [0.07164812088012695,
   0.07756295800209045,
   0.0569116473197937,
   0.051368534564971924,
   0.07309478521347046,
   0.08538609743118286,
   0.06472799181938171,
   0.06495746970176697,
   0.06184706091880798,
   0.0522160530090332,
   0.049841344356536865,
   0.06333291530609131,
   0.06374534964561462,
   0.056498557329177856,
   0.05183088779449463,
   0.04495397210121155,
   0.0541052520275116,
   0.052536219358444214,
   0.0360034704208374,
   0.03352463245391846,
   0.06796756386756897,
   0.05813342332839966,
   0.07896772027015686,
   0.062244415283203125,
   0.0480460524559021,
   0.051534414291381836,
   0.059615641832351685,
   0.05519720911979675,
   0.0645219087600708,
   0.055949240922927856,
   0.09286651015281677,
   0.05681219696998596,
   0.056948333978652954,
   0.04469597339630127,
   0.06077474355697632,
   0.05059939622879028,
   0.04572010040283203,
   0.08580374717712402,
   0.048861920833587646,
   0.05425804853439331,
   

## Get nearest neighbors

In [21]:
%%time
# Ask for the k most similar points of each listed resource.
# The timeout stops the cell from hanging forever when the local service is
# not running; raise_for_status() reports HTTP errors before the body is
# parsed, instead of failing later with an unrelated JSON decoding error.
r = requests.get(
    'http://127.0.0.1:5000/model/similar-points/',
    params={
        "name": "Test attri2vec model",
        "resource_ids": ["glucose", "covid-19", "dna replication"],
        "k": 20
    },
    timeout=30)
print(r)
r.raise_for_status()
r.json()

<Response [200]>
CPU times: user 5.57 ms, sys: 2.52 ms, total: 8.08 ms
Wall time: 22.4 ms


{'similar_points': {'glucose': ['glucose',
   'foot',
   'hydrophobicity',
   'anxiety',
   'dog',
   'nasal',
   'proximal',
   'cerebral hemorrhage',
   'endotracheal',
   'water',
   'pruritus',
   'dizziness',
   'constipation',
   'subarachnoid hemorrhage',
   'intracranial hemorrhage',
   'depression',
   'cat',
   'brother',
   'anal injury',
   'bicarbonate ion'],
  'covid-19': ['covid-19',
   'middle east respiratory syndrome',
   'dyspnea',
   'severe acute respiratory syndrome',
   'viral respiratory tract infection',
   'respiratory failure',
   'disease or disorder',
   'pulmonary edema',
   'asthma',
   'h1n1 influenza',
   'dengue fever',
   'infertility',
   'ischemic cerebrovascular accident',
   'wheezing',
   'cystic fibrosis pulmonary exacerbation',
   'cystic fibrosis',
   'airway',
   'chronic obstructive pulmonary disease',
   'childhood-onset systemic lupus erythematosus',
   'septic shock'],
  'dna replication': ['dna replication',
   'nucleotide',
   'mutation

In [22]:
%%time
# Same nearest-neighbour query, with "values" set so that (per the recorded
# output) each neighbour is returned together with a numeric score.
# The timeout stops the cell from hanging forever when the local service is
# not running; raise_for_status() reports HTTP errors before the body is
# parsed, instead of failing later with an unrelated JSON decoding error.
r = requests.get(
    'http://127.0.0.1:5000/model/similar-points/',
    params={
        "name": "Test attri2vec model",
        "resource_ids": ["glucose", "covid-19", "dna replication"],
        "k": 20,
        "values": True
    },
    timeout=30)
print(r)
r.raise_for_status()
r.json()

<Response [200]>
CPU times: user 3.89 ms, sys: 1.46 ms, total: 5.35 ms
Wall time: 6.17 ms


{'similar_points': {'glucose': {'glucose': 1.0,
   'foot': 0.9908841848373413,
   'hydrophobicity': 0.9902871251106262,
   'anxiety': 0.9894585609436035,
   'dog': 0.9894585609436035,
   'nasal': 0.9894585609436035,
   'proximal': 0.9894585609436035,
   'cerebral hemorrhage': 0.9894585609436035,
   'endotracheal': 0.9894585609436035,
   'water': 0.9894585609436035,
   'pruritus': 0.9894585609436035,
   'dizziness': 0.9894585609436035,
   'constipation': 0.9894585609436035,
   'subarachnoid hemorrhage': 0.9894585609436035,
   'intracranial hemorrhage': 0.9894585609436035,
   'depression': 0.9894585609436035,
   'cat': 0.9894585609436035,
   'brother': 0.9894585609436035,
   'anal injury': 0.9894585609436035,
   'bicarbonate ion': 0.9894585609436035},
  'covid-19': {'covid-19': 1.0,
   'middle east respiratory syndrome': 0.9931174516677856,
   'dyspnea': 0.9874406456947327,
   'severe acute respiratory syndrome': 0.9871326684951782,
   'viral respiratory tract infection': 0.9868187904357

In [60]:
import pandas as pd

In [62]:
# Build a frame from records with differing keys: only the second record has
# "strength", and the recorded output shows that column empty for the first row.
pd.DataFrame([{"id": "a", "weight": 3}, {"id": "b", "weight": 4, "strength": 3}])

Unnamed: 0,id,weight,strength
0,a,3,
1,b,4,3.0


In [63]:
# Sample data: 3-tuples of two scalars followed by a dict of attributes.
l = [(1, 2, {"a": 1, "b": 2}), (1, 2, {"a": 3, "b": 7})]

In [67]:
# Flatten each (first, second, attrs) tuple into one dict: the attribute keys
# come first, followed by "source_id" and "t" taken from the two scalars.
[dict(attrs, source_id=first, t=second) for first, second, attrs in l]

[{'a': 1, 'b': 2, 'source_id': 1, 't': 2},
 {'a': 3, 'b': 7, 'source_id': 1, 't': 2}]

In [74]:
from bluegraph.backends.networkx import networkx_to_pgframe
from bluegraph.backends.stellargraph import pgframe_to_stellargraph

In [68]:
import networkx as nx

In [69]:
# Use the karate club example graph that ships with networkx as test data.
nxg = nx.karate_club_graph()

In [87]:
# Show the node identifiers of the example graph.
nxg.nodes()

NodeView((0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33))

In [145]:
# Convert the networkx graph with bluegraph's networkx backend helper;
# presumably the result is a property-graph frame (PGFrame) — confirm.
frame = networkx_to_pgframe(nxg)

In [146]:
# Label the first five nodes "Node" and every remaining node "La".
frame.add_node_types(dict.fromkeys(frame.nodes()[:5], "Node"))
frame.add_node_types(dict.fromkeys(frame.nodes()[5:], "La"))

In [None]:
# Label the first six edges with the type "Edge".
frame.add_edge_types(dict.fromkeys(frame.edges()[:6], "Edge"))

In [147]:
# frame.add_edge_types({e: "Edge" for e in frame.edges()[:6]})

In [148]:
# frame.add_edge_types({e: "bla" for e in frame.edges()[6:]})

In [149]:
# Convert the frame to a StellarGraph object, asking for type information.
# NOTE(review): the recorded output is a ValueError about a missing '@type'
# edge column — presumably edge types must be assigned on the frame before
# this call when include_type=True; confirm.
stellar_graph = pgframe_to_stellargraph(frame, include_type=True)

ValueError: edges: expected 'source', 'target', None, '@type' columns, found: 'source', 'target'

In [150]:
# Peek at the underscore-prefixed (private) edge storage of the graph;
# the recorded output shows it is an EdgeData object.
stellar_graph._edges

<stellargraph.core.element_data.EdgeData at 0x7f8aee95e390>

In [143]:
# List the edge types present in the converted graph.
stellar_graph.edge_types

Index(['Edge', 'bla'], dtype='object')

In [157]:
# Flatten the StellarGraph edges into one record per edge and index the
# resulting frame by (source id, target id).
edge_columns = ["@source_id", "@target_id", "@type", "weight"]
edge_dicts = [
    dict(zip(edge_columns, edge))
    for edge in zip(
        # Sources/targets are stored as internal locations; from_iloc maps
        # them back to the external node ids.
        stellar_graph._nodes.ids.from_iloc(stellar_graph._edges.sources),
        stellar_graph._nodes.ids.from_iloc(stellar_graph._edges.targets),
        stellar_graph._edges.type_of_iloc(slice(None)),
        stellar_graph._edges.weights,
    )
]
# Passing `columns` keeps the frame well-formed for a graph without edges:
# an empty record list would otherwise produce a frame with no columns and
# set_index would raise KeyError.
edge_frame = pd.DataFrame(edge_dicts, columns=edge_columns).set_index(
    ["@source_id", "@target_id"])

Unnamed: 0,@source_id,@target_id,@type,weight
0,0,1,Edge,1.0
1,0,2,Edge,1.0
2,0,3,Edge,1.0
3,0,4,Edge,1.0
4,0,5,Edge,1.0
...,...,...,...,...
73,2,8,bla,1.0
74,2,7,bla,1.0
75,2,3,bla,1.0
76,6,16,bla,1.0


In [142]:
# Build the edge iterator inside this cell: `iterator` was otherwise only
# available as leftover kernel state, and a zip object is exhausted after a
# single pass, so re-running the loop alone would silently print nothing.
iterator = zip(
    stellar_graph._nodes.ids.from_iloc(stellar_graph._edges.sources),
    stellar_graph._nodes.ids.from_iloc(stellar_graph._edges.targets),
    stellar_graph._edges.type_of_iloc(slice(None)),
    stellar_graph._edges.weights,
)
# Each element is a (source id, target id, edge type, weight) tuple.
for el in iterator:
    print(el)

(0, 1, 'Edge', 1.0)
(0, 2, 'Edge', 1.0)
(0, 3, 'Edge', 1.0)
(0, 4, 'Edge', 1.0)
(0, 5, 'Edge', 1.0)
(0, 6, 'Edge', 1.0)
(22, 33, 'bla', 1.0)
(22, 32, 'bla', 1.0)
(20, 33, 'bla', 1.0)
(20, 32, 'bla', 1.0)
(19, 33, 'bla', 1.0)
(18, 33, 'bla', 1.0)
(18, 32, 'bla', 1.0)
(14, 33, 'bla', 1.0)
(15, 32, 'bla', 1.0)
(23, 25, 'bla', 1.0)
(14, 32, 'bla', 1.0)
(13, 33, 'bla', 1.0)
(9, 33, 'bla', 1.0)
(8, 33, 'bla', 1.0)
(15, 33, 'bla', 1.0)
(23, 27, 'bla', 1.0)
(23, 33, 'bla', 1.0)
(23, 32, 'bla', 1.0)
(31, 32, 'bla', 1.0)
(30, 33, 'bla', 1.0)
(30, 32, 'bla', 1.0)
(29, 33, 'bla', 1.0)
(29, 32, 'bla', 1.0)
(28, 33, 'bla', 1.0)
(28, 31, 'bla', 1.0)
(27, 33, 'bla', 1.0)
(26, 33, 'bla', 1.0)
(26, 29, 'bla', 1.0)
(25, 31, 'bla', 1.0)
(24, 31, 'bla', 1.0)
(24, 27, 'bla', 1.0)
(24, 25, 'bla', 1.0)
(8, 32, 'bla', 1.0)
(23, 29, 'bla', 1.0)
(8, 30, 'bla', 1.0)
(5, 10, 'bla', 1.0)
(5, 16, 'bla', 1.0)
(1, 13, 'bla', 1.0)
(1, 7, 'bla', 1.0)
(1, 3, 'bla', 1.0)
(1, 2, 'bla', 1.0)
(0, 31, 'bla', 1.0)
(0, 21, 'bla

In [133]:
def _node_records(graph, node_type=None):
    """Build one record per node of ``graph``.

    Parameters
    ----------
    graph
        Graph object exposing ``nodes`` and ``node_features``.
    node_type
        When given, only nodes of this type are read and every record gets
        an "@type" entry; when None, all nodes are read and records carry
        no "@type".

    Returns
    -------
    list of dict
        Records with "@id", optionally "@type", and "features" only for
        nodes whose feature vector is non-empty.
    """
    type_kwargs = {} if node_type is None else {"node_type": node_type}
    type_fields = {} if node_type is None else {"@type": node_type}
    ids = graph.nodes(**type_kwargs)
    features = graph.node_features(ids, **type_kwargs)
    records = []
    for node_id, node_features in zip(ids, features):
        record = {"@id": node_id, **type_fields}
        # Skip empty feature vectors so featureless graphs do not get a
        # column of empty arrays.
        if len(node_features) > 0:
            record["features"] = node_features
        records.append(record)
    return records


# Graphs whose only node type is "default" are treated as untyped.
if stellar_graph.node_types != {"default"}:
    node_dicts = [
        record
        for node_type in stellar_graph.node_types
        for record in _node_records(stellar_graph, node_type)
    ]
else:
    node_dicts = _node_records(stellar_graph)

In [134]:
# Tabulate the node records: one row per record, record keys become columns.
pd.DataFrame(node_dicts)

Unnamed: 0,@id,@type
0,5,La
1,6,La
2,7,La
3,8,La
4,9,La
5,10,La
6,11,La
7,12,La
8,13,La
9,14,La


In [116]:
# NOTE(review): this cell reads like a method body pasted from a graph class:
# it uses `self`, relies on names not defined in this cell (`graph`,
# `node_type_attr`, `feature_attr`, `edge_type_attr`, `edge_weight_attr`) and
# ends with a bare `return`, so it cannot run as a top-level cell as written.
# Presumably it is StellarGraph's networkx export — confirm.

# Copy the nodes into `graph`, one node type at a time.
for ty in self.node_types:
    node_ids = self.nodes(node_type=ty)
    # Attribute dict attached to every node of this type.
    ty_dict = {node_type_attr: ty}

    if feature_attr is not None:
        features = self.node_features(node_ids, node_type=ty)

        # Add nodes one by one so each gets its own feature vector.
        for node_id, node_features in zip(node_ids, features):
            graph.add_node(
                node_id, **ty_dict, **{feature_attr: node_features},
            )
    else:
        # No features requested: add the whole type in one call.
        graph.add_nodes_from(node_ids, **ty_dict)

# Lazily pair each edge's source id, target id, type and weight; sources and
# targets are mapped through `ids.from_iloc` before use.
iterator = zip(
    self._nodes.ids.from_iloc(self._edges.sources),
    self._nodes.ids.from_iloc(self._edges.targets),
    self._edges.type_of_iloc(slice(None)),
    self._edges.weights,
)
# Add every edge with its type and weight stored as edge attributes.
graph.add_edges_from(
    (src, dst, {edge_type_attr: type_, edge_weight_attr: weight})
    for src, dst, type_, weight in iterator
)

return graph

TypeError: 'ExternalIdIndex' object is not iterable

In [89]:
# Fetch node features without arguments; the recorded output is an array of
# shape (34, 0), i.e. the nodes of this graph carry no features.
stellar_graph.node_features()

array([], shape=(34, 0), dtype=float64)

In [86]:
# Inspect the pandas index behind a private id index.
# NOTE(review): `n` is not defined in any visible cell — presumably it is
# stellar_graph._nodes left over from an earlier kernel session; confirm.
n._id_index.pandas_index

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33],
           dtype='int64')