In [1]:
import getpass
import os

from kgforge.core import KnowledgeGraphForge

from bluegraph.downstream import EmbeddingPipeline
from bluegraph.core import GraphElementEmbedder

## Configs

In [2]:
# Local directory that receives the downloaded model distribution.
DOWNLOAD_DIR = "data/"

In [3]:
# Read the access token from an interactive prompt instead of hardcoding it in the notebook.
TOKEN = getpass.getpass()

········


In [4]:
# Endpoint URL and bucket handed to the forge session.
endpoint, bucket = (
    "https://staging.nexus.ocp.bbp.epfl.ch/v1",
    "dke/embedder_catalog",
)

In [5]:
# Forge configuration file hosted in the nexus-forge repository.
forge_config_url = (
    "https://raw.githubusercontent.com/BlueBrain/nexus-forge/master/"
    "examples/notebooks/use-cases/prod-forge-nexus.yml"
)
# Create the forge session for the configured endpoint and bucket,
# authenticating with the token entered above.
forge = KnowledgeGraphForge(
    forge_config_url, bucket=bucket, endpoint=endpoint, token=TOKEN)

## Fetch the model

In [6]:
# Identifier of the model resource to fetch.
MODEL_ID = (
    "https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/"
    "dke/embedder_catalog/_/"
    "e2b953b9-6724-4278-a1e5-3472bd63e374"
)
# Revision to pin; set to None to retrieve the resource without a `?rev=` query.
MODEL_REVISION = 4

In [8]:
# Append the revision query only when a specific revision was requested.
if MODEL_REVISION is None:
    revision_query = ""
else:
    revision_query = "?rev=" + str(MODEL_REVISION)
model_resource = forge.retrieve(f"{MODEL_ID}{revision_query}")

# Download the file referenced by `distribution.contentUrl` into DOWNLOAD_DIR.
forge.download(model_resource, "distribution.contentUrl", DOWNLOAD_DIR)

# Expected location of the downloaded file
# (assumes it is saved under its distribution name — TODO confirm).
pipeline_path = os.path.join(DOWNLOAD_DIR, model_resource.distribution.name)

## Get embeddings

In [9]:
# Options forwarded to the loader: the embedder interface class and the
# file extension of the stored embedder.
load_options = {
    "embedder_interface": GraphElementEmbedder,
    "embedder_ext": "zip",
}
# Load the pipeline from the path of the downloaded distribution.
pipeline = EmbeddingPipeline.load(pipeline_path, **load_options)

Get the entire embedding table

In [19]:
# Build the embedding table from the loaded pipeline.
embedding_table = pipeline.generate_embedding_table()

In [20]:
# Preview 5 sampled rows rather than displaying the whole table.
embedding_table.sample(5)

Unnamed: 0_level_0,embedding
@id,Unnamed: 1_level_1
https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/3e076429-4ab9-425b-8f5b-d54dc24bb0d1,"[0.95492154, 0.07009834, 0.8527492, 0.3953415,..."
https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/fe61675c-6996-4ecd-97a4-8a481fc1d9df,"[0.19131932, 0.4430596, 0.55083025, 0.6322002,..."
https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/abe242db-989e-4f96-bea0-aacb7ea7f953,"[0.5395997, 0.27949578, 0.5433785, 0.28566566,..."
https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/2c713500-56db-4f5d-8ad2-c136b282325e,"[0.06509262, 0.9548959, 0.30024314, 0.9601602,..."
https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/14bc5fe0-9fa3-4db2-83e3-ea8d428c0c9e,"[0.16468698, 0.32835978, 0.4562115, 0.5482951,..."


Get embeddings for particular points

In [23]:
# Choose the points of interest; here, the index labels of 5 randomly sampled rows.
sampled_rows = embedding_table.sample(5)
selected_points = sampled_rows.index

In [24]:
# Retrieve the vectors for the selected points and pair each point with its
# vector (pairing is positional, in the order the points were passed).
retrieved_vectors = pipeline.retrieve_embeddings(selected_points)
embeddings = {
    point: vector
    for point, vector in zip(selected_points, retrieved_vectors)
}

In [25]:
# Display the mapping from each selected point to its embedding.
embeddings

{'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/63e57e33-ad42-4061-abd2-7985e6c60810': [0.232523113489151,
  0.5161752700805664,
  0.5145605802536011,
  0.6888449192047119,
  0.26724308729171753,
  0.9471117258071899,
  0.10689541697502136,
  0.1659252643585205,
  0.19767823815345764,
  0.7479411363601685,
  0.22380682826042175,
  0.2966201901435852,
  0.36487939953804016,
  0.9663470983505249,
  0.18814629316329956,
  0.06588619947433472,
  0.17382386326789856,
  0.11880603432655334,
  0.5362321138381958,
  0.35540294647216797,
  0.18846911191940308,
  0.16325169801712036,
  0.5253798365592957,
  0.5509405136108398,
  0.7927967309951782,
  0.09605887532234192,
  0.21041753888130188,
  0.8145341277122498,
  0.8618761301040649,
  0.7124886512756348,
  0.5909754037857056,
  0.13244372606277466,
  0.767353892326355,
  0.7458503246307373,
  0.1895066499710083,
  0.9049203395843506,
  0.6466699242591858,
  0.23576441407203674,
  0.17844417691230774,
  0.4981507658958435,
  0