# Register embedding vectors for SEU morphologies in Nexus

## Setup

In [None]:
import os
import getpass

In [None]:
import requests

In [None]:
import getpass

In [None]:
import uuid

In [None]:
from kgforge.core import KnowledgeGraphForge

In [None]:
from kgforge.specializations.mappings import DictionaryMapping

In [None]:
from bluegraph.downstream import EmbeddingPipeline
from bluegraph.core import GraphElementEmbedder

In [None]:
# Prompt for the Nexus access token without echoing it; the value is passed
# as `token=` to every KnowledgeGraphForge session created in this notebook.
TOKEN = getpass.getpass()

In [None]:
# Forge session on the neurosciencegraph/datamodels bucket, configured from
# the local brain-modeling-ontology checkout, with debug output enabled.
forge = KnowledgeGraphForge(
    "../brain-modeling-ontology/config/forge-config.yml",
    bucket="neurosciencegraph/datamodels",
    token=TOKEN,
    debug=True,
)

In [None]:
import json

# Load the ontology dump. JSON text is UTF-8, so decode it explicitly instead
# of relying on the platform's default encoding.
with open("../brain-modeling-ontology/dump.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Inspect the top-level keys of the loaded dump.
data.keys()

In [None]:
# Drop the "@context" entry before the dump is converted into a resource.
# NOTE: `del` raises KeyError if this cell is executed a second time.
del data["@context"]

In [None]:
# Build a forge resource from the dump (with "@context" already removed).
r = forge.from_json(data)

In [None]:
# Attach the Turtle serialization of the ontology as the resource's
# distribution.
r.distribution = forge.attach(
    "../brain-modeling-ontology/ontologies/bbp/brain-modeling-ontology.ttl",
    content_type="text/turtle")

In [None]:
# Show the resource before it is registered.
print(r)

In [None]:
# Register the ontology resource, passing the neuroshapes ontology schema id.
forge.register(r, schema_id="https://neuroshapes.org/dash/ontology")

In [None]:
# Show the resource again after the register call.
print(r)

## Fetch all SEU neuron morphologies from Nexus to get their IDs

In [None]:
# SPARQL query selecting the id, revision and name of every NeuronMorphology
# whose `deprecated` flag is false.
# NOTE(review): the query has no SEU-specific filter; presumably the bucket
# of the forge session scopes the results - confirm.
query = """
    SELECT ?id ?rev ?name
    WHERE {
        ?id a NeuronMorphology ;
            name ?name ;
            <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
            <https://bluebrain.github.io/nexus/vocabulary/rev> ?rev .
    }
"""

In [None]:
# Retrieve the resource with this id; the result is only displayed as the
# cell output, it is not stored in a variable.
forge.retrieve("https://bbp.epfl.ch/ontologies/core/bmo")

In [None]:
# Run the morphology query.
# NOTE(review): limit=1500 presumably caps the number of results; morphologies
# beyond that cap would be missing from resource_dict - confirm it is enough.
resources = forge.sparql(query, limit=1500)

In [None]:
# Number of results returned by the query.
len(resources)

In [None]:
# Index the query results by morphology id for direct lookup; if an id
# occurs more than once, the last result wins.
resource_dict = dict((res.id, res) for res in resources)

## Get resource embeddings

In [None]:
# Rebind `forge` to a session on the staging dke/embedder_catalog bucket;
# from here on `forge` no longer points at neurosciencegraph/datamodels.
forge = KnowledgeGraphForge(
    "https://raw.githubusercontent.com/BlueBrain/nexus-forge/master/examples/notebooks/use-cases/prod-forge-nexus.yml",
    endpoint="https://staging.nexus.ocp.bbp.epfl.ch/v1",
    bucket="dke/embedder_catalog",
    token=TOKEN,
)

In [None]:
# Id of the embedding model resource in the staging dke/embedder_catalog
# bucket, and the revision of it to retrieve (None is handled by the
# retrieval cell as "no explicit revision").
MODEL_ID = "https://staging.nexus.ocp.bbp.epfl.ch/v1/resources/dke/embedder_catalog/_/14d61701-c4fa-44ea-8139-0e0ed606b4ec"
MODEL_REVISION = 7

In [None]:
model_resource

In [None]:
model_resource = forge.retrieve(f"{MODEL_ID}{'?rev=' + str(MODEL_REVISION) if MODEL_REVISION is not None else ''}")
forge.download(
    model_resource, "distribution.contentUrl", "data")
pipeline_path = os.path.join("data", model_resource.distribution.name)

In [None]:
# Load the downloaded embedding pipeline from pipeline_path.
pipeline = EmbeddingPipeline.load(
    pipeline_path,
    embedder_interface=GraphElementEmbedder,
    embedder_ext="zip")

In [None]:
# Build the embedding table; later cells look rows up by morphology id and
# read its "embedding" column.
embedding_table = pipeline.generate_embedding_table()

In [None]:
# Build one plain-dict record per row of the embedding table, joining each
# vector with the name and revision of its morphology from resource_dict.
# A KeyError here means the table contains a morphology id that the SPARQL
# query did not return.
embeddings_list = []
# The loop variable is `record`, not `data`, so that it does not overwrite
# the ontology dump held in `data` earlier in the notebook.
for record in embedding_table.reset_index().to_dict("records"):
    at_id = record["@id"]
    morphology = resource_dict[at_id]  # looked up once instead of three times
    embeddings_list.append({
        "morphology_id": at_id,
        "morphology_rev": morphology.rev,
        "model_id": MODEL_ID,
        "model_version": "v0",
        "embedding_name": f"Embedding of morphology {morphology.name} at revision {morphology.rev}",
        # tolist() turns the stored vector into a plain Python list.
        "embedding": record["embedding"].tolist(),
        # Last path segment of the morphology id.
        "uuid": at_id.split("/")[-1],
    })

## Map embeddings to Nexus resources

In [None]:
# Load the dictionary mapping used to turn embedding records into resources.
mapping = DictionaryMapping.load("./mappings/seu-embedding.hjson")

In [None]:
# Apply the mapping to every record in embeddings_list.
embedding_resources = forge.map(embeddings_list, mapping)

In [None]:
# Give every embedding resource a freshly generated identifier.
# The loop variable is deliberately not `r`: that name holds the ontology
# resource built earlier, and overwriting it would make a re-run of the
# earlier `print(r)` / `forge.register(r, ...)` cells act on an embedding.
for embedding_resource in embedding_resources:
    embedding_resource.id = forge.format("identifier", "embeddings", str(uuid.uuid4()))

In [None]:
# Separate forge session on the staging dke/seu-embeddings bucket, where the
# embedding resources are registered, updated and tagged.
forge_staging = KnowledgeGraphForge(
    "https://raw.githubusercontent.com/BlueBrain/nexus-forge/master/examples/notebooks/use-cases/prod-forge-nexus.yml",
    endpoint="https://staging.nexus.ocp.bbp.epfl.ch/v1",
    bucket="dke/seu-embeddings",
    token=TOKEN,
)

In [None]:
# Register the embedding resources through the dke/seu-embeddings session.
forge_staging.register(embedding_resources)

## Tag resources with model UUID

In [None]:
# Search the staging bucket for resources of type Embedding.
# NOTE(review): limit=None presumably disables the result cap - confirm.
embs = forge_staging.search({"type": "Embedding"}, limit=None)

In [None]:
# Flatten the embedding resources into a DataFrame. Uses forge_staging, the
# session that fetched `embs`, rather than the embedder-catalog `forge`.
df = forge_staging.as_dataframe(embs)

In [None]:
# Ids of the embeddings whose generating activity used the current model.
# Compares against MODEL_ID instead of repeating the URL literal, so the
# filter cannot drift from the model that was actually loaded.
# NOTE(review): resources_to_upd is not used by the cells below, which update
# and tag all of `embs` - confirm whether they should be restricted to it.
is_from_model = df["generation.activity.used.id"] == MODEL_ID
resources_to_upd = df.loc[is_from_model, "id"].tolist()

In [None]:
# Peek at the id of the entity the first embedding derives from; the next
# cell uses this id to look rows up in embedding_table.
embs[0].derivation.entity.id

In [None]:
# Overwrite each resource's vector with the one from the embedding table,
# looked up by the id of the entity the embedding derives from.
for emb in embs:
    table_row = embedding_table.loc[emb.derivation.entity.id]
    # First value of the row, converted to a plain list.
    vector = table_row.tolist()[0]
    emb.embedding = vector.tolist()

In [None]:
# Push the refreshed embedding vectors to the staging bucket.
forge_staging.update(embs)

In [None]:
# NOTE(review): this repeats the update call of the previous cell on the same
# `embs`; presumably a leftover retry - confirm whether it is still needed.
forge_staging.update(embs)

In [None]:
# Tag the embeddings with "<model uuid>?rev=<model revision>", where the uuid
# is the last path segment of MODEL_ID.
# NOTE(review): unlike the model retrieval cell, a MODEL_REVISION of None is
# not special-cased here and would yield "...?rev=None"; the tag is also
# applied to all of `embs`, not only to resources_to_upd - confirm both.
model_uuid = MODEL_ID.rsplit("/", 1)[-1]
tag = f"{model_uuid}?rev={MODEL_REVISION}"
forge_staging.tag(embs, tag)