In [19]:
import getpass
import json
from urllib.parse import quote_plus

import nexussdk as nxs
import numpy as np
import requests

from kgforge.core import KnowledgeGraphForge

In [17]:
def set_elastic_view(forge, view):
    """Point the forge's Elasticsearch endpoint at the `_search` URL of a view.

    The URL is assembled as
    ``<ENDPOINT>/views/<org>/<project>/<view>/_search`` where ``<org>`` and
    ``<project>`` are the first two "/"-separated parts of
    ``forge._store.bucket``. Every part after ``ENDPOINT`` is escaped with
    ``quote_plus``.

    Args:
        forge: Forge session whose ``_store.service.elastic_endpoint`` mapping
            is updated in place.
        view: Identifier of the view to search against.

    Note:
        Reads the module-level ``ENDPOINT`` constant, so the configuration
        cell must have been executed first.
    """
    bucket_parts = forge._store.bucket.split("/")
    org = quote_plus(bucket_parts[0])
    project = quote_plus(bucket_parts[1])
    search_url = "/".join(
        (ENDPOINT, "views", org, project, quote_plus(view), "_search"))
    forge._store.service.elastic_endpoint["endpoint"] = search_url
    
    
def get_all_vectors(forge, resource_limit):
    """Collect the embeddings of non-deprecated documents, keyed by ``@id``.

    Sends a term query on ``_deprecated: false`` through ``forge.elastic``
    (i.e. to whichever Elasticsearch endpoint the forge currently uses) and
    reads ``@id`` and ``embedding`` from each hit's ``_source``.

    Args:
        forge: Forge session exposing an ``elastic(query)`` method.
        resource_limit: Value placed in the query's ``size`` field, i.e. the
            maximum number of hits requested.

    Returns:
        dict mapping each hit's ``@id`` to its ``embedding`` value.

    Raises:
        KeyError: if a hit's ``_source`` lacks ``@id`` or ``embedding``.
    """
    search_body = f"""{{
        "from" : 0,
        "size" : {resource_limit},
        "query": {{
            "term": {{"_deprecated": false}}
        }}
    }}
    """
    embedding_by_id = {}
    for hit in forge.elastic(search_body):
        source = hit._source
        embedding_by_id[source["@id"]] = source["embedding"]
    return embedding_by_id

In [2]:
# Base URL passed to both forge sessions and to nexussdk, and used as the
# prefix of the view search URLs built by set_elastic_view.
ENDPOINT = "https://bbp.epfl.ch/nexus/v1"
# NOTE(review): not referenced by any cell visible in this notebook - confirm
# it is still needed.
DOWNLOAD_DIR = "./data"
# Access token, prompted for interactively (input is not echoed) so that it is
# not hardcoded in the notebook.
TOKEN = getpass.getpass()

········


In [3]:
# Organization and project that together form the bucket ("<org>/<project>")
# of the forge session used to retrieve the recommender configuration.
ATLAS_CONFIG_ORG = "bbp"
ATLAS_CONFIG_PROJECT = "atlas"

In [4]:
# Identifier of the recommender configuration resource; it is retrieved
# through the atlas forge session.
ATLAS_RECOMMENDER_CONFIG = "https://bbp.epfl.ch/neurosciencegraph/data/d9938314-4e27-4c45-8afe-44484b02636d"

In [6]:
# Forge session bound to the "<org>/<project>" atlas bucket; the recommender
# configuration and the similarity/boosting views are queried through it.
forge_atlas = KnowledgeGraphForge(
    "../../configs/new-forge-config.yaml",
    bucket="/".join((ATLAS_CONFIG_ORG, ATLAS_CONFIG_PROJECT)),
    token=TOKEN,
    endpoint=ENDPOINT,
)

In [34]:
# Forge session bound to the "dke/embedding-pipelines" bucket; embedding model
# resources are retrieved through it.
forge_models = KnowledgeGraphForge(
    "../../configs/new-forge-config.yaml",
    bucket="dke/embedding-pipelines",
    token=TOKEN,
    endpoint=ENDPOINT,
)

In [8]:
# Configure the nexussdk client with the same endpoint and token as the forge
# sessions.
# NOTE(review): `nxs` is not used by any other cell visible in this notebook -
# confirm this setup is still needed.
nxs.config.set_environment(ENDPOINT)
nxs.config.set_token(TOKEN)

In [9]:
# Fetch the recommender configuration; its `configuration` entries are
# iterated further down (each entry provides embeddingModel, similarityView
# and boostingView ids).
# NOTE(review): presumably `retrieve` can return None on failure - verify and
# consider checking before use.
config_resource = forge_atlas.retrieve(ATLAS_RECOMMENDER_CONFIG)

In [31]:
# Script sources for the `script_score` query, keyed by the value of a model's
# `similarity` attribute. Each script reads the query vector from
# `params.query_vector` and the stored vector from `doc['embedding']`.
# The long script is split into adjacent string literals for readability only;
# the concatenated text is what gets sent.
FORMULAS = {
    # Cosine similarity shifted and halved: (cos + 1) / 2.
    "cosine": (
        "(cosineSimilarity(params.query_vector, doc['embedding']) + 1.0) / 2"
    ),
    # 1 / (1 + L2 distance).
    "euclidean": (
        "1 / (1 + l2norm(params.query_vector, doc['embedding']))"
    ),
    # x = 1 + 2 * dist^2 / ((1 - bm^2) * (1 - am^2)),
    # d = log(x + sqrt(x^2 - 1)), score = 1 / (1 + d),
    # with am/bm the magnitudes of the stored/query vectors and dist their
    # Euclidean distance.
    "poincare": (
        "float[] v = doc['embedding'].vectorValue; "
        "double am = doc['embedding'].magnitude; "
        "double bm = 0; "
        "double dist = 0; "
        "for (int i = 0; i < v.length; i++) { "
        "bm += Math.pow(params.query_vector[i], 2); "
        "dist += Math.pow(v[i] - params.query_vector[i], 2); "
        "} "
        "bm = Math.sqrt(bm); "
        "dist = Math.sqrt(dist); "
        "double x = 1 + (2 * Math.pow(dist, 2)) / ( (1 - Math.pow(bm, 2)) * (1 - Math.pow(am, 2)) );  "
        "double d = Math.log(x + Math.sqrt(Math.pow(x, 2) - 1)); "
        "return 1 / (1 + d);"
    ),
}

In [57]:
# Scratch sanity check: the norm of the unit vector [0, 1] (displays 1.0).
np.linalg.norm([0, 1])

1.0

In [61]:
# For every entry of the recommender configuration:
#   1. retrieve the embedding model and pick the script matching its
#      `similarity` attribute,
#   2. point the atlas forge at the entry's similarity view and load the
#      embedding vectors,
#   3. run one script_score query (for the first vector only) to check that
#      the script executes against that view,
#   4. point the atlas forge at the entry's boosting view and count its
#      non-deprecated documents.
# Request bodies are built as dicts and serialised with json.dumps so that
# ids, script sources and vectors are always escaped into valid JSON.
for el in config_resource.configuration:
    print(el.embeddingModel.id)
    model = forge_models.retrieve(el.embeddingModel.id)
    print(model.name)
    formula = FORMULAS[model.similarity]
    similarity_view = el.similarityView.id
    set_elastic_view(forge_atlas, similarity_view)
    # At most 1000 vectors are requested from the similarity view.
    vectors = get_all_vectors(forge_atlas, 1000)
    total_vectors = len(vectors)
    print("\tAll vectors: ", total_vectors)

    for k, vector in vectors.items():
        print("\t\t", np.array(vector).shape)
        print("\t\t", np.linalg.norm(vector))
        query = json.dumps({
            "size": total_vectors,
            "query": {
                "script_score": {
                    "query": {
                        "bool": {
                            # Exclude the document the query vector came from.
                            "must_not": {"term": {"@id": k}},
                            "must": {"exists": {"field": "embedding"}},
                        }
                    },
                    "script": {
                        "source": formula,
                        "params": {"query_vector": vector},
                    },
                }
            },
        })
        res = forge_atlas.elastic(query)
        print(query)
        print("\tResults for individual vectors:", len(res))
        # Only the first vector is exercised: one successful query is enough
        # to show that the script runs against this view.
        break

    set_elastic_view(forge_atlas, el.boostingView.id)
    # At most 10000 boosting documents are requested.
    boosting = forge_atlas.elastic(json.dumps({
        "from": 0,
        "size": 10000,
        "query": {
            "term": {"_deprecated": False}
        },
    }))
    print("\tBoosting factors: ", len(boosting))
    print()

https://bbp.epfl.ch/nexus/v1/resources/dke/embedding-pipelines/_/d0c21fd5-cb9c-445c-b0a4-94847ba61f5a
SEU NeuronMorphology Neurite Features
	All vectors:  298
		 (132,)
		 0.36763958112832346
{
          "size": 298,
          "query": {
            "script_score": {
                "query": {
                    "bool" : {
                      "must_not" : {
                        "term" : { "@id": "https://bbp.epfl.ch/neurosciencegraph/data/embeddings/578b2f6d-af38-4d09-8aad-5b36fd348c55" }
                      },
                      "must": { "exists": { "field": "embedding" } }
                    }
                },
                "script": {
                    "source": "1 / (1 + l2norm(params.query_vector, doc['embedding']))",
                    "params": {
                      "query_vector": [0.02186267450451851, 0.02186267450451851, 0.010412359610199928, 0.010412359610199928, 0.010360310785472393, 0.010360310785472393, 0.010464238002896309, 0.010464238002896309, 0.0

SEU NeuronMorphology Brain Region Embedding
	All vectors:  298
		 (32,)
		 0.5417642392704944
{
          "size": 298,
          "query": {
            "script_score": {
                "query": {
                    "bool" : {
                      "must_not" : {
                        "term" : { "@id": "https://bbp.epfl.ch/neurosciencegraph/data/embeddings/8e733790-a69e-424f-97a1-80f58e55975a" }
                      },
                      "must": { "exists": { "field": "embedding" } }
                    }
                },
                "script": {
                    "source": "float[] v = doc['embedding'].vectorValue; double am = doc['embedding'].magnitude; double bm = 0; double dist = 0; for (int i = 0; i < v.length; i++) { bm += Math.pow(params.query_vector[i], 2); dist += Math.pow(v[i] - params.query_vector[i], 2); } bm = Math.sqrt(bm); dist = Math.sqrt(dist); double x = 1 + (2 * Math.pow(dist, 2)) / ( (1 - Math.pow(bm, 2)) * (1 - Math.pow(am, 2)) );  double d = Math.log

In [None]:
# Scratch cell (never executed): a bare string literal that is neither
# assigned nor used. NOTE(review): appears to be a copy of
# FORMULAS["poincare"] - consider deleting this cell.
"float[] v = doc['embedding'].vectorValue; double am = doc['embedding'].magnitude; double bm = 0; double dist = 0; for (int i = 0; i < v.length; i++) { bm += Math.pow(params.query_vector[i], 2); dist += Math.pow(v[i] - params.query_vector[i], 2); } bm = Math.sqrt(bm); dist = Math.sqrt(dist); double x = 1 + (2 * Math.pow(dist, 2)) / ( (1 - Math.pow(bm, 2)) * (1 - Math.pow(am, 2)) );  double d = Math.log(x + Math.sqrt(Math.pow(x, 2) - 1)); return 1 / (1 + d);"

In [None]:
# Scratch cell (never executed): a bare string literal that is neither
# assigned nor used. NOTE(review): appears to be a copy of
# FORMULAS["poincare"] - consider deleting this cell.
"float[] v = doc['embedding'].vectorValue; double am = doc['embedding'].magnitude; double bm = 0; double dist = 0; for (int i = 0; i < v.length; i++) { bm += Math.pow(params.query_vector[i], 2); dist += Math.pow(v[i] - params.query_vector[i], 2); } bm = Math.sqrt(bm); dist = Math.sqrt(dist); double x = 1 + (2 * Math.pow(dist, 2)) / ( (1 - Math.pow(bm, 2)) * (1 - Math.pow(am, 2)) );  double d = Math.log(x + Math.sqrt(Math.pow(x, 2) - 1)); return 1 / (1 + d);"

In [None]:
"float[] v = doc['embedding'].vectorValue; double am = doc['embedding'].magnitude; double bm = 0; double dist = 0; for (int i = 0; i < v.length; i++) { bm += Math.pow(params.query_vector[i], 2); dist += Math.pow(v[i] - params.query_vector[i], 2); } bm = Math.sqrt(bm); dist = Math.sqrt(dist); double x = 1 + (2 * Math.pow(dist, 2)) / ( (1 - Math.pow(bm, 2)) * (1 - Math.pow(am, 2)) );  double d = Math.log(x + Math.sqrt(Math.pow(x, 2) - 1)); return 1 / (1 + d);