In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
import matplotlib.pyplot as plt

In [6]:
from string import Template

import numpy as np
import pandas as pd

import getpass
import json

from collections import defaultdict, namedtuple

from urllib.parse import quote_plus

from kgforge.core import KnowledgeGraphForge

from inference_tools.query.elastic_search import set_elastic_view
from inference_tools.utils import check_premise, execute_query, execute_query_pipe

In [3]:
TOKEN = getpass.getpass()

········


In [4]:
ENDPOINT = "https://staging.nexus.ocp.bbp.epfl.ch/v1"

In [5]:
forge = KnowledgeGraphForge(
    "../../configs/new-forge-config.yaml",
    endpoint=ENDPOINT,
    token=TOKEN, 
    bucket="dke/inference-test")

# Inference Rules

Configure the atlas

In [6]:
configuration = {
    "project": "dke/inference-test"
}

Get filters from atlas (or advanced query)

We get filters from atlas (or advanced query), for example:

    brainRegion "http://api.brain-map.org/api/v2/data/Structure/31"  # ACA
    species "http://purl.obolibrary.org/obo/NCBITaxon_10088"   # Mouse
    MType "http://uri.interlex.org/base/ilx_0381377"  # L6_UPC

In [7]:
input_filters = {
    "brainRegion": "http://api.brain-map.org/api/v2/data/Structure/31",
    "MType": "http://uri.interlex.org/base/ilx_0381377",
    "species": "http://purl.obolibrary.org/obo/NCBITaxon_10088",
}

Show all rules whose premises this search satisfies

- `KGQuery`: `SPARQLQuery`, `ESQuery`, `ForgeSearchQuery`
- `RulePremise`: `PropertyPremise` 

Components of a `QueryBasedGeneralizationRule`:

- __Rule Parameters__ values from atlas filters or advanced query (species, brain region, cell type (m/e/t), datatype) 
- __Premises__ need to be satisfied on provided rule params for the rule to be applied
- __Query__ is applied to retrieve resources (result of inference). Queries can use the provided rule parameters

Course of actions:
    - Take filters
    - Take all the available rules (?)
    - Search for rules whose premises filters satisfy:
       -  For each rule execute 'premise query' filled with parameters generated from filters
      - Return rules whose premises are satisfied
    - Apply queries of satisfied rules

In [392]:


def check_premise(forge, rule, parameters):
    if "hasParameter" in rule:
        parameters = build_parameter_map(
            rule["hasParameter"], parameters)
    satisfies = True
    for premise in rule["premise"]:
        if premise["@type"] == "SPARQLPremise":
            query = Template(premise["hasBody"]).substitute(**parameters)
            results = forge.sparql(query, limit=None)
            if len(results) == 0:
                satisfies = False
                break
        elif premise["@type"] == "ForgeSearchPremise":
            target_param = premise["hasBody"].get("targetParameter", None)
            target_path = premise["hasBody"].get("targetPath", None)
            query = json.loads(Template(json.dumps(premise["hasBody"]["pattern"])).substitute(
                **parameters))
            resources = forge.search(query)
            if target_param:
                # If target parameter is specified, we check that its value is in the
                if target_path:
                    matched_values = [
                        follow_path(forge.as_json(r), target_path)
                        for r in resources
                    ]
                else:
                    matched_values = [r.id for r in resources]
                if parameters[target_param] not in matched_values:
                    satisfies = False
                    break
            else:
                if len(resources) == 0:
                    satisfies = False
                    break
        elif premise["@type"] == "ElasticSearchPremise":
            print(premise)
        else:
            raise ValueError("Unknown type of premise")
    return satisfies


def follow_path(resource, path):
    value = resource
    path = path.split(".")
    for el in path:
        value = value[el]
    return value


def build_parameter_map(parameter_spec, parameter_values):
    param_map = {}
    for p in parameter_spec:
        name = p["name"]
        if p["type"] == "list":
            param_map[name] = ", ".join([f"\"{el}\"" for el in parameter_values[name]])
        elif p["type"] == "str":
            if isinstance(parameter_values[name], list):
                value = parameter_values[name][0]
            else:
                value = parameter_values[name]
            param_map[name] = f"\"{value}\""
        elif p["type"] == "sparql_uri":
            if isinstance(parameter_values[name], list):
                value = parameter_values[name][0]
            else:
                value = parameter_values[name]

            param_map[name] = value
        else:
            param_map[name] = parameter_values[name]
    return param_map


def execute_query(forge, query, parameters):
    if "hasParameter" in query:
        parameters = build_parameter_map(
            query["hasParameter"], parameters)
    if query["@type"] == "SPARQLQuery":
        query = Template(query["hasBody"]).substitute(**parameters)
        resources = forge.sparql(query, limit=None)
        return forge.as_json(resources)
    elif query["@type"] == "ForgeSearchQuery":
        target_param = query["hasBody"].get("targetParameter", None)
        target_path = query["hasBody"].get("targetPath", None)
        query = json.loads(Template(json.dumps(query["hasBody"]["pattern"])).substitute(
            **parameters))
        resources = forge.search(query)
        return resources
    elif query["@type"] == "SimilarityQuery":
        resources = execute_similarity_query(forge, query, parameters)
        return resources
    elif query["@type"] == "ElasticSearchQuery":
        print(query)
    else:
        raise ValueError("Unknown type of query")


def execute_composite_query(forge, head, parameters, rest=None):
    if rest is None:
        if head["@type"] == "QueryPipe":
            return execute_composite_query(
                forge, head["head"], parameters, head["rest"])
        else:
            return execute_query(forge, head, parameters)
    else:
        result = execute_query(forge, head, parameters)
        # Compute new parameters combining old parameters and the result of the query
        new_parameters = {**parameters}
        for mapping in head["resultParameterMapping"]:
            if isinstance(result, list):
                new_parameters[mapping["parameterName"]] = [
                    follow_path(el, mapping["path"]) for el in result
                ]
            else:
                new_parameters[mapping["parameterName"]] = result[mapping["path"]]
        if rest["@type"] == "QueryPipe":
            return execute_composite_query(forge, rest["head"], new_parameters, rest["rest"])
        else:
            return execute_query(forge, rest, new_parameters)
        

def execute_search_query(forge, rule, parameters):
    return execute_composite_query(forge, rule["searchQuery"], parameters)

# Principle: PC cells in neocortex have the same E-behaviour

## Rule A (simple query)

Input parameters: brain_region, MType
If (premise)
   (brain region)-[is_part_of]->({id: isocortex})
   (mtype)-[IS_SUBCLASS_OF]->({id: PC})

Return X s.t.
- (X)-[IS_A]->(Trace)
- (X)-[subject/species]->({id: Mouse})
- (X)-[brainLocation/brainRegion]->(brain_region)-[is_part_of]->({id: isocortex})
- (X)-[brainLocation/layer]->(brain layer {id: L5})
- (X)-[hasMType]->(MType)-[IS_SUBCLASS_OF]->({id: PC})

In [393]:
premise = [
   {
        "@type": "SPARQLPremise",
        "hasParameter": [
            {
                "name": "brainRegion",
                "description": "URI of the query brain region",
                "type": "sparql_uri"
            }
        ],
        "hasBody": """
            SELECT ?x
            WHERE {
                <$brainRegion> <http://schema.org/isPartOf>* <http://api.brain-map.org/api/v2/data/Structure/315> .
            }
        """
    },
    {
        "@type": "SPARQLPremise",
        "hasParameter": [
            {
                "name": "MType",
                "description": "URI of the query MType",
                "type": "sparql_uri"
            }
        ],
        "hasBody": """
            SELECT ?x
            WHERE {
                <$MType> rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
            }
        """
    },
    {
        "@type": "ForgeSearchPremise",
        "hasParameter": [
            {
                "name": "brainRegion",
                "description": "URI of the query brain region",
                "type": "sparql_uri"
            }
        ],
        "hasBody": {
            "targetParameter": "brainRegion",
            "targetPath": "id",
            "pattern": {
                "isPartOf": "http://api.brain-map.org/api/v2/data/Structure/315"
            }
        }
    },
]

In [394]:
search_query_A = {
    "@type": "SPARQLQuery",
    "hasBody": """
        SELECT ?resource
        WHERE {
          ?resource a <https://neuroshapes.org/Trace> ;
                      <https://neuroshapes.org/annotation>/<https://neuroshapes.org/hasBody> ?mtype ;
                      <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
                      <https://neuroshapes.org/brainLocation>/<https://neuroshapes.org/brainRegion> ?brainRegion.
          ?mtype rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
          ?brainRegion <http://schema.org/isPartOf>* <http://api.brain-map.org/api/v2/data/Structure/315> .
        }
    """
}

In [395]:
rule_A = {
    "@type": "GeneralizationRule",
    "name": "Trace generalization for cortical PCs",
    "description": "Collect traces belonging to PCs from other cortical regions",
    "premise": premise,
    "searchQuery": search_query_A
}

In [12]:
check_premise(forge, rule_A, input_filters)

True

In [13]:
execute_search_query(forge, rule_A, input_filters)

[{'resource': 'https://bbp.epfl.ch/neurosciencegraph/data/ff0e496d-b8a0-4c0e-8487-545563495dc0'},
 {'resource': 'https://bbp.epfl.ch/neurosciencegraph/data/2b108ccf-4ad2-4495-8490-17b2fde656fa'},
 {'resource': 'https://bbp.epfl.ch/neurosciencegraph/data/4fba4ab3-189c-4ff4-96bf-6904c1b8912b'}]

## Rule B (query + similarity search)

Input parameters: brain_region, MType, similarityModelId, topNeighbors

Find closest MType and then return traces/morphs with the closest MType that is also a subclass of pyramidal neuron and that are located in a cortical region.

If (premise)
   (brain region)-[is_part_of]->({id: isocortex})
   (mtype)-[IS_SUBCLASS_OF]->({id: PC})


Return Y s.t.
- (Y)-[hasMType]->(MTypeY)
- Select top K MTypeY by sim_{similarityModel}(MType, MTypeY) 

(or for traces)
Return Y s.t.
- (Y)-[hasEType]->(ETypeY)
- Select either top K ETypeY by sim_{similarityModel}(ETypeX, ETypeY) or s.t sim_{similarityModel}(ETypeX, ETypeY) > similarityThreshold

In [14]:
set_elastic_view(forge, "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view")

__Notes__:

- Similarity-based query is not allowed in premises
- You can use results from the previous query or input params in the next query

TODO: make rule know which view to use
- "configuration": "https://bbp.epfl.ch/neurosciencegraph/data/ontology-class-recommender-config",
- "similarityModels": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel",          

In [15]:
search_query_B =  {
    "@type": "QueryPipe",
    "head": {
        # Query type
        "@type": "SPARQLQuery",
#         "queryConfiguration": {
#             "view": "<sparql_view>"
#         },
        # Query body
        "hasBody": """
            SELECT ?mtype
            WHERE {
                ?mtype rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
                ?trace <https://neuroshapes.org/annotation>/<https://neuroshapes.org/hasBody> ?mtype.
            }
        """,
        # Mapping of query results to the parameters (passed to the next query)
        "resultParameterMapping": [
            {
                "path": "mtype",
                "parameterName": "acceptableResults"
            }
        ]
    },
    "rest": { 
        "@type": "QueryPipe",
        "head": {
            # Query type
            "@type": "SimilarityQuery",
            # Number of top results to return
            "k": 1,
            # Specification of input parameters
            "hasParameter": [
                {
                    "name": "MType",
                    "description": "URI of the query MType",
                    "type": "str"
                },
                {
                    "name": "acceptableResults",
                    "description": "List of URI's of acceptable MTypes (all PCs)",
                    "type": "list"
                }
            ],
            # Query to retrieve embeddings
            "embeddingQuery": """
                {
                  "from": 0,
                  "size": 1,
                  "query": {
                    "bool": {
                      "must": [
                        {
                          "nested": {
                            "path": "derivation.entity",
                            "query": {
                              "terms": {
                                "derivation.entity.@id": [$MType]
                              }
                            }
                          }
                        }
                      ]
                    }
                  }
                }
            """, 
            # Filter to apply on the similarity search results
            "resultFilter": """
                "must": {
                    "nested": {
                        "path": "derivation.entity",
                        "query": {
                            "terms": { "derivation.entity.@id": [$acceptableResults] }
                        }
                    }
                }
            """,
            # Mapping of query results to the parameters (passed to the next query)
            "resultParameterMapping": [{
                "path": "derivation.entity.@id",
                "parameterName": "closestMType"
            }]
        },
        "rest": {
            # Query type
            "@type": "SPARQLQuery",
            # Specification of input parameters
            "hasParameter": [
                {
                    "name": "closestMType",
                    "description": "URI of the closest acceptable MType",
                    "type": "sparql_uri"
                }
            ],
            # Query body
            "hasBody": """
                SELECT ?id
                WHERE {
                    ?id a <https://neuroshapes.org/Trace> ;
                        <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
                        <https://neuroshapes.org/annotation> ?annotation ;
                        <https://neuroshapes.org/brainLocation>/<https://neuroshapes.org/brainRegion> ?brainRegion.
                    ?annotation <https://neuroshapes.org/hasBody> <$closestMType>.
                }
            """
        }
    }
}

In [16]:
rule_B = {
    "@type": "GeneralizationRule",
    "name": "MType similarity based trace generalization for cortical PCs",
    "description": "Collect traces belonging to the closest (PC) MType from other cortical regions",
    "premise": premise,
    "searchQuery": search_query_B
}

In [17]:
check_premise(forge, rule_B, input_filters)

True

In [168]:
execute_search_query(forge, rule_B, input_filters)

ValueError: No config provided

In [412]:
similarity_query1 = {
    # Query type
    "@type": "SimilarityQuery",
    # Number of top results to return
    "k": 1,
    # Configuration of similarity views
    # (multiple records mean that multiple models are combined)
    "queryConfiguration": {
        "embeddingModel": {
            "@id": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel",
            "@type": "EmbeddingModel"
        },
        "similarityView": {
            "@id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view",
            "@type": "ElasticSearchView"
        },
        "scoreFormula": "euclidean",
    },
    # Specification of input parameters
    "hasParameter": [
        {
            "name": "MType",
            "description": "URI of the query MType",
            "type": "str"
        }
    ],
    # Target to retrieve embeddings
    "searchTargetParameter": "MType",
    # Mapping of query results to the parameters (passed to the next query)
    "resultParameterMapping": [{
        "path": "derivation.entity.@id",
        "parameterName": "closestMType"
    }]
}

In [463]:
similarity_query2 = {
    # Query type
    "@type": "SimilarityQuery",
    # Number of top results to return
    "k": 1,
    # Configuration of similarity views
    # (multiple records mean that multiple models are combined)
    "queryConfiguration": [
        {
            "embeddingModel": {
                "@id": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel",
                "@type": "EmbeddingModel"
            },
            "similarityView": {
                "@id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view",
                "@type": "ElasticSearchView"
            },
            "boosted": True,
            "boostingView": {
                "@id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-boosting-view",
                "@type": "ElasticSearchView"
            },
            "statisticsView": {
                "@id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-statistics-view",
                "@type": "ElasticSearchView"
            }
        },
        {
            "embeddingModel": {
                "@id": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel2",
                "@type": "EmbeddingModel"
            },
            "similarityView": {
                "@id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view2",
                "@type": "ElasticSearchView"
            },
            "boosted": True,
            "boostingView": {
                "@id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-boosting-view2",
                "@type": "ElasticSearchView"
            },
            "statisticsView": {
                "@id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-statistics-view2",
                "@type": "ElasticSearchView"
            }
        }
    ],
    # Specification of input parameters
    "hasParameter": [
        {
            "name": "MType",
            "description": "URI of the query MType",
            "type": "str"
        }
    ],
    # Target to retrieve embeddings
    "searchTargetParameter": "MType",
    # Mapping of query results to the parameters (passed to the next query)
    "resultParameterMapping": [{
        "path": "derivation.entity.@id",
        "parameterName": "closestMType"
    }]

}

In [467]:
FORMULAS = {
    "cosine": "doc['embedding'].size() == 0 ? 0 : (cosineSimilarity(params.query_vector, doc['embedding']) + 1.0) / 2",
    "euclidean": "doc['embedding'].size() == 0 ? 0 : (1 / (1 + l2norm(params.query_vector, doc['embedding'])))",
    "poincare": "float[] v = doc['embedding'].vectorValue; if (doc['embedding'].size() == 0) { return 0; } double am = doc['embedding'].magnitude; double bm = 0; double dist = 0; for (int i = 0; i < v.length; i++) { bm += Math.pow(params.query_vector[i], 2); dist += Math.pow(v[i] - params.query_vector[i], 2); } bm = Math.sqrt(bm); dist = Math.sqrt(dist); double x = 1 + (2 * Math.pow(dist, 2)) / ( (1 - Math.pow(bm, 2)) * (1 - Math.pow(am, 2)) );  double d = Math.log(x + Math.sqrt(Math.pow(x, 2) - 1)); return 1 / (1 + d);"
}
VECTOR_PARAMETER = "query_vector"


def get_all_documents(forge):
    """Get all ES documents using current forge session."""
    return forge.elastic("""
        {
          "query": {
              "term": {
                  "_deprecated": false
                }
            }
        }
    """)
    

def get_embedding_vector(forge, search_target):
    """Get embedding vector for the target of the input similarity query.
    
    Parameters
    ----------
    forge : KnowledgeGraphForge
        Instance of a forge session
    search_target : str
        Value of the search target (usually, a resource ID for which we
        want to retrieve its nearest neighbors).

    Returns
    -------
    vector_id : str
        ID of the resource corresponding to the search target
    vector : list
        Corresponding embedding vector 
    """
    vector_query = """
        {
          "from": 0,
          "size": 1,
          "query": {
            "bool": {
              "must": [
                {
                  "nested": {
                    "path": "derivation.entity",
                    "query": {
                      "terms": {
                        "derivation.entity.@id": [$_searchTarget]
                      }
                    }
                  }
                }
              ]
            }
          }
        }
    """
    vector_query = Template(vector_query).substitute({"_searchTarget": search_target})
    result = forge.elastic(vector_query)
    vector_id = result[0]._id
    vector = result[0]._source["embedding"]
    return vector_id, vector
 

def get_neighbors(forge, vector, vector_id, k, score_formula="euclidean",
                  result_filter=None, parameters=None):
    """Get nearest neighbors of the provided vector.
    
    Parameters
    ----------
    forge : KnowledgeGraphForge
        Instance of a forge session
    vector : list
        Vector to provide into similarity search
    vector_id : str
        Id of the embedding resource  corresponding to the 
        provided search vector (will be excluded in the
        similarity search).
    score_formula : str, optional
        Name of the formula to use for computing similarity scores,
        possible values: "euclidean" (default), "cosine", "poincare".
    result_filter : str, optional
        String representing a parametrized ES filter expression to append
        to the search query (e.g. "'must': {'terms': {'tag': ['a', 'b', 'c']}}")).
    parameters : dict, optional
        Parameter dictionary to use in the provided `result_filter` statement.
        
    Returns
    -------
    result : list of tuples
        List of similarity search results, each element is a tuple with the score and
        the corresponding resource (json representation fo the resource).
    """
    # Preprocess result filter
    if result_filter:
        if parameters:
            result_filter = Template(result_filter).substitute(parameters)
        result_filter = ",\n" + result_filter
    else:
        result_filter = ""

    similarity_query = """
        {
          "size": $_k,
          "query": {
            "script_score": {
                "query": {
                    "bool" : {
                      "must_not" : {
                        "term" : { "_id": "$_vectorId" }
                      },
                      "must": { "exists": { "field": "embedding" } }
                      $_resultFilter
                    }
                },
                "script": {
                    "source": "$_formula",
                    "params": {
                      "query_vector": $_vector
                    }
                }
            }
          }
    }
    """
    similarity_query = Template(similarity_query).substitute({
        "_vectorId": vector_id,
        "_vector": vector,
        "_k": k,
        "_formula": FORMULAS[score_formula],
        "_resultFilter": result_filter
    })
    result = [
        (el._score, el._source)
        for el in forge.elastic(similarity_query, limit=1000)
    ]
    return result


def query_similar_resources(forge, query, config, parameters, k):
    """Query similar resources using the similarity query.
    
    Parameters
    ----------
    forge : KnowledgeGraphForge
        Instance of a forge session
    query : dict
        Json representation of the similarity search query (`SimilarityQuery`)
    config: dict or list of dict
        Query configuration containing references to the target views
        to be queried.
    parameters : dict
        Input parameters used in the similarity query
    k : int
        Number of nearest neighbors to query
    
    Returns
    -------
    result : list of tuples
        List of similarity search results, each element is a tuple with the score and
        the corresponding resource (json representation fo the resource).
    """
    # Set ES view from the config
    set_elastic_view(forge, config["similarityView"]["@id"])

    # Get search target vector
    target_parameter = query.get("searchTargetParameter", None)
    search_target = parameters[target_parameter]
    vector_id, vector = get_embedding_vector(forge, search_target)

    # Retrieve score formula from the model
    model = forge.retrieve(config["embeddingModel"]["@id"])
    score_formula = model.similarity
    
    # Setup the result filter
    result_filter = query.get("resultFilter", "")
    
    # Search neighbors
    result = get_neighbors(forge, vector, vector_id, k, score_formula=score_formula,
                  result_filter=result_filter, parameters=parameters)
    return vector_id, result


def get_score_stats(forge, config, boosted=False):
    # Get view stats to do min/max scaling of scores
    set_elastic_view(forge, config["statisticsView"]["@id"])
    boosted_str = "true" if boosted else "false"
    statistics = forge.elastic(f"""
        {{
          "query": {{
            "bool" : {{
              "must" : {{
                "term" : {{ "_deprecated": false }}
              }},
              "must": {{
                "term": {{ "boosted": {boosted_str} }}
              }}
            }}
          }}
        }}
    """)
    if len(statistics) == 0:
        raise ValueError("No view statistics found")

    if len(statistics) > 1:
        # Here warn that more than one is found, we will use one of them
        pass
    statistics = statistics[0]
    min_score = None
    max_score = None
    for el in statistics._source["series"]:
        if el["statistic"] == "min":
            min_score = el["value"]
        if el["statistic"] == "max":
            max_score = el["value"]

    return min_score, max_score


def get_boosting_factors(forge, config):
    set_elastic_view(forge, config["boostingView"]["@id"])
    factors = forge.elastic("""
       {
          "query": {
                "term" : { "_deprecated": false }
              }
        }
    """)
    if len(factors) == 0:
        raise ValueError("No boosting factors found")
    boosting_factors = {}
    for el in factors:
        boosting_factors[el._source["derivation"]["entity"]["@id"]] =\
            el._source["value"]
    return boosting_factors
    
def execute_similarity_query(forge, query, parameters):
    """Execute similarity search query
    
    Parameters
    ----------
    forge : KnowledgeGraphForge
        Instance of a forge session
    query : dict
        Json representation of the similarity search query (`SimilarityQuery`)
    parameters : dict
        Input parameters used in the similarity query
    
    Returns
    -------
    neighbors : list of resource ID
        List of similarity search results, each element is a resource ID.
    """
    config = query.get("queryConfiguration", None)
    if config is None:
        raise ValueError("No config provided")
    
    k = query["k"]
    
    neighbors = []
    if isinstance(config, dict) or len(config) == 1:
        if isinstance(config, list):
            config = config[0]
        # Perform similarity search using a single similarity model
        _, neighbors = query_similar_resources(forge, query, config, parameters, k)
        neighbors = [
            n["derivation"]["entity"]["@id"]
            for _, n in neighbors
        ]
    else:
        # Perform similarity search combining several similarity models
        all_resources = get_all_documents(forge)
        n_resources = len(all_resources)

        vector_ids = []
        all_neighbors = []
        stats = []
        all_boosting_factors = {}
        all_boosted_stats = {}
        for i, individual_config in enumerate(config):
            vector_id, neighbors = query_similar_resources(
                forge, query, individual_config, parameters, n_resources)
            vector_ids.append(vector_id)
            all_neighbors.append(neighbors)
            
            min_score, max_score = get_score_stats(
                forge, individual_config)
            stats.append((min_score, max_score))
            if individual_config["boosted"]:
                boosting_factors = get_boosting_factors(
                    forge, individual_config)
                all_boosting_factors[i] = boosting_factors
                all_boosted_stats[i] = get_score_stats(
                    forge, individual_config, boosted=True)

        # Combine the results
        combined_results = defaultdict(list)
        for i, neighbor_collection in enumerate(all_neighbors):
            min_score, max_score = stats[i]
            boosting_factor = 1
            boosted_min, boosted_max = min_score, max_score
            if i in all_boosting_factors:
                # We need to boost the scores
                boosting_factor = all_boosting_factors[i][vector_ids[i]]
                boosted_min, boosted_max = all_boosted_stats[i]

            for score, n in neighbor_collection:
                resource_id = n["derivation"]["entity"]["@id"]
                score = score * boosting_factor
                score = (score - boosted_min) / (boosted_max - boosted_min)
                combined_results[resource_id].append(score)

        combined_results = {
            k: np.array(v).mean()
            for k, v in combined_results.items()
        }
        neighbors = pd.DataFrame(
            combined_results.items(), columns=["result", "score"]).nlargest(
                k, columns=["score"])["result"].tolist()
        
    return neighbors

__TODO__: 
- add sparql/forge/es query configuration

In [468]:
execute_query(forge, similarity_query1, {"MType": "http://uri.interlex.org/base/ilx_0381377"})

['http://uri.interlex.org/base/ilx_0381371']

In [469]:
execute_query(forge, similarity_query2, {"MType": "http://uri.interlex.org/base/ilx_0381377"})

['nsg:TufterdPyramidalNeuron']

In [None]:
- creation of embedding vectors
- indexing
- stats + boosting
- queries

In [470]:
forge._store.endpoint

'https://staging.nexus.ocp.bbp.epfl.ch/v1'