In [3]:
import matplotlib.pyplot as plt

In [4]:
from string import Template

import numpy as np

import getpass
import json


from urllib.parse import quote_plus

from kgforge.core import KnowledgeGraphForge

In [5]:
TOKEN = getpass.getpass()

········


In [6]:
# Base URL of the Nexus API: passed to KnowledgeGraphForge as `endpoint` and
# used by set_elastic_view to build the views search URL.
ENDPOINT = "https://staging.nexus.ocp.bbp.epfl.ch/v1"

In [11]:
# Forge session used by every query in this notebook. It authenticates with
# TOKEN, talks to ENDPOINT and targets the "dke/inference-test" bucket.
forge = KnowledgeGraphForge(
    "../../configs/new-forge-config.yaml",
    endpoint=ENDPOINT,
    token=TOKEN, 
    bucket="dke/inference-test")

# Inference Rules

Configure the atlas

In [12]:
# Atlas configuration.
# NOTE(review): no other visible cell reads `configuration`; its project string
# duplicates the `bucket` given to KnowledgeGraphForge - confirm it is needed.
configuration = {
    "project": "dke/inference-test"
}

Get filters from atlas (or advanced query)

We get filters from atlas (or advanced query), for example:

    brainRegion "http://api.brain-map.org/api/v2/data/Structure/31"  # ACA
    species "http://purl.obolibrary.org/obo/NCBITaxon_10088"   # Mouse
    MType "http://uri.interlex.org/base/ilx_0381377"  # L6_UPC

In [13]:
# Example filter values. The keys are the parameter names that rule templates
# reference as placeholders (e.g. $brainRegion, $MType).
# NOTE(review): no visible premise or query references "species".
input_filters = {
    "brainRegion": "http://api.brain-map.org/api/v2/data/Structure/31",  # ACA
    "MType": "http://uri.interlex.org/base/ilx_0381377",  # L6_UPC
    "species": "http://purl.obolibrary.org/obo/NCBITaxon_10088",  # Mouse
}

Show all rules whose premises this search satisfies

- `KGQuery`: `SPARQLQuery`, `ESQuery`, `ForgeSearchQuery`
- `RulePremise`: `PropertyPremise` 

Components of a `QueryBasedGeneralizationRule`:

- __Rule Parameters__: values taken from the atlas filters or an advanced query (species, brain region, cell type (m/e/t), datatype)
- __Premises__: conditions that the provided rule parameters must satisfy for the rule to be applied
- __Query__: applied to retrieve resources (the result of inference); queries can use the provided rule parameters

Course of action:
    - Take the filters
    - Take all the available rules (?)
    - Search for the rules whose premises the filters satisfy:
       - For each rule, execute its 'premise query' filled with parameters generated from the filters
       - Return the rules whose premises are satisfied
    - Apply the queries of the satisfied rules

In [57]:
def set_elastic_view(forge, view):
    """Point the forge store's ElasticSearch endpoint at the given view.

    The search URL has the form
    ``<ENDPOINT>/views/<org>/<project>/<view>/_search``, where ``<org>`` and
    ``<project>`` are the first two "/"-separated components of
    ``forge._store.bucket`` and every component is encoded with
    ``quote_plus``. The URL is written in place to
    ``forge._store.service.elastic_endpoint["endpoint"]``; nothing is
    returned. The base views URL and the tuple of URL components are printed.

    Parameters
    ----------
    forge : forge session whose private store configuration is modified
    view : str
        Identifier of the view to query.
    """
    bucket_parts = forge._store.bucket.split("/")
    org = quote_plus(bucket_parts[0])
    project = quote_plus(bucket_parts[1])

    views_endpoint = "/".join((ENDPOINT, "views", org, project))
    print(views_endpoint)

    search_url_parts = (views_endpoint, quote_plus(view), "_search")
    print(search_url_parts)
    forge._store.service.elastic_endpoint["endpoint"] = "/".join(search_url_parts)


def check_premise(forge, rule, parameters):
    """Check whether the input parameters satisfy every premise of a rule.

    Premises are evaluated in order and evaluation stops at the first one
    that is not satisfied.

    - ``SPARQLQuery``: ``hasBody`` is a ``string.Template`` filled with the
      parameters; the premise holds when ``forge.sparql`` returns at least
      one result.
    - ``ForgeSearchQuery``: ``hasBody["pattern"]`` is filled with the
      parameters and passed to ``forge.search``. When
      ``hasBody["targetParameter"]`` is set, the premise holds only if the
      value of that parameter is among the matched resources (their ``id``,
      or the value found at ``hasBody["targetPath"]`` when given). Without a
      target parameter it holds when at least one resource matches.
    - ``ElasticSearchQuery``: not implemented; the premise is printed and
      treated as satisfied.

    Parameter specifications (``"hasParameter"``) are honoured both on the
    rule and on each individual premise; values resolved for a premise take
    precedence over the rule-level ones for that premise only.

    Parameters
    ----------
    forge : object exposing ``sparql``, ``search`` and ``as_json``
    rule : dict
        Must contain a ``"premise"`` list; may contain ``"hasParameter"``.
    parameters : dict
        Raw input values keyed by parameter name.

    Returns
    -------
    bool
        True when all premises are satisfied, False otherwise.

    Raises
    ------
    ValueError
        If a premise has an unknown ``"@type"``.
    KeyError
        If a template placeholder or a declared parameter has no value.
    """
    rule_parameters = parameters
    if "hasParameter" in rule:
        rule_parameters = build_parameter_map(rule["hasParameter"], parameters)

    for premise in rule["premise"]:
        premise_parameters = rule_parameters
        if "hasParameter" in premise:
            # Specs declared on the premise itself are resolved from the raw
            # inputs (not from the already formatted rule-level values, which
            # would be formatted twice).
            premise_parameters = {
                **rule_parameters,
                **build_parameter_map(premise["hasParameter"], parameters),
            }

        premise_type = premise["@type"]
        if premise_type == "SPARQLQuery":
            query = Template(premise["hasBody"]).substitute(**premise_parameters)
            if len(forge.sparql(query, limit=None)) == 0:
                return False
        elif premise_type == "ForgeSearchQuery":
            body = premise["hasBody"]
            target_param = body.get("targetParameter", None)
            target_path = body.get("targetPath", None)
            # Round-trip through JSON so placeholders anywhere in the pattern
            # are substituted.
            query = json.loads(
                Template(json.dumps(body["pattern"])).substitute(
                    **premise_parameters))
            resources = forge.search(query)
            if target_param:
                # If a target parameter is specified, check that its value is
                # among the values extracted from the matched resources.
                if target_path:
                    matched_values = [
                        follow_path(forge.as_json(r), target_path)
                        for r in resources
                    ]
                else:
                    matched_values = [r.id for r in resources]
                if premise_parameters[target_param] not in matched_values:
                    return False
            elif len(resources) == 0:
                return False
        elif premise_type == "ElasticSearchQuery":
            # Placeholder: ElasticSearch premises are not evaluated yet.
            print(premise)
        else:
            raise ValueError("Unknown type of premise")
    return True


def follow_path(resource, path):
    """Return the value found by walking a dot-separated path through ``resource``.

    Each "."-separated component of ``path`` is used, in order, as a
    subscript on the value reached so far (``value[component]``), starting
    from ``resource`` itself.
    """
    current = resource
    for key in path.split("."):
        current = current[key]
    return current


def build_parameter_map(parameter_spec, parameter_values):
    """Format raw input values for template substitution according to a spec.

    Each spec entry is a dict with a ``"name"`` and a ``"type"``; the value
    stored under that name in ``parameter_values`` is converted as follows:

    - ``"list"``: every element is wrapped in double quotes and the elements
      are joined with ", ". A single string is treated as a one-element list
      instead of being split into characters.
    - ``"str"``: the value (or the first element, if a list is given) wrapped
      in double quotes.
    - ``"sparql_uri"``: the value (or the first element, if a list is given)
      unchanged.
    - any other type: the value unchanged.

    Parameters
    ----------
    parameter_spec : iterable of dict
    parameter_values : dict
        Raw values keyed by parameter name.

    Returns
    -------
    dict
        Only the parameters named in ``parameter_spec``, formatted.

    Raises
    ------
    KeyError
        If a spec entry lacks "name"/"type" or the named value is missing.
    IndexError
        If an empty list is given for a "str" or "sparql_uri" parameter.
    """
    def _single(value):
        # A multi-valued input collapses to its first element.
        return value[0] if isinstance(value, list) else value

    param_map = {}
    for spec in parameter_spec:
        name = spec["name"]
        kind = spec["type"]
        value = parameter_values[name]
        if kind == "list":
            if isinstance(value, str):
                # Iterating a bare string would quote each character.
                value = [value]
            param_map[name] = ", ".join(f"\"{el}\"" for el in value)
        elif kind == "str":
            param_map[name] = f"\"{_single(value)}\""
        elif kind == "sparql_uri":
            param_map[name] = _single(value)
        else:
            param_map[name] = value
    return param_map

 
def execute_similarity_query(forge, query, parameters):
    """Run an embedding-similarity search and return the source of each hit.

    Steps:

    1. ``query["embeddingQuery"]`` is filled with ``parameters`` and sent to
       ``forge.elastic``; the first hit supplies the reference vector
       (``_source["embedding"]``) and its ``_id``.
    2. A ``script_score`` query is built that scores documents by
       ``1 / (1 + l2norm(query_vector, embedding))``, excludes the reference
       document, requires an ``embedding`` field and requests
       ``query["k"]`` results. When ``query["resultFilter"]`` is present it
       is filled with ``parameters`` and appended to the ``bool`` clause.
    3. The query is sent to ``forge.elastic`` with ``limit=1000`` and the
       ``_source`` of every hit is returned as a list.

    NOTE(review): the template already contains a "must" clause, so a
    result filter that itself starts with "must" produces a duplicate key in
    the generated JSON - confirm how the backend resolves it.

    Raises ``IndexError`` when the embedding query returns no hit and
    ``KeyError`` when a template placeholder has no value.
    """
    top_k = query["k"]
    embedding_query = Template(query["embeddingQuery"]).substitute(**parameters)
    embedding_hits = forge.elastic(embedding_query)
    reference_hit = embedding_hits[0]
    reference_id = reference_hit._id
    reference_vector = embedding_hits[0]._source["embedding"]

    # Optional extra clause, prefixed so that it continues the "bool" object.
    extra_clause = query.get("resultFilter", "")
    if extra_clause:
        extra_clause = ",\n" + Template(extra_clause).substitute(parameters)

    similarity_template = """
        {
          "size": $_k,
          "query": {
            "script_score": {
                "query": {
                    "bool" : {
                      "must_not" : {
                        "term" : { "_id": "$_vectorId" }
                      },
                      "must": { "exists": { "field": "embedding" } }
                      $_resultFilter
                    }
                },
                "script": {
                    "source": "doc['embedding'].size() == 0 ? 0 : (1 / (1 + l2norm(params.query_vector, doc['embedding'])))",
                    "params": {
                      "query_vector": $_vector
                    }
                }
            }
          }
    }
    """
    substitutions = {
        "_vectorId": reference_id,
        "_vector": reference_vector,
        "_k": top_k,
        "_resultFilter": extra_clause
    }
    filled_query = Template(similarity_template).substitute(substitutions)
    neighbours = forge.elastic(filled_query, limit=1000)
    return [hit._source for hit in neighbours]


def execute_query(forge, query, parameters):
    """Execute a single (non-composite) query and return its results.

    If the query declares ``"hasParameter"``, the input ``parameters`` are
    first formatted with ``build_parameter_map``. Dispatch is on
    ``query["@type"]``:

    - ``SPARQLQuery``: ``hasBody`` is filled with the parameters and run with
      ``forge.sparql``; the result is returned via ``forge.as_json``.
    - ``ForgeSearchQuery``: ``hasBody["pattern"]`` is filled with the
      parameters and passed to ``forge.search``; the resources are returned
      as-is.
    - ``SimilarityQuery``: delegated to ``execute_similarity_query``.
    - ``ElasticSearchQuery``: not implemented; the query is printed and
      ``None`` is returned.

    Raises
    ------
    ValueError
        If the query type is unknown.
    KeyError
        If a template placeholder or a declared parameter has no value.
    """
    if "hasParameter" in query:
        parameters = build_parameter_map(
            query["hasParameter"], parameters)

    query_type = query["@type"]
    if query_type == "SPARQLQuery":
        sparql_query = Template(query["hasBody"]).substitute(**parameters)
        return forge.as_json(forge.sparql(sparql_query, limit=None))
    if query_type == "ForgeSearchQuery":
        # Round-trip through JSON so placeholders anywhere in the pattern
        # are substituted.
        pattern = json.dumps(query["hasBody"]["pattern"])
        search_filters = json.loads(Template(pattern).substitute(**parameters))
        return forge.search(search_filters)
    if query_type == "SimilarityQuery":
        return execute_similarity_query(forge, query, parameters)
    if query_type == "ElasticSearchQuery":
        # Placeholder: plain ElasticSearch queries are not executed yet.
        print(query)
        return None
    raise ValueError("Unknown type of query")


def execute_composite_query(forge, head, parameters, rest=None):
    """Execute a chain of queries, feeding each result into the next query.

    A ``CompositeQuery`` is a dict with a ``"head"`` query and a ``"rest"``
    that is either a plain query or another ``CompositeQuery``.

    - Called without ``rest``: a composite ``head`` is unpacked into its own
      head/rest pair; any other ``head`` is run directly with
      ``execute_query``.
    - Called with ``rest``: ``head`` is run first. For every entry of
      ``head["resultParameterMapping"]`` a new parameter named
      ``parameterName`` is derived from the result: for a list result, the
      list of values found at ``path`` in each element (via
      ``follow_path``); otherwise ``result[path]``. The input ``parameters``
      dict is copied, not modified. ``rest`` is then executed with the
      extended parameters.

    Returns whatever the last query in the chain returns.
    """
    if rest is None:
        if head["@type"] != "CompositeQuery":
            return execute_query(forge, head, parameters)
        return execute_composite_query(
            forge, head["head"], parameters, head["rest"])

    head_result = execute_query(forge, head, parameters)

    # Extend a copy of the parameters with values extracted from the result.
    chained_parameters = {**parameters}
    for mapping in head["resultParameterMapping"]:
        if isinstance(head_result, list):
            mapped_value = [
                follow_path(item, mapping["path"]) for item in head_result
            ]
        else:
            mapped_value = head_result[mapping["path"]]
        chained_parameters[mapping["parameterName"]] = mapped_value

    if rest["@type"] != "CompositeQuery":
        return execute_query(forge, rest, chained_parameters)
    return execute_composite_query(
        forge, rest["head"], chained_parameters, rest["rest"])
        

def execute_search_query(forge, rule, parameters):
    """Run the ``"searchQuery"`` of ``rule`` with the given parameters.

    The search query may be a plain query or a ``CompositeQuery``; both are
    handled by ``execute_composite_query``, whose result is returned.
    """
    search_query = rule["searchQuery"]
    return execute_composite_query(forge, search_query, parameters)

# Principle: PC cells in neocortex have the same E-behaviour

## Rule A (simple query)

Input parameters: brain_region, MType
If (premise)
   (brain region)-[is_part_of]->({id: isocortex})
   (mtype)-[IS_SUBCLASS_OF]->({id: PC})

Return X s.t.
- (X)-[IS_A]->(Trace)
- (X)-[subject/species]->({id: Mouse})
- (X)-[brainLocation/brainRegion]->(brain_region)-[is_part_of]->({id: isocortex})
- (X)-[brainLocation/layer]->(brain layer {id: L5})
- (X)-[hasMType]->(MType)-[IS_SUBCLASS_OF]->({id: PC})

In [66]:
# Premises shared by rule_A and rule_B. check_premise evaluates them in order
# and the rule applies only if every one of them is satisfied.
premise = [
   # 1. SPARQL: the query brain region reaches Structure/315 (isocortex, per
   #    the rule description) through zero or more isPartOf links.
   {
        "@type": "SPARQLQuery",
        "hasParameter": [
            {
                "name": "brainRegion",
                "description": "URI of the query brain region",
                "type": "sparql_uri"
            }
        ],
        "hasBody": """
            SELECT ?x
            WHERE {
                <$brainRegion> <http://schema.org/isPartOf>* <http://api.brain-map.org/api/v2/data/Structure/315> .
            }
        """
    },
    # 2. SPARQL: the query MType is PyramidalNeuron or one of its
    #    (transitive) subclasses.
    {
        "@type": "SPARQLQuery",
        "hasParameter": [
            {
                "name": "MType",
                "description": "URI of the query MType",
                "type": "sparql_uri"
            }
        ],
        "hasBody": """
            SELECT ?x
            WHERE {
                <$MType> rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
            }
        """
    },
    # 3. Forge search: the value of brainRegion must equal the "id" of one of
    #    the resources matching isPartOf = Structure/315.
    {
        "@type": "ForgeSearchQuery",
        "hasParameter": [
            {
                "name": "brainRegion",
                "description": "URI of the query brain region",
                "type": "sparql_uri"
            }
        ],
        "hasBody": {
            "targetParameter": "brainRegion",
            "targetPath": "id",
            "pattern": {
                "isPartOf": "http://api.brain-map.org/api/v2/data/Structure/315"
            }
        }
    },
]

In [67]:
# Search query of rule A (takes no parameters): non-deprecated Trace resources
# whose annotated MType is PyramidalNeuron or a (transitive) subclass of it and
# whose brain region is (transitively) part of Structure/315.
# NOTE(review): the species and layer conditions listed in the rule
# description are not part of this query - confirm whether that is intended.
search_query_A = {
    "@type": "SPARQLQuery",
    "hasBody": """
        SELECT ?resource
        WHERE {
          ?resource a <https://neuroshapes.org/Trace> ;
                      <https://neuroshapes.org/annotation>/<https://neuroshapes.org/hasBody> ?mtype ;
                      <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
                      <https://neuroshapes.org/brainLocation> ?location .
          ?mtype rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
          ?location <https://neuroshapes.org/brainRegion> ?brainRegion .
          ?brainRegion <http://schema.org/isPartOf>* <http://api.brain-map.org/api/v2/data/Structure/315> .
        }
    """
}

In [71]:
# Rule A: the shared premises plus a single SPARQL search query.
# check_premise reads "premise"; execute_search_query reads "searchQuery".
rule_A = {
    "@type": "GeneralizationRule",
    "name": "Trace generalization for cortical PCs",
    "description": "Collect traces belonging to PCs from other cortical regions",
    "premise": premise,
    "searchQuery": search_query_A
}

In [69]:
check_premise(forge, rule_A, input_filters)

True

In [70]:
execute_search_query(forge, rule_A, input_filters)

[{'resource': 'https://bbp.epfl.ch/neurosciencegraph/data/ff0e496d-b8a0-4c0e-8487-545563495dc0'},
 {'resource': 'https://bbp.epfl.ch/neurosciencegraph/data/2b108ccf-4ad2-4495-8490-17b2fde656fa'},
 {'resource': 'https://bbp.epfl.ch/neurosciencegraph/data/4fba4ab3-189c-4ff4-96bf-6904c1b8912b'}]

## Rule B (query + similarity search)

Input parameters: brain_region, MType, similarityModelId, topNeighbors

Find closest MType and then return traces/morphs with the closest MType that is also a subclass of pyramidal neuron and that are located in a cortical region.

If (premise)
   (brain region)-[is_part_of]->({id: isocortex})
   (mtype)-[IS_SUBCLASS_OF]->({id: PC})


Return Y s.t.
- (Y)-[hasMType]->(MTypeY)
- Select top K MTypeY by sim_{similarityModel}(MType, MTypeY) 

(or for traces)
Return Y s.t.
- (Y)-[hasEType]->(ETypeY)
- Select either the top K ETypeY by sim_{similarityModel}(ETypeX, ETypeY), or those such that sim_{similarityModel}(ETypeX, ETypeY) > similarityThreshold

In [61]:
set_elastic_view(forge, "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view")

https://staging.nexus.ocp.bbp.epfl.ch/v1/views/dke/inference-test
('https://staging.nexus.ocp.bbp.epfl.ch/v1/views/dke/inference-test', 'https%3A%2F%2Fbbp.epfl.ch%2Fneurosciencegraph%2Fdata%2Fmtype-embedding-view', '_search')


__Notes__:

- Similarity-based query is not allowed in premises
- You can use results from the previous query or input params in the next query

TODO: make rule know which view to use
- "configuration": "https://bbp.epfl.ch/neurosciencegraph/data/ontology-class-recommender-config",
- "similarityModels": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel",          

In [65]:
# Search query of rule B: a chain of three queries executed by
# execute_composite_query, each feeding parameters to the next:
#   1. SPARQL     - collect every MType that is a PyramidalNeuron subclass and
#                   annotates some resource     -> $acceptableResults
#   2. Similarity - find the k=1 embedding closest to the embedding of $MType,
#                   restricted to $acceptableResults -> $closestMType
#   3. SPARQL     - return non-deprecated Traces annotated with $closestMType
search_query_B =  {
    "@type": "CompositeQuery",
    "head": {
        # Query type
        "@type": "SPARQLQuery",
        # Query body
        "hasBody": """
            SELECT ?mtype
            WHERE {
                ?mtype rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
                ?trace <https://neuroshapes.org/annotation>/<https://neuroshapes.org/hasBody> ?mtype.
            }
        """,
        # Mapping of query results to the parameters (passed to the next query)
        "resultParameterMapping": [
            {
                "path": "mtype",
                "parameterName": "acceptableResults"
            }
        ]
    },
    "rest": { 
        "@type": "CompositeQuery",
        "head": {
            # Query type
            "@type": "SimilarityQuery",
            # Number of top results to return
            "k": 1,
            # Specification of input parameters
            "hasParameter": [
                {
                    "name": "MType",
                    "description": "URI of the query MType",
                    "type": "str"
                },
                {
                    "name": "acceptableResults",
                    "description": "List of URI's of acceptable MTypes (all PCs)",
                    "type": "list"
                }
            ],
            # Query to retrieve embeddings
            "embeddingQuery": """
                {
                  "from": 0,
                  "size": 1,
                  "query": {
                    "bool": {
                      "must": [
                        {
                          "nested": {
                            "path": "derivation.entity",
                            "query": {
                              "terms": {
                                "derivation.entity.@id": [$MType]
                              }
                            }
                          }
                        }
                      ]
                    }
                  }
                }
            """, 
            # Filter to apply on the similarity search results
            "resultFilter": """
                "must": {
                    "nested": {
                        "path": "derivation.entity",
                        "query": {
                            "terms": { "derivation.entity.@id": [$acceptableResults] }
                        }
                    }
                }
            """,
            # Mapping of query results to the parameters (passed to the next query)
            "resultParameterMapping": [{
                "path": "derivation.entity.@id",
                "parameterName": "closestMType"
            }]
        },
        "rest": {
            # Query type
            "@type": "SPARQLQuery",
            # Specification of input parameters
            "hasParameter": [
                {
                    "name": "closestMType",
                    "description": "URI of the closest acceptable MType",
                    "type": "sparql_uri"
                }
            ],
            # Query body
            "hasBody": """
                SELECT ?id
                WHERE {
                    ?id a <https://neuroshapes.org/Trace> ;
                        <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
                        <https://neuroshapes.org/annotation> ?annotation.
                    ?annotation <https://neuroshapes.org/hasBody> <$closestMType>.
                }
            """
        }
    }
}

In [72]:
# Rule B: the same premises as rule A, combined with the composite
# (SPARQL -> similarity -> SPARQL) search query.
rule_B = {
    "@type": "GeneralizationRule",
    "name": "MType similarity based trace generalization for cortical PCs",
    "description": "Collect traces belonging to the closest (PC) MType from other cortical regions",
    "premise": premise,
    "searchQuery": search_query_B
}

In [76]:
check_premise(forge, rule_B, input_filters)

True

In [77]:
execute_search_query(forge, rule_B, input_filters)

[{'id': 'https://bbp.epfl.ch/neurosciencegraph/data/ff0e496d-b8a0-4c0e-8487-545563495dc0'}]