In [1]:
# Enable autoreload so edits to imported project modules are picked up live (mode 2).
%load_ext autoreload
%autoreload 2

In [2]:
from string import Template

import numpy as np
import pandas as pd

import getpass
import json

from collections import defaultdict, namedtuple

from urllib.parse import quote_plus

from kgforge.core import KnowledgeGraphForge

from inference_tools.query.elastic_search import set_elastic_view
from inference_tools.utils import (check_premises,
                                   execute_query,
                                   execute_query_pipe,
                                   apply_rule,
                                   get_rule_parameters)

In [3]:
# Ask for the access token interactively so it is never hardcoded in the notebook.
# It is passed as `token=` to every KnowledgeGraphForge session created below.
TOKEN = getpass.getpass()

In [4]:
# Base URL of the (staging) Nexus deployment; passed as `endpoint=` to every forge session below.
ENDPOINT = "https://staging.nise.bbp.epfl.ch/nexus/v1"

In [5]:
# Forge session bound to the `dke/inference-test` bucket, configured from the
# local forge config file.
forge = KnowledgeGraphForge(
    "../../configs/new-forge-config.yaml",
    bucket="dke/inference-test",
    endpoint=ENDPOINT,
    token=TOKEN,
)

A factory that creates forge sessions given org + project.

In [6]:
# This could be replaced by any util kg-inference-api implements to handle user forge sessions.
# Sessions created so far, keyed by their (org, project) pair.
FORGE_SESSIONS = {}


def allocate_forge_session(org, project):
    """Return the forge session for ``org/project``, creating it on first request.

    Sessions are memoised in ``FORGE_SESSIONS``: a pair that was already
    requested gets the stored session back, otherwise a new
    ``KnowledgeGraphForge`` is built with the notebook-level ``ENDPOINT`` and
    ``TOKEN`` and stored before being returned.
    """
    bucket_key = (org, project)
    try:
        return FORGE_SESSIONS[bucket_key]
    except KeyError:
        pass  # first request for this bucket: build the session below

    new_session = KnowledgeGraphForge(
        "https://raw.githubusercontent.com/BlueBrain/nexus-forge/master/examples/notebooks/use-cases/prod-forge-nexus.yml",
        endpoint=ENDPOINT,
        token=TOKEN,
        bucket=f"{org}/{project}")
    FORGE_SESSIONS[bucket_key] = new_session
    return new_session

# Inference Rules

Get filters from atlas (or advanced query)

We get filters from atlas (or advanced query), for example:

    BrainRegionQueryParameter "http://api.brain-map.org/api/v2/data/Structure/31"  # ACA
    MTypeQueryParameter "http://uri.interlex.org/base/ilx_0381377"  # L6_UPC
    SpeciesQueryParameter "http://purl.obolibrary.org/obo/NCBITaxon_10088"   # Mouse

In [7]:
# Filters received from the atlas (or an advanced query), keyed by rule parameter name.
input_filters = dict(
    BrainRegionQueryParameter="http://api.brain-map.org/api/v2/data/Structure/31",  # ACA
    MTypeQueryParameter="http://uri.interlex.org/base/ilx_0381377",  # L6_UPC
    SpeciesQueryParameter="http://purl.obolibrary.org/obo/NCBITaxon_10088",  # Mouse
)

Show all rules whose premises this search satisfies

- `KGQuery`: `SparqlQuery`, `ESQuery`, `ForgeSearchQuery`
- `RulePremise`: `PropertyPremise` 

Components of a `QueryBasedGeneralizationRule`:

- __Rule Parameters__ values from atlas filters or advanced query (species, brain region, cell type (m/e/t), datatype) 
- __Premises__ need to be satisfied on provided rule params for the rule to be applied
- __Query__ is applied to retrieve resources (result of inference). Queries can use the provided rule parameters

Course of action:

- Take the filters
- Take all the available rules (?)
- Search for rules whose premises the filters satisfy:
  - For each rule, execute its 'premise query' filled with parameters generated from the filters
  - Return the rules whose premises are satisfied
- Apply the queries of the satisfied rules

# Principle: PC cells in neocortex have the same E-behaviour

## Rule A (simple query)

Input parameters: brain_region, MType
If (premise)
   (brain region)-[is_part_of]->({id: isocortex})
   (mtype)-[IS_SUBCLASS_OF]->({id: PC})

Return X s.t.
- (X)-[IS_A]->(Trace)
- (X)-[subject/species]->({id: Mouse})
- (X)-[brainLocation/brainRegion]->(brain_region)-[is_part_of]->({id: isocortex})
- (X)-[brainLocation/layer]->(brain layer {id: L5})
- (X)-[hasMType]->(MType)-[IS_SUBCLASS_OF]->({id: PC})

In [8]:
# Premises shared by rule_A and the final rule_B (both pass this list as their
# "premise"). Each entry declares the input parameters it consumes
# ("hasParameter") and where it is evaluated ("queryConfiguration").
# `$Name` placeholders in the query bodies refer to those parameters.
premise = [
   # Premise 1 (SPARQL): the query brain region reaches Structure/315 through
   # zero or more `isPartOf` hops (Structure/315 is the isocortex according to
   # the rule description in the notebook text).
   {
        "type": "SparqlPremise",
        "hasParameter": [
            {
                "name": "BrainRegionQueryParameter",
                "description": "URI of the query brain region",
                "type": "uri"
            }
        ],
        "hasBody": """
            SELECT ?x
            WHERE {
                <$BrainRegionQueryParameter> <http://schema.org/isPartOf>* <http://api.brain-map.org/api/v2/data/Structure/315> .
            }
        """,
        "queryConfiguration": {
            "org": "dke",
            "project": "inference-test",
            "sparqlView": {
                "id": "https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex",
                "type": "SparqlView"
            }
        }
    },
    # Premise 2 (SPARQL): the query MType is a (transitive) subclass of PyramidalNeuron.
    # NOTE(review): no "sparqlView" is given here, unlike premise 1 — presumably a
    # default view is used; confirm against inference_tools.
    {
        "type": "SparqlPremise",
        "hasParameter": [
            {
                "name": "MTypeQueryParameter",
                "description": "URI of the query MType",
                "type": "uri"
            }
        ],
        "hasBody": """
            SELECT ?x
            WHERE {
                <$MTypeQueryParameter> rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
            }
        """,
        "queryConfiguration": {
            "org": "dke",
            "project": "inference-test"
        }
    },
    # Premise 3 (forge search): pattern-based check on the brain region.
    # NOTE(review): presumably searches for resources matching "pattern" and
    # compares their "targetPath" with the "targetParameter" value — confirm
    # against inference_tools.
    {
        "type": "ForgeSearchPremise",
        "hasParameter": [
            {
                "name": "BrainRegionQueryParameter",
                "description": "URI of the query brain region",
                "type": "uri"
            }
        ],
        "targetParameter": "BrainRegionQueryParameter",
        "targetPath": "id",
        "pattern": {
            "isPartOf": "http://api.brain-map.org/api/v2/data/Structure/315"
        },
        "queryConfiguration": {
            "org": "dke",
            "project": "inference-test"
        }
    },
]

In [9]:
# Search query of rule A: one SPARQL query selecting every non-deprecated Trace
# annotated with an MType that is a PyramidalNeuron subclass and located in a
# brain region that is part of Structure/315.
search_query_A = dict(
    type="SparqlQuery",
    hasBody="""
        SELECT ?id
        WHERE {
          ?id a <https://neuroshapes.org/Trace> ;
                      <https://neuroshapes.org/annotation>/<https://neuroshapes.org/hasBody> ?mtype ;
                      <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
                      <https://neuroshapes.org/brainLocation>/<https://neuroshapes.org/brainRegion> ?brainRegion.
          ?mtype rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
          ?brainRegion <http://schema.org/isPartOf>* <http://api.brain-map.org/api/v2/data/Structure/315> .
        }
    """,
    queryConfiguration=dict(org="dke", project="inference-test"),
)

In [10]:
# Rule A: the shared premises plus the single-SPARQL search query defined above.
rule_A = dict(
    type="DataGeneralizationRule",
    name="Trace generalization for cortical PCs",
    description="Collect traces belonging to PCs from other cortical regions",
    premise=premise,
    searchQuery=search_query_A,
)

In [11]:
# Self-contained version of rule B: the three premises mirror the shared
# `premise` list and the query pipe mirrors `search_query_B`, both written inline.
# NOTE: `rule_B` is assigned again further down (from `premise` and
# `search_query_B`); that later assignment is the one the rule B cells run with.
# Name and description match that later definition, so both describe the
# similarity-based rule rather than rule A.
rule_B = {
    "type": "DataGeneralizationRule",
    "name": "MType similarity based trace generalization for cortical PCs",
    "description": "Collect traces belonging to the closest (PC) MType from other cortical regions",
    "premise": [
        # Premise 1 (SPARQL): the brain region is part of Structure/315.
        {
            "type": "SparqlPremise",
            "hasParameter": [
                {
                    "name": "BrainRegionQueryParameter",
                    "description": "URI of the query brain region",
                    "type": "uri"
                }
            ],
            "hasBody": """
                SELECT ?x
                WHERE {
                    <$BrainRegionQueryParameter> <http://schema.org/isPartOf>* <http://api.brain-map.org/api/v2/data/Structure/315> .
                }
            """,
            "queryConfiguration": {
                "org": "dke",
                "project": "inference-test",
                "sparqlView": {
                    "id": "https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex",
                    "type": "SparqlView"
                }
            }
        },
        # Premise 2 (SPARQL): the MType is a subclass of PyramidalNeuron.
        {
            "type": "SparqlPremise",
            "hasParameter": [
                {
                    "name": "MTypeQueryParameter",
                    "description": "URI of the query MType",
                    "type": "uri"
                }
            ],
            "hasBody": """
                SELECT ?x
                WHERE {
                    <$MTypeQueryParameter> rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
                }
            """,
            "queryConfiguration": {
                "org": "dke",
                "project": "inference-test"
            }
        },
        # Premise 3 (forge search): pattern-based check on the brain region.
        {
            "type": "ForgeSearchPremise",
            "hasParameter": [
                {
                    "name": "BrainRegionQueryParameter",
                    "description": "URI of the query brain region",
                    "type": "uri"
                }
            ],
            "targetParameter": "BrainRegionQueryParameter",
            "targetPath": "id",
            "pattern": {
                "isPartOf": "http://api.brain-map.org/api/v2/data/Structure/315"
            },
            "queryConfiguration": {
                "org": "dke",
                "project": "inference-test"
            }
        },
    ],
    # Three-stage pipe: each "head" runs first and hands its mapped results to "rest".
    "searchQuery": {
        "type": "QueryPipe",
        "head": {
            # Query type
            "type": "SparqlQuery",
            "queryConfiguration": {
                "org": "dke",
                "project": "inference-test",
                "sparqlView": {
                    "id": "https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex",
                    "type": "SparqlView"
                }
            },
            # Query body
            "hasBody": """
                SELECT ?mtype
                WHERE {
                    ?mtype rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
                    ?trace <https://neuroshapes.org/annotation>/<https://neuroshapes.org/hasBody> ?mtype.
                }
            """,
            # Mapping of query results to the parameters (passed to the next query)
            "resultParameterMapping": [
                {
                    "path": "mtype",
                    "parameterName": "acceptableResults"
                }
            ]
        },
        "rest": {
            "type": "QueryPipe",
            "head": {
                # Query type
                "type": "SimilarityQuery",
                # Number of top results to return
                "k": 1,
                "queryConfiguration": {
                    "org": "dke",
                    "project": "inference-test",
                    "embeddingModel": {
                        "id": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel",
                        "type": "EmbeddingModel"
                    },
                    "similarityView": {
                        "id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view",
                        "type": "ElasticSearchView"
                    },
                    "scoreFormula": "euclidean",
                },
                # Specification of input parameters
                "hasParameter": [
                    {
                        "name": "MTypeQueryParameter",
                        "description": "URI of the query MType",
                        "type": "str"
                    },
                    {
                        "name": "acceptableResults",
                        "description": "List of URI's of acceptable MTypes (all PCs)",
                        "type": "list"
                    }
                ],
                # Query to retrieve embeddings
                "searchTargetParameter": "MTypeQueryParameter",
                # Filter to apply on the similarity search results
                "resultFilter": """
                    "must": {
                        "nested": {
                            "path": "derivation.entity",
                            "query": {
                                "terms": { "derivation.entity.@id": [$acceptableResults] }
                            }
                        }
                    }
                """,
                # Mapping of query results to the parameters (passed to the next query)
                "resultParameterMapping": [{
                    "path": "id",
                    "parameterName": "closestMType"
                }]
            },
            "rest": {
                # Query type
                "type": "SparqlQuery",
                # Specification of input parameters
                "hasParameter": [
                    {
                        "name": "closestMType",
                        "description": "URI of the closest acceptable MType",
                        "type": "uri"
                    }
                ],
                "queryConfiguration": {
                    "org": "dke",
                    "project": "inference-test",
                    "sparqlView": {
                        "id": "https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex",
                        "type": "SparqlView"
                    }
                },
                # Query body
                "hasBody": """
                    SELECT ?id
                    WHERE {
                        ?id a <https://neuroshapes.org/Trace> ;
                            <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
                            <https://neuroshapes.org/annotation> ?annotation ;
                            <https://neuroshapes.org/brainLocation>/<https://neuroshapes.org/brainRegion> ?brainRegion.
                        ?annotation <https://neuroshapes.org/hasBody> <$closestMType>.
                    }
                """
            }
        }
    }
}

In [12]:
%time
check_premises(allocate_forge_session, rule_A, input_filters)

CPU times: user 1 µs, sys: 1 µs, total: 2 µs
Wall time: 5.25 µs


True

In [13]:
%time
apply_rule(allocate_forge_session, rule_A, input_filters)

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 7.15 µs


[{'id': 'https://bbp.epfl.ch/neurosciencegraph/data/ff0e496d-b8a0-4c0e-8487-545563495dc0'},
 {'id': 'https://bbp.epfl.ch/neurosciencegraph/data/2b108ccf-4ad2-4495-8490-17b2fde656fa'},
 {'id': 'https://bbp.epfl.ch/neurosciencegraph/data/4fba4ab3-189c-4ff4-96bf-6904c1b8912b'}]

## Rule B (query + similarity search)

Input parameters: brain_region, MType, similarityModelId, topNeighbors

Find closest MType and then return traces/morphs with the closest MType that is also a subclass of pyramidal neuron and that are located in a cortical region.

If (premise)
   (brain region)-[is_part_of]->({id: isocortex})
   (mtype)-[IS_SUBCLASS_OF]->({id: PC})


Return Y s.t.
- (Y)-[hasMType]->(MTypeY)
- Select top K MTypeY by sim_{similarityModel}(MType, MTypeY) 

(or for traces)
Return Y s.t.
- (Y)-[hasEType]->(ETypeY)
- Select either top K ETypeY by sim_{similarityModel}(ETypeX, ETypeY) or s.t sim_{similarityModel}(ETypeX, ETypeY) > similarityThreshold

__Notes__:

- Similarity-based query is not allowed in premises
- You can use results from the previous query or input params in the next query

TODO: make rule know which view to use
- "configuration": "https://bbp.epfl.ch/neurosciencegraph/data/ontology-class-recommender-config",
- "similarityModels": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel",          

In [14]:
# Search query of rule B: a three-stage query pipe. Each "head" runs first and its
# "resultParameterMapping" names the parameter through which results reach "rest":
#   1. SPARQL: MTypes that are PyramidalNeuron subclasses and annotate at least
#      one resource -> `acceptableResults`
#   2. Similarity query (k = 1) around `MTypeQueryParameter`, filtered to
#      `acceptableResults` -> `closestMType`
#   3. SPARQL: non-deprecated Traces annotated with `closestMType`
search_query_B =  {
    "type": "QueryPipe",
    "head": {
        # Query type
        "type": "SparqlQuery",
        "queryConfiguration": {
            "org": "dke",
            "project": "inference-test",
            "sparqlView": {
                "id": "https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex",
                "type": "SparqlView"
            }
        },
        # Query body
        "hasBody": """
            SELECT ?mtype
            WHERE {
                ?mtype rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
                ?trace <https://neuroshapes.org/annotation>/<https://neuroshapes.org/hasBody> ?mtype.
            }
        """,
        # Mapping of query results to the parameters (passed to the next query)
        "resultParameterMapping": [
            {
                "path": "mtype",
                "parameterName": "acceptableResults"
            }
        ]
    },
    "rest": { 
        "type": "QueryPipe",
        "head": {
            # Query type
            "type": "SimilarityQuery",
            # Number of top results to return
            "k": 1,
            "queryConfiguration": {
                "org": "dke",
                "project": "inference-test",
                "embeddingModel": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel",
                    "type": "EmbeddingModel"
                },
                "similarityView": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view",
                    "type": "ElasticSearchView"
                },
                "scoreFormula": "euclidean",
            },
            # Specification of input parameters
            "hasParameter": [
                {
                    "name": "MTypeQueryParameter",
                    "description": "URI of the query MType",
                    "type": "str"
                },
                {
                    "name": "acceptableResults",
                    "description": "List of URI's of acceptable MTypes (all PCs)",
                    "type": "list"
                }
            ],
            # Query to retrieve embeddings
            "searchTargetParameter": "MTypeQueryParameter",
            # Filter to apply on the similarity search results
            # NOTE(review): this is a fragment of an Elasticsearch bool query with a
            # `$acceptableResults` placeholder; how the list is rendered into it is
            # decided by inference_tools — not visible here.
            "resultFilter": """
                "must": {
                    "nested": {
                        "path": "derivation.entity",
                        "query": {
                            "terms": { "derivation.entity.@id": [$acceptableResults] }
                        }
                    }
                }
            """,
            # Mapping of query results to the parameters (passed to the next query)
            "resultParameterMapping": [{
                "path": "id",
                "parameterName": "closestMType"
            }]
        },
        "rest": {
            # Query type
            "type": "SparqlQuery",
            # Specification of input parameters
            "hasParameter": [
                {
                    "name": "closestMType",
                    "description": "URI of the closest acceptable MType",
                    "type": "uri"
                }
            ],
            "queryConfiguration": {
                "org": "dke",
                "project": "inference-test",
                "sparqlView": {
                    "id": "https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex",
                    "type": "SparqlView"
                }
            },
            # Query body
            "hasBody": """
                SELECT ?id
                WHERE {
                    ?id a <https://neuroshapes.org/Trace> ;
                        <https://bluebrain.github.io/nexus/vocabulary/deprecated> false ;
                        <https://neuroshapes.org/annotation> ?annotation ;
                        <https://neuroshapes.org/brainLocation>/<https://neuroshapes.org/brainRegion> ?brainRegion.
                    ?annotation <https://neuroshapes.org/hasBody> <$closestMType>.
                }
            """
        }
    }
}

In [15]:
# Rule B: same premises as rule A, but the search goes through the
# similarity-based query pipe `search_query_B`.
rule_B = dict(
    type="DataGeneralizationRule",
    name="MType similarity based trace generalization for cortical PCs",
    description="Collect traces belonging to the closest (PC) MType from other cortical regions",
    premise=premise,
    searchQuery=search_query_B,
)

In [16]:
%time
check_premises(allocate_forge_session, rule_B, input_filters)

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 6.91 µs


True

In [17]:
%time
apply_rule(allocate_forge_session, rule_B, input_filters)

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 7.87 µs


[{'id': 'https://bbp.epfl.ch/neurosciencegraph/data/ff0e496d-b8a0-4c0e-8487-545563495dc0'}]

## Rule C


This rule combines multiple similarity models.
We also use a similarity rule with a parameterizable number of neighbors, and will show how to specify models to ignore.

In [18]:
# Search query of rule C: a two-stage query pipe.
#   1. SPARQL: every (transitive) subclass of PyramidalNeuron -> `acceptableMTypes`
#   2. Similarity query around `MTypeQueryParameter`, with the number of
#      neighbours taken from `$KNearestNeighborsParameter`, two model
#      configurations, and results filtered to `acceptableMTypes`
search_query_C = {
    "type": "QueryPipe",
    "head": {
        "type": "SparqlQuery",
        "queryConfiguration": {
            "org": "dke",
            "project": "inference-test",
            "sparqlView": {
                "id": "https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex",
                "type": "SparqlView"
            }
        },
        "hasBody": """
            SELECT ?mtype
            WHERE {
                ?mtype rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
            }
        """,
        # Results bound to ?mtype are handed to the next stage as `acceptableMTypes`
        "resultParameterMapping": [{
            "path": "mtype",
            "parameterName": "acceptableMTypes"
        }]
    },
    "rest": {
        "type": "SimilarityQuery",
        # Placeholder: filled from the `KNearestNeighborsParameter` input parameter declared below
        "k": "$KNearestNeighborsParameter",
        # Configuration of similarity views
        # (multiple records mean that multiple models are combined)
        # NOTE(review): the meaning of "boosted", "boostingView" and "statisticsView"
        # is defined by inference_tools and is not visible here.
        "queryConfiguration": [
            {
                "org": "dke",
                "project": "inference-test",
                "embeddingModel": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel",
                    "type": "EmbeddingModel"
                },
                "similarityView": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view",
                    "type": "ElasticSearchView"
                },
                "boosted": True,
                "boostingView": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-boosting-view",
                    "type": "ElasticSearchView"
                },
                "statisticsView": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-statistics-view",
                    "type": "ElasticSearchView"
                }
            },
            {
                "org": "dke",
                "project": "inference-test",
                "embeddingModel": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/MTypeEmbeddingModel2",
                    "type": "EmbeddingModel"
                },
                "similarityView": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-embedding-view2",
                    "type": "ElasticSearchView"
                },
                "boosted": True,
                "boostingView": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-boosting-view2",
                    "type": "ElasticSearchView"
                },
                "statisticsView": {
                    "id": "https://bbp.epfl.ch/neurosciencegraph/data/mtype-statistics-view2",
                    "type": "ElasticSearchView"
                }
            }
        ],
        # Input parameters: two come from the input filters, `acceptableMTypes`
        # comes from the SPARQL stage above
        "hasParameter": [
            {
                "name": "MTypeQueryParameter",
                "description": "URI of the query MType",
                "type": "str"
            },
            {
                "type": "int",
                "description": "Number of the nearest neighbors to return",
                "name": "KNearestNeighborsParameter"
            },
            {
                "name": "acceptableMTypes",
                "description": "URIs of acceptable MTypes (subclasses of PC).",
                "type": "list"
            }
        ],
        "searchTargetParameter": "MTypeQueryParameter",
        # Filter to apply on the similarity search results
        "resultFilter": """
            "must": {
                "nested": {
                    "path": "derivation.entity",
                    "query": {
                        "terms": { "derivation.entity.@id": [$acceptableMTypes] }
                    }
                }
            }
        """
    }
}

In [19]:
# Rule C: a single premise (the query MType must be a PyramidalNeuron subclass)
# followed by the combined-similarity query pipe `search_query_C`.
rule_C = {
    # Plain "type" key, consistent with rule_A and rule_B (was "@type").
    "type": "DataGeneralizationRule",
    "name": "Combined (PC) MType similarity",
    "description": "Get the closest (PC) MType according to a combined similarity",
    "premise": [
        {
            "type": "SparqlPremise",
            "hasParameter": [
                {
                    "name": "MTypeQueryParameter",
                    "description": "URI of the query MType",
                    "type": "uri"
                }
            ],
            "hasBody": """
                SELECT ?x
                WHERE {
                    <$MTypeQueryParameter> rdfs:subClassOf* <https://neuroshapes.org/PyramidalNeuron> .
                }
            """,
            "queryConfiguration": {
                "org": "dke",
                "project": "inference-test",
                "sparqlView": {
                    "id": "https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex",
                    "type": "SparqlView"
                }
            }
        }
    ],
    "searchQuery": search_query_C
}

In [20]:
# Rule C's similarity query reads its neighbour count from this filter
# (`"k": "$KNearestNeighborsParameter"`), so add it to the shared filters.
input_filters.update(KNearestNeighborsParameter=3)

In [21]:
# Apply rule C to the filters, which by now include `KNearestNeighborsParameter`.
apply_rule(allocate_forge_session, rule_C, input_filters)

[{'id': 'https://neuroshapes.org/TufterdPyramidalNeuron'},
 {'id': 'http://uri.interlex.org/base/ilx_0381371'},
 {'id': 'https://neuroshapes.org/UntufterdPyramidalNeuron'}]