In [23]:
import getpass
import json
import uuid

from kgforge.core import KnowledgeGraphForge

from inference_tools.utils import (check_premises,
                                   execute_query,
                                   execute_query_pipe,
                                   apply_rule,
                                   get_rule_parameters)

from inference_tools.type import (QueryType, ParameterType)

In [24]:
# When True the access token is read from TOKEN_PATH, otherwise it is prompted for.
LOCAL_TOKEN = True
TOKEN_PATH = 'token.txt'
CONFIG_PATH = "config.yaml"


def allocate_forge_session(org, project):
    """Create a KnowledgeGraphForge session bound to the ``org/project`` bucket.

    The token comes from ``TOKEN_PATH`` when ``LOCAL_TOKEN`` is true, otherwise
    from an interactive ``getpass`` prompt. Surrounding whitespace is removed
    in both cases.

    Args:
        org: organisation part of the bucket.
        project: project part of the bucket.

    Returns:
        The ``KnowledgeGraphForge`` instance configured from ``CONFIG_PATH``.

    Raises:
        OSError: if ``LOCAL_TOKEN`` is true and ``TOKEN_PATH`` cannot be opened.
    """
    ENDPOINT = "https://bbp.epfl.ch/nexus/v1"

    if LOCAL_TOKEN:
        with open(TOKEN_PATH) as f:
            # Token files typically end with a newline; left in place it would
            # become part of the token that is sent.
            TOKEN = f.read().strip()
    else:
        TOKEN = getpass.getpass().strip()

    searchendpoints = {
        "sparql": {
            "endpoint": "https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-sp/dataset"
        }
    }

    return KnowledgeGraphForge(
        CONFIG_PATH,
        endpoint=ENDPOINT,
        token=TOKEN,
        bucket=f"{org}/{project}",
        searchendpoints=searchendpoints,
        debug=True
    )

# [Create a Rule able to Generalize data across the brain region hierarchy](https://bbpteam.epfl.ch/project/issues/browse/DKE-1105) - WITH CONTEXT

In [25]:
def init_rule(context=True):
    """Build the data-generalization rule exercised in this notebook.

    The rule's ``searchQuery`` is a ``QueryPipe``. The head query (``fqo3``)
    selects the ``?id`` values related to ``$GeneralizedFieldValue`` through
    ``schema:isPartOf*``, in the direction chosen by ``$SearchDown``, and
    declares a result mapping from path ``id`` to the parameter ``all_values``.
    The rest query (``sqo3``) selects non-deprecated entities of type
    ``$TypeQueryParameter`` whose ``$PathToGeneralizedField`` value is in
    ``$all_values``.

    Args:
        context: when True, the rest query also contains the
            ``?id $UserContext .`` pattern and declares the ``UserContext``
            parameter; when False both are left out.

    Returns:
        dict: the rule payload, with a freshly generated ``id``.
    """

    rule_id3 = f"https://bbp.epfl.ch/neurosciencegraph/data/{uuid.uuid4()}"

    # Head query: with SearchDown true it matches ?id isPartOf* <value>,
    # otherwise <value> isPartOf* ?id.
    fq3 = """
        SELECT ?id
        WHERE {            
            ?id rdfs:subClassOf $GeneralizedFieldName .     
            ?a schema:isPartOf* ?b .
            FILTER(?a = IF($SearchDown^^xsd:boolean, ?id, iri($GeneralizedFieldValue)))
            FILTER(?b = IF($SearchDown^^xsd:boolean, iri($GeneralizedFieldValue), ?id))
        } LIMIT 5000
    """

    fqo3 = {
        "type": QueryType.SPARQL_QUERY.value,
        "hasBody": fq3,
        "hasParameter": [
            {
                "type": ParameterType.PATH.value,
                "description": "Field Name being generalized",
                "name": "GeneralizedFieldName"
            },

            {
                "type": ParameterType.URI.value,
                "description": "Field Value being generalized",
                "name": "GeneralizedFieldValue"
            },

            {
                "type": ParameterType.STR.value,
                # The two fragments were previously joined without a space
                # ("propertyis").
                "description": (
                    "Whether we are searching for entities whose property "
                    "is a descendant or parent of the provided GeneralizedFieldValue"
                ),
                "name": "SearchDown",
            },
        ],
        "queryConfiguration": {
            # NOTE(review): the key is spelled "sparlqlView" - confirm this is
            # the spelling inference_tools expects before changing it.
            "sparlqlView": {
                "id": "https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-sp/dataset"
            },

            "org": "neurosciencegraph",
            "project": "datamodels"
        },
        "resultParameterMapping": [
            {
                "parameterName": "all_values",
                "path": "id"
            },
        ]
    }

    # Rest query, variant with the user-provided context pattern.
    # NOTE(review): ?br is selected but never bound in either variant.
    sq3 = """
        SELECT ?id ?br
        WHERE { 
            ?id rdf:type $TypeQueryParameter .
            ?id $PathToGeneralizedField ?value .
            ?id $UserContext .
            ?id <https://bluebrain.github.io/nexus/vocabulary/deprecated> ?_deprecated .
            Filter (?_deprecated = 'false'^^xsd:boolean && ?value in $all_values)
        } LIMIT 5000
    """

    # Rest query, variant without the $UserContext pattern.
    sq3_2 = """
        SELECT ?id ?br
        WHERE { 
            ?id rdf:type $TypeQueryParameter .
            ?id $PathToGeneralizedField ?value .
            ?id <https://bluebrain.github.io/nexus/vocabulary/deprecated> ?_deprecated .
            Filter (?_deprecated = 'false'^^xsd:boolean && ?value in $all_values)
        } LIMIT 5000
    """

    parameters = [
        {
            "type": ParameterType.SPARQL_LIST.value,
            "description": "All valid values for the generalized field",
            "name": "all_values"
        },
        {
            "type": ParameterType.PATH.value,
            "description": "type of the queried entity",
            "name": "TypeQueryParameter"
        },
        {
            "type": ParameterType.PATH.value,
            "description": "Path to Generalized Field",
            "name": "PathToGeneralizedField"
        }
    ]

    # UserContext is only declared when the query body actually references it.
    if context:
        parameters.append({
            "type": ParameterType.MUTLI_PREDICATE_OBJECT_PAIR.value,
            "description": "Additional filtering on retrieved entities",
            "name": "UserContext"
        })

    sqo3 = {
        "type": "SparqlQuery",
        "hasBody": sq3 if context else sq3_2,
        "hasParameter": parameters,
        "queryConfiguration": {
            "sparlqlView": {
                "id": "https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-sp/dataset"
            },
            "org": "bbp",
            "project": "atlas"
        },
        # NOTE(review): unlike the head query this mapping is a single dict with
        # "test"/"entity" values - looks like a placeholder, confirm.
        "resultParameterMapping": {
            "parameterName": "test",
            "path": "entity"
        }
    }

    # NOTE(review): description and name say "With Context" even when
    # context=False - confirm whether that is intended.
    my_rule3 = {
        "id": rule_id3,
        "type": "DataGeneralizationRule",
        "_schemaProject": "https://bbp.epfl.ch/nexus/v1/projects/bbp/inference-rules",
        "description": """
            With Context - Given an entity type (e.g NeuronMorphology, Trace, cell type, ...) 
            linked to an entity by a provided path, generalise to entities associated 
            with the descendants or parents of the entity
        """,
        # A stray double quote in front of "hierarchy" was removed from the name.
        "name": """
            With Context - Generalise up (ancestors) and/or down (descendants) a (combination of) 
            hierarchy in a BBP ontology (e.g. cell type, brain region)
        """,
        "searchQuery": {
            "type": "QueryPipe",
            "head": fqo3,
            "rest": sqo3
        },
        "targetResourceType": "Entity"
    }

    return my_rule3

def init_filters(searchDown=True):
    """Return the parameter values fed to the rule in this notebook.

    ``searchDown`` selects the string stored under ``"SearchDown"``:
    ``"true"`` when truthy, ``"false"`` otherwise. Every other entry is fixed.
    """
    if searchDown:
        search_down_value = "true"
    else:
        search_down_value = "false"

    # Redundant with the query content, but shows an example of multiple
    # predicate-object pairs.
    type_pair = (
        ("rdf:type", "path"),
        ("NeuronMorphology", "path"),
    )
    agent_pair = (
        ("contribution/agent", "path"),
        ("<https://bbp.epfl.ch/neurosciencegraph/data/7c47aa15-9fc6-42ec-9871-d233c9c29028>", "path"),
    )

    return {
        "TypeQueryParameter": "NeuronMorphology",
        "GeneralizedFieldName": "BrainRegion",
        "GeneralizedFieldValue": "<http://api.brain-map.org/api/v2/data/Structure/315>",
        "PathToGeneralizedField": "nsg:brainLocation/nsg:brainRegion",
        "SearchDown": search_down_value,
        "UserContext": [type_pair, agent_pair],
    }

# Expected

In [26]:
# Forge session on the bbp/atlas bucket; the expected-result morphology queries below run on it.
bbp_atlas_forge = allocate_forge_session("bbp", "atlas")

In [27]:
# Passed as `debug=` to the morphology SPARQL calls below.
DEBUG = False
# Passed as `limit=` to the forge.sparql calls below.
LIMIT = 10000

## 1.a Get all neuron morphologies + context

In [28]:
# Expected-result query: non-deprecated NeuronMorphology entities contributed by
# the given agent. The type triple appears twice, mirroring the first
# UserContext pair of init_filters.
qtr2 = """
      SELECT ?id ?value
           WHERE { 
               ?id rdf:type nsg:NeuronMorphology .
               ?id nsg:brainLocation/nsg:brainRegion ?value .
               ?id <https://bluebrain.github.io/nexus/vocabulary/deprecated> ?_deprecated .
               ?id rdf:type nsg:NeuronMorphology .
               ?id nsg:contribution/prov:agent <https://bbp.epfl.ch/neurosciencegraph/data/7c47aa15-9fc6-42ec-9871-d233c9c29028> .
               Filter (?_deprecated = 'false'^^xsd:boolean)}
"""
raw_res2 = bbp_atlas_forge.sparql(qtr2, limit=LIMIT, debug=DEBUG)
first_query_res2 = bbp_atlas_forge.as_json(raw_res2)

len(first_query_res2)

331

## 1.b Get all neuron morphologies + no context

In [29]:
# Expected-result query: every non-deprecated NeuronMorphology with its brain
# region, without any contributor restriction.
qtr3 = """
      SELECT ?id ?value
           WHERE { 
               ?id rdf:type nsg:NeuronMorphology .
               ?id nsg:brainLocation/nsg:brainRegion ?value .
               ?id <https://bluebrain.github.io/nexus/vocabulary/deprecated> ?_deprecated .
               Filter (?_deprecated = 'false'^^xsd:boolean)}
"""
raw_res3 = bbp_atlas_forge.sparql(qtr3, limit=LIMIT, debug=DEBUG)
first_query_res3 = bbp_atlas_forge.as_json(raw_res3)

len(first_query_res3)

3295

## 2.a Get all brain regions that are `isPartOf*` Isocortex (its descendants)

In [30]:
# Forge session on the neurosciencegraph/datamodels bucket; the brain-region queries below run on it.
bbp_datamodels_forge = allocate_forge_session("neurosciencegraph", "datamodels")

In [31]:
# Brain regions reachable from Structure/315 by following schema:isPartOf* upward
# from the region, i.e. regions that are part of it.
qtr_descendants = """
    SELECT ?br
    WHERE {
     ?br rdfs:subClassOf BrainRegion .
     ?br schema:isPartOf* <http://api.brain-map.org/api/v2/data/Structure/315> .
    }
"""
descendants_res = bbp_datamodels_forge.sparql(qtr_descendants, limit=LIMIT)

In [32]:
# Keep only the value bound to ?br for each result row.
descendants_json = bbp_datamodels_forge.as_json(descendants_res)
descendants = [row["br"] for row in descendants_json]

In [33]:
# Number of brain regions returned by the hand-written descendants query.
print("Descendants", len(descendants))

Descendants 374


## 2.b Get all brain regions that Isocortex is `isPartOf*` (its parents)

In [34]:
# Brain regions that Structure/315 is part of, following schema:isPartOf*.
qtr_parents = """
    SELECT ?br
    WHERE {
     ?br rdfs:subClassOf BrainRegion .
     <http://api.brain-map.org/api/v2/data/Structure/315> schema:isPartOf* ?br  .
    }
"""
parents_res = bbp_datamodels_forge.sparql(qtr_parents, limit=LIMIT)

In [35]:
# Keep only the value bound to ?br for each result row.
parents_json = bbp_datamodels_forge.as_json(parents_res)
parents = [row["br"] for row in parents_json]

In [36]:
# Number of brain regions returned by the hand-written parents query.
print("Parents", len(parents))

Parents 6


____

In [37]:
def get_brain_regions(searchDown):
    """Execute only the head query of the rule and return its result.

    Args:
        searchDown: forwarded to ``init_filters``; selects whether the head
            query looks for descendants (True) or parents (False).

    Returns:
        Whatever ``execute_query`` returns for the head query.
    """
    return execute_query(
        allocate_forge_session,
        init_rule()["searchQuery"]["head"],
        init_filters(searchDown=searchDown),
        debug=False,
    )


descendants_query = get_brain_regions(searchDown=True)
parents_query = get_brain_regions(searchDown=False)
# These counts are meant to be compared with the hand-written queries above.
print("Parents", len(parents_query))
print("Descendants", len(descendants_query))

Parents 6
Descendants 374


# Merge

In [38]:
def run_rule(context, searchDown):
    """Apply the full rule (head + rest query) and return its result.

    Args:
        context: forwarded to ``init_rule``; whether the rule includes the
            ``UserContext`` filter.
        searchDown: forwarded to ``init_filters``; descendants (True) or
            parents (False).

    Returns:
        Whatever ``apply_rule`` returns for the built rule and filters.
    """
    return apply_rule(
        allocate_forge_session,
        init_rule(context=context),
        init_filters(searchDown=searchDown),
        debug=False,
    )

## 3.a With context filters

## 3.a.1 Keep only the neuron morphologies (with context filters) whose brain region is `isPartOf*` Isocortex

In [39]:
# Expected result: context-filtered morphologies whose brain region is among the descendants.
merge_two_w_a = [morph for morph in first_query_res2 if morph["value"] in descendants]

In [40]:
# Rule result: with context filters, searching down.
res_w_a = run_rule(searchDown=True, context=True)

In [41]:
# Expected count vs. count returned by the rule.
print(len(merge_two_w_a), len(res_w_a))

0 0


## 3.a.2 Keep only the neuron morphologies (with context filters) whose brain region Isocortex is `isPartOf*`

In [42]:
# Expected result: context-filtered morphologies whose brain region is among the parents.
merge_two_w_b = [morph for morph in first_query_res2 if morph["value"] in parents]

In [43]:
# Rule result: with context filters, searching up.
res_w_b = run_rule(searchDown=False, context=True)

In [44]:
# Expected count vs. count returned by the rule.
print(len(merge_two_w_b), len(res_w_b))

0 0


## 3.b Without context filters

## 3.b.1 Keep only the neuron morphologies (without context filters) whose brain region is `isPartOf*` Isocortex

In [45]:
# Expected result: all morphologies whose brain region is among the descendants.
merge_two_wo_a = [morph for morph in first_query_res3 if morph["value"] in descendants]

In [46]:
# Rule result: without context filters, searching down.
res_wo_a = run_rule(searchDown=True, context=False)

In [47]:
# Expected count vs. count returned by the rule.
print(len(merge_two_wo_a), len(res_wo_a))

619 619


## 3.b.2 Keep only the neuron morphologies (without context filters) whose brain region Isocortex is `isPartOf*`

In [48]:
# Expected result: all morphologies whose brain region is among the parents.
merge_two_wo_b = [morph for morph in first_query_res3 if morph["value"] in parents]

In [49]:
# Rule result: without context filters, searching up.
res_wo_b = run_rule(searchDown=False, context=False)

In [50]:
# Expected count vs. count returned by the rule.
print(len(merge_two_wo_b), len(res_wo_b))

3 3


_____

In [59]:
# NOTE(review): left commented out. If enabled, these lines retrieve an existing
# rule from the bbp/inference-rules bucket, replace its searchQuery with the one
# built by init_rule(context=True), then update the rule and tag it "v2".
# Presumably disabled so the stored rule is not modified on every run - confirm.
# rule_forge = allocate_forge_session("bbp", "inference-rules")

# new_rule = rule_forge.retrieve("https://bbp.epfl.ch/neurosciencegraph/data/5d04995a-6220-4e82-b847-8c3a87030e0b")

# new_v = init_rule(context=True)

# new_rule.searchQuery = new_v["searchQuery"]

# rule_forge.update(new_rule)

# rule_forge.tag(new_rule, "v2")