In [31]:
import getpass
import json
import uuid

from kgforge.core import KnowledgeGraphForge

from inference_tools.utils import (
    check_premises,
    execute_query,
    execute_query_pipe,
    apply_rule,
    get_rule_parameters,
    _allocate_forge_session
)

In [32]:
def allocate_forge_session(org, project):
    """Allocate a forge session for the given Nexus organisation and project.

    Thin wrapper around `_allocate_forge_session` that pins the settings used
    throughout this notebook: the test forge configuration file, the BBP Nexus
    endpoint, the aggregated dataset SPARQL view as search endpoint and the
    `token.txt` token file path.

    Parameters
    ----------
    org : organisation of the targeted bucket (e.g. "bbp")
    project : project of the targeted bucket (e.g. "atlas")

    Returns
    -------
    Whatever `_allocate_forge_session` returns for these settings.
    """
    sparql_view = "https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-sp/dataset"

    # Keyword order mirrors the order the helper is called with.
    session_settings = dict(
        config_file_path="../../configs/test-config.yaml",
        endpoint="https://bbp.epfl.ch/nexus/v1",
        searchendpoints={"sparql": {"endpoint": sparql_view}},
        token_file_path="token.txt",
    )
    return _allocate_forge_session(org=org, project=project, **session_settings)

# [Create a Rule able to Generalize data across the brain region hierarchy](https://bbpteam.epfl.ch/project/issues/browse/DKE-1105)

In [33]:
# Values for the rule parameters used by every query below. Both are written as
# bracketed IRIs; Structure/315 is the region the "Expected" section refers to
# as Isocortex.
input_filters = {
    "BrainRegionQueryParameter": "<http://api.brain-map.org/api/v2/data/Structure/315>",
    "TypeQueryParameter": "<https://neuroshapes.org/NeuronMorphology>"
}

## V1: QueryPipe: 
- 1. Get all Neuron morphologies 
- 2. Only keep those in the right brain region

#### Problem: timing out, too many neuron morphologies retrieved and passed to the next step?

In [34]:
# Fresh random identifier for the first version of the rule.
rule_id = f"https://bbp.epfl.ch/neurosciencegraph/data/{uuid.uuid4()}"


# Head query of the pipe: every non-deprecated entity of the requested type that
# has a brain region, as (?id, ?br) rows. Nothing restricts the region here, so
# the full set of entities is handed to the second query.
fq =   """
    SELECT ?id ?br
    WHERE { 
        ?id rdf:type $TypeQueryParameter .
        ?id nsg:brainLocation/nsg:brainRegion ?br .
        
        ?id <https://bluebrain.github.io/nexus/vocabulary/deprecated> ?_deprecated .
        Filter (?_deprecated = 'false'^^xsd:boolean)
        } 
"""

# Query object wrapping `fq`; it targets the bbp/atlas bucket.
fqo = {
        "type": "SparqlQuery",
        "hasBody": fq,
        "hasParameter": [
            {
                "type": "uri",
                "description": "type of the entity being retrieved",
                "name": "TypeQueryParameter"
            },
        ],
        "queryConfiguration": {
            # NOTE(review): "sparlqlView" looks like a misspelling of "sparqlView";
            # confirm which key inference_tools reads before changing it.
            "sparlqlView": {
                "id": "https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-sp/dataset"
            },
            "org": "bbp",
            "project": "atlas"
        },
        # Result columns ("path") exposed under the parameter names that the
        # second query body references as $entityBr and $entity.
        "resultParameterMapping": [
            {
                "parameterName": "entityBr",
                "path": "br"
            },
            {
                "parameterName": "entity",
                "path": "id"
            }
        ]
    }

# Second query of the pipe: restricts ?br to BrainRegion subclasses linked to
# $BrainRegionQueryParameter through zero or more isPartOf steps, with ?id and
# ?br limited to the values injected from the first query.
# NOTE(review): ?id and ?br are bound by two independent VALUES clauses, so the
# (id, br) pairing of the first query does not appear to be preserved - confirm.
sq = """
    SELECT ?id ?br
    WHERE {
     ?br rdfs:subClassOf BrainRegion ;   
     <http://schema.org/isPartOf>* $BrainRegionQueryParameter.
        VALUES (?id) {
            $entity
        }.
        VALUES (?br){
            $entityBr
        }
    }
"""

# Query object wrapping `sq`; it targets the neurosciencegraph/datamodels bucket.
sqo = {
    "type": "SparqlQuery",
    "hasBody": sq,
    "hasParameter": [
    {
        "type": "sparql_value_list",
        "description": "List of URIs of brains regions",
        "name": "entityBr"
    }, 
    {
        "type": "sparql_value_list",
        "description": "List of URIs of entities of type",
        "name": "entity"
    }, 
    {
        "type": "uri",
        "description": "URI of the queried brain region",
        "name": "BrainRegionQueryParameter"
    }
    ],
    "queryConfiguration": {
        # NOTE(review): "sparlqlView" looks like a misspelling of "sparqlView";
        # confirm which key inference_tools reads before changing it.
        "sparlqlView": {
            "id": "https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-sp/dataset"
        },
        "org": "neurosciencegraph",
        "project": "datamodels"
    },
    # NOTE(review): a single dict here (a list in `fqo`), and "entity" is not a
    # column selected by `sq` - looks like a placeholder for the last query; confirm.
    "resultParameterMapping": {
        "parameterName": "test",
        "path": "entity"
    }
}


# First version of the rule: a two-step query pipe running `fqo` (all entities
# of the type) and then `sqo` (brain-region restriction).
my_rule = {
    "id": rule_id,
    "type": "DataGeneralizationRule",
    "_schemaProject": "https://bbp.epfl.ch/nexus/v1/projects/bbp/inference-rules",
    # The two literals are joined as-is, so the first one must end with a space;
    # without it the text read "(combination of)hierarchy".
    "description": (
        "Generalise up (ancestors) and/or down (descendants) a (combination of) "
        "hierarchy in a BBP ontology (e.g. cell type, brain region)"
    ),
    "name": "TODO", #TODO
    "searchQuery": {
        "type": "QueryPipe",
        "head": fqo,
        "rest": sqo
    }
}

In [35]:
# res = apply_rule(allocate_forge_session, my_rule, input_filters)
# res

## V2: QueryPipe:
- 1. Get all valid brain regions
- 2. Query Neuron Morphologies and keep only those with the valid brain regions

In [36]:
# Fresh random identifier for the second version of the rule.
rule_id2 = f"https://bbp.epfl.ch/neurosciencegraph/data/{uuid.uuid4()}"


# Head query: BrainRegion subclasses linked to $BrainRegionQueryParameter through
# zero or more isPartOf steps (so the region itself can match), capped at 10000 rows.
fq2 = """
    SELECT ?id
    WHERE {
     ?id rdfs:subClassOf BrainRegion ;   
     <http://schema.org/isPartOf>* $BrainRegionQueryParameter.
    } LIMIT 10000
"""

# Query object wrapping `fq2`; it targets the neurosciencegraph/datamodels bucket.
fqo2 = {
        "type": "SparqlQuery",
        "hasBody": fq2,
        "hasParameter": [
            {
                "type": "uri",
                "description": "type of the brain region it should belong to",
                "name": "BrainRegionQueryParameter"
            },
        ],
        "queryConfiguration": {
            # NOTE(review): "sparlqlView" looks like a misspelling of "sparqlView";
            # confirm which key inference_tools reads before changing it.
            "sparlqlView": {
                "id": "https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-sp/dataset"
            },
            
            "org": "neurosciencegraph",
            "project": "datamodels"
        },
        # The ?id column is exposed as "all_brs", the name the second query body
        # references as $all_brs.
        "resultParameterMapping": [
            {
                "parameterName": "all_brs",
                "path": "id"
            },
        ]
    }


# Second query: non-deprecated entities of the requested type whose brain region
# is one of the regions injected as $all_brs, capped at 10000 rows.
sq2 =   """
    SELECT ?id ?br
    WHERE { 
        ?id rdf:type $TypeQueryParameter .
        ?id nsg:brainLocation/nsg:brainRegion ?br .
        ?id <https://bluebrain.github.io/nexus/vocabulary/deprecated> ?_deprecated .
        Filter (?_deprecated = 'false'^^xsd:boolean &&  ?br IN $all_brs)
    } LIMIT 10000
"""

# Query object wrapping `sq2`; it targets the bbp/atlas bucket.
sqo2 = {
    "type": "SparqlQuery",
    "hasBody": sq2,
    "hasParameter": [
    {
        "type": "sparql_list",
        "description": "List of URIs of brains regions",
        "name": "all_brs"
    }, 
    {
        "type": "uri",
        "description": "type of the queried entity",
        "name": "TypeQueryParameter"
    }
    ],
    "queryConfiguration": {
        # NOTE(review): "sparlqlView" looks like a misspelling of "sparqlView";
        # confirm which key inference_tools reads before changing it.
        "sparlqlView": {
            "id": "https://bbp.epfl.ch/neurosciencegraph/data/views/aggreg-sp/dataset"
        },
        "org": "bbp",
        "project": "atlas"
    },
    # NOTE(review): a single dict here (a list in `fqo2`), and "entity" is not a
    # column selected by `sq2` - looks like a placeholder for the last query; confirm.
    "resultParameterMapping": {
        "parameterName": "test",
        "path": "entity"
    }
}


# Second version of the rule: the pipe first resolves the valid brain regions
# (`fqo2`) and then queries the entities located in them (`sqo2`).
my_rule2 = {
    "id": rule_id2,
    "type": "DataGeneralizationRule",
    "_schemaProject": "https://bbp.epfl.ch/nexus/v1/projects/bbp/inference-rules",
    "description": """
        Given an entity type (e.g NeuronMorphology, Trace, cell type, ...) 
        linked with a given brain region (e.g. Isocortex), generalise to entities associated 
        with the descendants of the region (e.g Gustatory areas)
    """,
    # A stray double quote sat in front of "hierarchy", left over from the
    # concatenated literal this text was copied from; it has been removed.
    "name": """
        Generalise up (ancestors) and/or down (descendants) a (combination of) 
        hierarchy in a BBP ontology (e.g. cell type, brain region)
    """,
    "searchQuery": {
        "type": "QueryPipe",
        "head": fqo2,
        "rest": sqo2
    }
}

In [37]:
# Apply the second rule (`my_rule2`) to the input filters; the number of results
# is the figure the manual reproduction below is compared against.
res2 = apply_rule(allocate_forge_session, my_rule2, input_filters, debug=False)
len(res2)

619

# Expected

## 1. Get all neuron morphologies

In [38]:
# Session on the bbp/atlas bucket, used for the manual morphology queries below.
bbp_atlas_forge = allocate_forge_session("bbp", "atlas")

### a. SPARQL - KG_Inference - All but deprecated

In [39]:
# Run only the first query object of the first rule (`fqo`) through the helper.
# NOTE(review): `first_query_res` is assigned again by the forge SPARQL query in
# section b.A, so the count displayed after this cell only holds in run-all order.
first_query_res = execute_query(allocate_forge_session, fqo, input_filters, last_query=True)

In [40]:
len(first_query_res)

3295

### b. SPARQL - Forge

In [41]:
# Shared options for the forge `sparql` and `search` calls below.
DEBUG = False   # passed as `debug=`
LIMIT = 10000   # passed as `limit=`

#### A. 4380 when getting all, including deprecated

In [42]:
# Every NeuronMorphology that has a brain region; there is no deprecation filter,
# so deprecated resources are returned too.
qtr = """
    SELECT ?id ?br 
    WHERE { 
        ?id rdf:type <https://neuroshapes.org/NeuronMorphology> .
        ?id nsg:brainLocation/nsg:brainRegion ?br .
    } 
"""
# NOTE(review): this reuses the name `first_query_res` already assigned in the
# "SPARQL - KG_Inference" cell and replaces that earlier result.
first_query_res = bbp_atlas_forge.as_json(
    bbp_atlas_forge.sparql(qtr, limit=LIMIT, debug=DEBUG))

len(first_query_res)

4380

#### B. 3295 when getting all but deprecated

In [43]:
# Same pattern as `qtr` plus a filter keeping only resources whose deprecated
# flag is false; this is the result set reused for the final merge step.
qtr2 = """
    SELECT ?id ?br 
    WHERE { 
        ?id rdf:type <https://neuroshapes.org/NeuronMorphology> .
        ?id nsg:brainLocation/nsg:brainRegion ?br .
        
        ?id <https://bluebrain.github.io/nexus/vocabulary/deprecated> ?_deprecated .
        Filter (?_deprecated = 'false'^^xsd:boolean)
    } 
"""
first_query_res2 = bbp_atlas_forge.as_json(
    bbp_atlas_forge.sparql(qtr2, limit=LIMIT, debug=DEBUG))
len(first_query_res2)

3295

#### C. 3400 (more than B) when also requiring rev, updatedAt, updatedBy and not requiring a brain region (replicating forge search below)

In [44]:
# Non-deprecated NeuronMorphology resources that carry the Nexus rev, updatedAt
# and updatedBy metadata, matched inside a named graph. Unlike `qtr`/`qtr2`,
# no brain region is required here, so morphologies without one are counted.
qtr3 = """
   SELECT ?id ?_deprecated ?_rev 
   WHERE { Graph ?g {?id rdf:type <https://neuroshapes.org/NeuronMorphology>;
    <https://bluebrain.github.io/nexus/vocabulary/deprecated> ?_deprecated;
    <https://bluebrain.github.io/nexus/vocabulary/rev> ?_rev;
    <https://bluebrain.github.io/nexus/vocabulary/updatedAt> ?_updatedAt;
    <https://bluebrain.github.io/nexus/vocabulary/updatedBy> ?_updatedBy . 
    Filter (?_deprecated = 'false'^^xsd:boolean)
   }}
"""
first_query_res3 = bbp_atlas_forge.as_json(
    bbp_atlas_forge.sparql(qtr3, limit=LIMIT, debug=DEBUG))
len(first_query_res3)

3400

### c. Forge Search

In [45]:
# Same retrieval through forge search instead of SPARQL, filtering on the type
# given in `input_filters` and searching across buckets.
first_res = bbp_atlas_forge.search({"type": input_filters["TypeQueryParameter"]}, 
                                   cross_bucket=True, limit=LIMIT, debug=DEBUG)

In [46]:
# Reduce each search hit to the same {"br", "id"} shape as the SPARQL rows.
first_res_format = [
    {"br": resource["brainLocation"]["brainRegion"]["id"], "id": resource["id"]}
    for resource in bbp_atlas_forge.as_json(first_res)
]

In [47]:
len(first_res_format)

3400

## 2. Get all brain regions that are part of* Isocortex (transitive `isPartOf`)

### SPARQL - Forge

In [48]:
# Session on the neurosciencegraph/datamodels bucket, used for the brain region query.
bbp_datamodels_forge = allocate_forge_session("neurosciencegraph", "datamodels")

In [49]:
# BrainRegion subclasses linked to Structure/315 through zero or more isPartOf
# steps; the same pattern as `fq2` with the region IRI written inline.
second_res = bbp_datamodels_forge.sparql("""
    SELECT ?br
    WHERE {
     ?br rdfs:subClassOf BrainRegion .
     ?br <http://schema.org/isPartOf>* <http://api.brain-map.org/api/v2/data/Structure/315> .
    }
""", limit=LIMIT)

In [50]:
# Keep only the ?br value of each result row.
second = [
    row["br"]
    for row in bbp_datamodels_forge.as_json(second_res)
]

In [51]:
len(second)

374

## 3. Keep only the neuron morphologies whose brain region is part of* Isocortex

### Using the b.B version

In [52]:
# Use the non-deprecated SPARQL result (section b.B) as the morphology list.
first = first_query_res2

In [53]:
# Keep the morphologies whose brain region is one of the regions in `second`.
# The regions are put in a set once so each membership test is a hash lookup
# instead of a scan of the whole list (assumes the "br" values are strings,
# as returned by the SPARQL queries above).
second_lookup = set(second)
merge_two = [el for el in first if el["br"] in second_lookup]

In [54]:
len(merge_two)

619

## Same result & approach as V2 rule

# Persisting:

In [55]:
# rule_forge = allocate_forge_session("bbp", "inference-rules")

In [56]:
# rule_forge.register(rule_forge.from_json(my_rule2))

In [57]:
# my_rule2_res = rule_forge.retrieve(rule_id2)

In [58]:
# rule_forge.tag(my_rule2_res, "v1")