# Accessing SCKAN from python
This notebook contains useful links for working with SCKAN from python.

Examples below show how to work with SCKAN directly from github using `neurondm`,  
as well as how to query SCKAN using SPARQL.

## Useful links
### General introduction to `neurondm`
https://github.com/tgbugs/pyontutils/blob/master/neurondm/docs/NeuronLangExample.ipynb

### Retrieving neurons from git with `neurondm` extended example
https://github.com/tgbugs/pyontutils/blob/master/neurondm/docs/composer.py

### Example queries
https://github.com/SciCrunch/sparc-curation/blob/master/docs/queries.org

### General notebook setup
https://github.com/tgbugs/pyontutils/blob/master/neurondm/docs/neurons_notebook.md

### Docker setup
https://github.com/SciCrunch/sparc-curation/blob/master/docs/sckan/README.org

### Python setup
If you already have a Python environment that can run notebooks, run the following.

```bash
pip install neurondm
```

# Python

In [1]:
import os
import posixpath
import rdflib
from pyontutils.core import OntGraph, OntResIri, OntResPath
from pyontutils.namespaces import rdfs, ilxtr
from neurondm.core import Config, graphBase, log
from neurondm.core import OntTerm, OntId, RDFL
from neurondm import orders

# Create the neurondm config and the single in-memory graph that every remote
# ontology file below is merged into.
config = Config('random-merge')
g = OntGraph()  # load and query graph

# remove scigraph and interlex calls
# assigning before deleting guarantees the attribute exists on graphBase,
# so the `del` cannot raise AttributeError when this cell is rerun
graphBase._sgv = None
del graphBase._sgv
if len(OntTerm.query._services) > 1:
    # backup services and avoid issues on rerun
    _old_query_services = OntTerm.query._services
    _noloc_query_services = _old_query_services[1:]

# make the RDFL service backed by `g` the only term query service
OntTerm.query._services = (RDFL(g, OntId),)

# base paths to ontology files
gen_neurons_path = 'ttl/generated/neurons/'
suffix = '.ttl'
orr = 'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/neurons/'
remote_base = orr + gen_neurons_path

# full imports: copy every triple of these files into g
for f in ('apinat-partial-orders',
          'apinat-pops-more',
          'apinat-simple-sheet',
          'sparc-nlp'):
    ori = OntResIri(remote_base + f + suffix)
    for triple in ori.graph:
        g.add(triple)

# label only imports: copy just the rdfs:label triples of these files into g
for f in ('apinatomy-neuron-populations',
          '../../npo'):
    # '../../npo' is relative to gen_neurons_path and has to be collapsed
    # before it is appended to the base URL. The result is part of a URL, so
    # it is normalized with posixpath: os.path.normpath would emit
    # backslashes on Windows and produce an invalid IRI.
    p = posixpath.normpath(gen_neurons_path + f)
    ori = OntResIri(orr + p + suffix)
    for subject, label in ori.graph[:rdfs.label:]:
        g.add((subject, rdfs.label, label))

config.load_existing(g)
neurons = config.neurons()  # scigraph required here if deps not removed above

[32m[2023-08-03 21:52:07,825][0m - [36m   DEBUG[0m -       neurondm - [34m         core.py:920 [0m - remote[0m
[32m[2023-08-03 21:52:08,220][0m - [36m   DEBUG[0m -       neurondm - [34m         core.py:939 [0m - [OntId('NIFRAW:neurons/ttl/phenotype-core.ttl'), OntId('NIFRAW:neurons/ttl/phenotype-indicators.ttl'), OntId('NIFRAW:neurons/ttl/phenotypes.ttl')][0m
[32m[2023-08-03 21:52:08,223][0m - [1;35m   ERROR[0m -       neurondm - [34m         core.py:984 [0m - Local git repo not on neurons branch!
Please run `git checkout neurons` in /home/tom/git/NIF-Ontology, set NIFSTD_CHECKOUT_OK= via export or at runtime, or set checkout_ok=True.[0m
Traceback (most recent call last):
  File "/home/tom/git/pyontutils/neurondm/neurondm/core.py", line 981, in __init__
    graphBase.configGraphIO(**kwargs)  # FIXME KILL IT WITH FIRE
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tom/git/pyontutils/neurondm/neurondm/core.py", line 1395, in configGraphIO
    raise graphBase.Git

In [2]:
# show the first entry of the `neurons` collection returned by config.neurons()
print(neurons[0])

NeuronKblad(EntailedPhenotype('NCBITaxon:10116',
                              'ilxtr:hasInstanceInTaxon',
                              label='Rattus norvegicus'),
            Phenotype('ILX:0739295',
                      'ilxtr:hasSomaLocatedIn',
                      label='Thirteenth thoracic ganglion'),
            Phenotype('ILX:0739296',
                      'ilxtr:hasSomaLocatedIn',
                      label='fifth lumbar sympathetic ganglion'),
            Phenotype('ILX:0739297',
                      'ilxtr:hasSomaLocatedIn',
                      label='sixth lumbar sympathetic ganglion'),
            Phenotype('ILX:0786933',
                      'ilxtr:hasSomaLocatedIn',
                      label='Second lumbar ganglion'),
            Phenotype('ILX:0787009',
                      'ilxtr:hasSomaLocatedIn',
                      label='Twelfth thoracic ganglion'),
            Phenotype('ILX:0788315',
                      'ilxtr:hasSomaLocatedIn',
                   

# SPARQL

In [3]:
import io
import csv
import requests
from urllib.parse import quote as url_quote

# SPARQL endpoint URL; used as the default `endpoint` of query() below
blazegraph_endpoint = 'https://blazegraph.scicrunch.io/blazegraph/sparql'


def procq(res):
    """Extract the single integer value from a two-row, one-column result.

    ``res`` must unpack into exactly two rows: the first is ignored and the
    second must contain exactly one value, which is converted with ``int``.
    Any other shape raises ``ValueError`` from the unpacking.

    NOTE(review): presumably meant for count-style queries run through
    ``query`` (first row being the CSV header) -- confirm against callers.
    """
    _header_row, value_row = res
    (count_text,) = value_row
    return int(count_text)


def query(query, *, endpoint=blazegraph_endpoint, **kwargs):
    """Run a SPARQL query over HTTP GET and return the CSV result rows.

    Parameters
    ----------
    query : str
        SPARQL query text; it is percent-encoded in full before being
        placed in the ``query`` URL parameter.
    endpoint : str, keyword-only
        URL of the SPARQL endpoint. Defaults to ``blazegraph_endpoint``.
    **kwargs
        Accepted for call compatibility; currently not used.

    Returns
    -------
    list[list[str]]
        One list of strings per CSV row of the response body, in order.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status. Without this check
        the error page would be parsed as CSV and returned as if it were
        query results.
    """
    encoded_query = url_quote(query, safe='')
    url = f'{endpoint}?query={encoded_query}'
    headers = {'Accept': 'text/csv'}  # ask the endpoint for CSV results
    resp = requests.get(url, headers=headers)
    # fail loudly instead of handing an error body to the csv parser
    resp.raise_for_status()
    return list(csv.reader(io.StringIO(resp.text)))

In [4]:
# copy and paste (and edit as needed) queries to retrieve data

# npo partial orders query for neurons that don't pass through layers
# returns an adjacency list
# Each result row is (?s, ?v1, ?v2); the VALUES block restricts ?s to
# mmset1:1 and the query is capped at 900 rows.
# NOTE(review): the prefixes used below (mmset1:, ilxtr:, rdf:, owl:) are not
# declared in the query text; presumably the endpoint predefines them -- confirm
# before pointing this at a different endpoint.
example_query = """
select distinct
?s
?v1
?v2
where
{
VALUES ?s {
mmset1:1
}
?s ilxtr:neuronPartialOrder ?o .
?o (rdf:rest|rdf:first)* ?r1 .
?o (rdf:rest|rdf:first)* ?r2 .
?r1 rdf:rest|rdf:first ?v1 .
?r2 rdf:rest|rdf:first ?v2 .
?v1 rdf:type owl:Class .
?mediator rdf:first ?v1 .  # car
?mediator rdf:rest*/rdf:first/rdf:first ?v2 .  # caadr
} order by ?s ?v1 ?v2 limit 900
"""

# send the query to the default endpoint; rows come back as lists of strings
result = query(example_query)

In [5]:
# display the rows returned by query(example_query)
result

[['s', 'v1', 'v2'],
 ['http://uri.interlex.org/tgbugs/uris/readable/sparc-nlp/mmset1/1',
  'http://purl.obolibrary.org/obo/UBERON_0011390',
  'http://purl.obolibrary.org/obo/UBERON_0004917'],
 ['http://uri.interlex.org/tgbugs/uris/readable/sparc-nlp/mmset1/1',
  'http://purl.obolibrary.org/obo/UBERON_0022278',
  'http://purl.obolibrary.org/obo/UBERON_0011390']]