# OpenNeuro Dataset Query by NIDM-Terms Example

In [249]:
import ipywidgets as widgets
import json
import glob, os
from os import system
from os.path import join,basename
from IPython.display import display
try:
    from cognitiveatlas.api import get_concept, get_disorder
except ImportError:
    system('python -m pip install cognitiveatlas')
    from cognitiveatlas.api import get_concept, get_disorder
import requests


In [250]:
# ask user for scicrunch API key
user_key=''
while user_key == '':
    user_key=input("Please enter your scicrunch.org API key (see scicrunch.org->My Account->API Keys): ")

Please enter your scicrunch.org API key (see scicrunch.org->My Account->API Keys): 29hUT0BKdmUEgD6KLppEH9qDItY0eQXM


In [251]:
# set up uber jsonld dictionary
data={}
# for all jsonld documents in this repo, load them into a graph
for dataset in glob.glob("../../terms/OpenNeuro_BIDS_terms/**"):
    # set top-level data key to datset number
    data[basename(dataset)] = {}
    # loop through all jsonld files and get isAbouts
    for jsonldfile in glob.glob(join(dataset,"*.jsonld")):
        #load jsonld file
        #print("Loading jsonld file: %s for datset: %s" %(basename(jsonldfile),basename(dataset)))
        with open(jsonldfile) as f:
            data[basename(dataset)] = json.load(f)

    

In [360]:
# find all isAbout concepts in data dictionary
isAbout_terms=[]
for key,val in data.items():
    for subkey,subval in data[key].items():
        if ("isAbout" in subkey) and ([subval] not in isAbout_terms):
            isAbout_terms.append([subval])

In [361]:
for stuff in isAbout_terms:
    print(stuff)

["['http://uri.interlex.org/ilx_0100400']"]
["['http://uri.interlex.org/ilx_0104886']"]
["['http://uri.interlex.org/ilx_0103955']"]
["['https://www.cognitiveatlas.org/concept/json/trm_5159c70d0e98e/']"]
["['http://uri.interlex.org/ilx_0105510']"]
["['http://uri.interlex.org/ilx_0102388 ', ' https://www.cognitiveatlas.org/concept/id/trm_4a3fd79d0af66/']"]
["['http://cognitiveatlas.org/concept/json/trm_4a3fd79d0b5a7/']"]
["['http://uri.interlex.org/ilx_0112866']"]
["['https://www.cognitiveatlas.org/concept/json/trm_52b5f1ef4f9cc/', ' http://cognitiveatlas.org/concept/json/trm_4a3fd79d0a9dc/']"]
["['https://www.cognitiveatlas.org/concept/json/trm_5022ef7599294/']"]


In [367]:
# isAbout concept URLs don't have any human-readable labels at this point so let's go and try to retrieve them
# first let's get labels for all the cognitive atlas isAbout concepts from their API
isAbout_term_labels={}
scicrunch_base_uri = 'https://scicrunch.org/api/1/ilx/search/curie/'
for terms in isAbout_terms:
    for url in terms:     
        # retrieve label from cog atlas
        if "cognitiveatlas" in url:
            # parse out id of term and get using cog atlas python tool...
            id = url.rsplit('/',1)[0].rsplit('/',1)[1]
            # don't know if this is a concept or disorder so we'll try both
            tmp = get_concept(id=id,silent=True)
            isAbout_term_labels[url] = tmp.json['name'].lower()
            #print("cogatlas label: %s" %isAbout_term_labels[url])
        elif "interlex" in url:
            # get label for interlex terms
            payload={}
            headers={}
            full_url = scicrunch_base_uri + url.rsplit('/',1)[1].replace('_',':').rstrip("']'") + "?key=" + user_key
            response = requests.request("GET",full_url,headers=headers,data=payload)
            # response is a json dictionary. here we want the label
            isAbout_term_labels[url] = response.json()["data"]["label"].lower()
            #print("interlex label: %s" %isAbout_term_labels[url] )
            

In [371]:
print(isAbout_term_labels)

{"['http://uri.interlex.org/ilx_0100400']": 'age', "['http://uri.interlex.org/ilx_0104886']": 'handedness assessment', "['http://uri.interlex.org/ilx_0103955']": 'ethnicity', "['https://www.cognitiveatlas.org/concept/json/trm_5159c70d0e98e/']": 'sleep', "['http://uri.interlex.org/ilx_0105510']": 'institution', "['http://uri.interlex.org/ilx_0102388 ', ' https://www.cognitiveatlas.org/concept/id/trm_4a3fd79d0af66/']": 'response inhibition', "['http://cognitiveatlas.org/concept/json/trm_4a3fd79d0b5a7/']": 'working memory', "['http://uri.interlex.org/ilx_0112866']": 'sex', "['https://www.cognitiveatlas.org/concept/json/trm_52b5f1ef4f9cc/', ' http://cognitiveatlas.org/concept/json/trm_4a3fd79d0a9dc/']": 'naming', "['https://www.cognitiveatlas.org/concept/json/trm_5022ef7599294/']": 'anxiety'}


In [369]:

# temporary variables for query
global currentQueryTerm, queryTerms
currentQueryTerm =''
queryTerms = []
# handler for drop down
def dropdown_eventhandler(change):
    #if change['name'] == 'value' and (change['new'] != change['old']):
    if change['name'] == 'value' and (change['new']):
        # set currentQueryTerm to selection
        #print("current query term changed %s" %change['new'])
        global currentQueryTerm
        currentQueryTerm = change['new']
        
            
# add handlers for simple GUI
def btn_addConceptToQuery(obj):
    global currentQueryTerm
    global queryTerms
    if (currentQueryTerm not in queryTerms) and (currentQueryTerm != ''):
        #add to query Terms
        queryTerms.append(currentQueryTerm)
        print("Query: %s " %queryTerms)
        
# handlers for buttons
def btn_reset(obj):
    global queryTerms
    queryTerms = []
    print("query terms reset: %s" %queryTerms)
def ANDQuery(obj):
    # this function performs an AND query vs. an OR query on concepts
    # to do an AND query we look through all the matching_datasets for each of the concepts and select datasets
    # that satisfy all of them
    
    matching_datasets = doQuery()
    and_query_result = []
    # get number of concepts in the current query
    num_concepts = len(matching_datasets.keys())
    # number of matches for each dataset.  If this number equals num_concepts then it satisfies the AND 
    # criteria
    num_matches={}
    for concept,datasets in matching_datasets.items():
        for dataset in datasets:
            # does this dataset appear across all matching_datasets[term]?
            # is so add it to the num_matches dictionary with the dataset identifier as the key
            if dataset in num_matches.keys():
                # increment counter
                num_matches[dataset] = num_matches[dataset] + 1
            else:
                num_matches[dataset] = 1
    # now store and_query_result
    for dataset,matches in num_matches.items():
        if matches == num_concepts:
            and_query_result.append(dataset)
    

    if len(and_query_result) == 0:
        print("AND Matching datasets: None")
    else:
        print("AND Matching datasets: ")
        for match in and_query_result:
              print(match)

def ORQuery(obj):       
    # an OR query here is basically returning any datasets that are in the resulting structure
    
    matching_datasets = doQuery()
    or_query_result=[]
    for concept,datasets in matching_datasets.items():
        for items in datasets:
            or_query_result.append(items)

    if len(or_query_result) == 0:
        print("OR Matching datasets: None")
    else:
        print("OR Matching datasets:")
        for match in or_query_result:
            print(match)
            
def doQuery():
    global queryTerms
    matching_datasets={}
    print("Running query on datasets for terms: %s" %queryTerms)
    for term in queryTerms:
        # run query by looking for url matching queryTerms in isAbout_terms_labels
        # which has mapping between isAbout URL and it's label
        for isabout_key, isabout_value in isAbout_term_labels.items():
            # check if isAbout_terms_labels value is the term we're looking for
            if isabout_value == term:
                matching_datasets[term] = []
                # sometimes we have more than 1 isAbout URL so loop through them looking
                # for a match wtih our query term URL
                for dataset,dataset_annotations in data.items():
                    for key,value in dataset_annotations.items():
                        #print("looking for isAbout match (%s,%s)" %([isabout_key],value))
                        if (key=='isAbout')and (isabout_key in value):
                            matching_datasets[term].append("https://openneuro.org/datasets/ds" + dataset)
                           
    return matching_datasets
            
        
    
    

In [370]:
# bring up a new selector for adding a concept to the query
queryTermSelector = widgets.Dropdown(options=list(isAbout_term_labels.values()))
queryTermSelector.observe(dropdown_eventhandler)
# create a little user interface to query using isAbout concepts
addConceptToQuery = widgets.Button(description='Add Query Concept')
addConceptToQuery.on_click(btn_addConceptToQuery)
doANDQuery = widgets.Button(description='Run AND query')
doANDQuery.on_click(ANDQuery)
doORQuery = widgets.Button(description='Run OR query')
doORQuery.on_click(ORQuery)
resetQuery = widgets.Button(description='Reset query')
resetQuery.on_click(btn_reset)
print("Please select concepts from the dropdown to include in a query:")
display(queryTermSelector)
display(addConceptToQuery)
display(doANDQuery)
display(doORQuery)
display(resetQuery)

Please select concepts from the dropdown to include in a query:


Dropdown(options=('age', 'handedness assessment', 'ethnicity', 'sleep', 'institution', 'response inhibition', …

Button(description='Add Query Concept', style=ButtonStyle())

Button(description='Run AND query', style=ButtonStyle())

Button(description='Run OR query', style=ButtonStyle())

Button(description='Reset query', style=ButtonStyle())

Query: ['sex'] 
Query: ['sex', 'institution'] 
Running query on datasets for terms: ['sex', 'institution']
AND Matching datasets: None
Running query on datasets for terms: ['sex', 'institution']
OR Matching datasets:
https://openneuro.org/datasets/ds001750
https://openneuro.org/datasets/ds001554
https://openneuro.org/datasets/ds001722
https://openneuro.org/datasets/ds001502
https://openneuro.org/datasets/ds001242
https://openneuro.org/datasets/ds001919
query terms reset: []
Query: ['age'] 
Running query on datasets for terms: ['age']
AND Matching datasets: 
https://openneuro.org/datasets/ds001493
https://openneuro.org/datasets/ds000122
https://openneuro.org/datasets/ds001460
https://openneuro.org/datasets/ds001652
https://openneuro.org/datasets/ds001608
https://openneuro.org/datasets/ds001450
https://openneuro.org/datasets/ds001461
https://openneuro.org/datasets/ds001299
https://openneuro.org/datasets/ds000239
https://openneuro.org/datasets/ds000005
https://openneuro.org/datasets/ds001