# OpenNeuro Dataset Query by NIDM-Terms Example

In [1]:
import ipywidgets as widgets
import json
import os
from os import system
from os.path import join,basename
from IPython.display import display
try:
    from cognitiveatlas.api import get_concept, get_disorder
except ImportError:
    system('python -m pip install cognitiveatlas')
    from cognitiveatlas.api import get_concept, get_disorder
try:
    import glob2
except ImportError:
    system('python -m pip install glob2')
    import glob2
import requests


In [2]:
# Build one combined dictionary of annotations: the first-level key is the
# basename of each path found under the OpenNeuro_jsonld directory (the dataset
# number), the second-level key is each annotated variable's 'sourceVariable'.
data = {}
for dataset in glob2.glob("../../terms/OpenNeuro_Datasets_terms/OpenNeuro_jsonld/**"):
    dataset_id = basename(dataset)
    # every path returned by the glob gets a (possibly empty) entry
    dataset_terms = data[dataset_id] = {}
    # read every jsonld document found anywhere below this path
    for jsonldfile in glob2.glob(join(dataset, "**", "*.jsonld")):
        with open(jsonldfile) as f:
            terms = json.load(f)['terms']
        # A document describing a single variable stores 'terms' as one dict;
        # a document describing several variables stores a list of dicts.
        if isinstance(terms, dict):
            terms = [terms]
        elif not isinstance(terms, list):
            # any other shape is ignored
            continue
        for var in terms:
            dataset_terms[var['sourceVariable']] = var
   

In [3]:
# Map every isAbout concept URL ('@id') found in `data` to its label.
isAbout_term_labels = {}
for dataset_variables in data.values():
    for annotations in dataset_variables.values():
        concepts = annotations.get("isAbout")
        # isAbout holds one dict for a single concept, or a list of dicts
        # when the variable is annotated with several concepts
        if isinstance(concepts, dict):
            concepts = [concepts]
        elif not isinstance(concepts, list):
            # variable has no usable isAbout annotation
            continue
        for concept in concepts:
            # the first non-empty label seen for a URL wins
            if concept['@id'] in isAbout_term_labels:
                continue
            if concept['label'] != "":
                isAbout_term_labels[concept['@id']] = concept['label']
                    
                     

In [4]:

# Shared state for the query-building GUI. These are plain module-level names;
# a `global` statement is only needed inside the functions that rebind them.
# currentQueryTerm: label most recently selected in the dropdown ('' = none yet)
currentQueryTerm = ''
# queryTerms: labels added to the query so far, in the order they were added
queryTerms = []
# handler for drop down
def dropdown_eventhandler(change):
    """Remember the label currently selected in the dropdown.

    Parameters
    ----------
    change : mapping
        Change notification; only its 'name' and 'new' entries are read.

    Only notifications whose 'name' is 'value' and whose 'new' entry is
    truthy update the module-level ``currentQueryTerm``; everything else
    is ignored.
    """
    global currentQueryTerm
    if change['name'] != 'value':
        return
    selected = change['new']
    if not selected:
        return
    currentQueryTerm = selected
        
            
# add handlers for simple GUI
def btn_addConceptToQuery(obj):
    """Click handler: append the currently selected label to the query.

    Parameters
    ----------
    obj : object
        Argument supplied by the click callback; not used.

    Nothing happens when no label has been selected yet (empty string) or
    when the label is already part of ``queryTerms``. Otherwise the label is
    appended and the updated query is printed.
    """
    global currentQueryTerm
    global queryTerms
    term = currentQueryTerm
    if term in queryTerms or term == '':
        return
    queryTerms.append(term)
    print("Query: %s " % (queryTerms,))
        
# handlers for buttons
def btn_reset(obj):
    """Click handler: discard every term added to the query so far.

    Parameters
    ----------
    obj : object
        Argument supplied by the click callback; not used.

    ``queryTerms`` is rebound to a brand-new empty list (the previous list
    object is left untouched) and a confirmation line is printed.
    """
    global queryTerms
    queryTerms = list()
    print("query terms reset: %s" % (queryTerms,))
def ANDQuery(obj):
    """Click handler: print the datasets that match every queried concept.

    Parameters
    ----------
    obj : object
        Argument supplied by the click callback; not used.

    Runs ``doQuery()`` to obtain, for each concept, the list of matching
    datasets, counts how many concept lists each dataset appears in, and
    keeps the datasets whose count equals the number of concepts returned.
    The outcome is printed; nothing is returned.
    """
    matching_datasets = doQuery()

    # a dataset passes the AND test when it shows up under every concept
    required = len(matching_datasets)
    tally = {}
    for hits in matching_datasets.values():
        for dataset in hits:
            tally[dataset] = tally.get(dataset, 0) + 1

    and_query_result = [dataset for dataset, count in tally.items() if count == required]

    if not and_query_result:
        print("AND Matching datasets: None")
        return
    print("AND Matching datasets: ")
    for match in and_query_result:
        print(match)

def ORQuery(obj):
    """Click handler: print the datasets that match at least one queried concept.

    Parameters
    ----------
    obj : object
        Argument supplied by the click callback; not used.

    Runs ``doQuery()`` and prints the union of the per-concept dataset lists.
    Each dataset is printed once, in the order it is first encountered, even
    when it matches several concepts. Nothing is returned.
    """
    matching_datasets = doQuery()

    or_query_result = []
    # track what has been emitted so a dataset matching several concepts
    # is not reported once per concept
    seen = set()
    for datasets in matching_datasets.values():
        for dataset in datasets:
            if dataset not in seen:
                seen.add(dataset)
                or_query_result.append(dataset)

    if not or_query_result:
        print("OR Matching datasets: None")
        return
    print("OR Matching datasets:")
    for match in or_query_result:
        print(match)
            
def _isabout_ids(isabout):
    """Return the set of '@id' URLs held in one variable's isAbout annotation.

    ``isabout`` is a single dict, a list of dicts, or anything else (treated
    as "no concepts").
    """
    if isinstance(isabout, dict):
        isabout = [isabout]
    elif not isinstance(isabout, list):
        return set()
    return {
        str(item['@id'])
        for item in isabout
        if isinstance(item, dict) and item.get('@id') is not None
    }


def doQuery():
    """Find, for every term in ``queryTerms``, the datasets annotated with it.

    Reads the module-level ``queryTerms`` (labels), ``isAbout_term_labels``
    (concept URL -> label) and ``data`` (dataset -> variable -> annotations).

    Returns
    -------
    dict
        Maps each query term that corresponds to at least one known concept
        URL to a list of OpenNeuro dataset URLs, each listed once, in the
        iteration order of ``data``. Terms with no known concept URL get no
        entry.
    """
    global queryTerms
    matching_datasets = {}
    print("Running query on datasets for terms: %s" %queryTerms)
    for term in queryTerms:
        # Several concept URLs may share one label; gather them all so the
        # matches of one URL are not thrown away when the next one is handled.
        concept_urls = {
            str(url) for url, label in isAbout_term_labels.items() if label == term
        }
        if not concept_urls:
            continue
        matches = matching_datasets.setdefault(term, [])
        for dataset, dataset_variables in data.items():
            dataset_url = "https://openneuro.org/datasets/ds" + dataset
            if dataset_url in matches:
                continue
            for dataset_annotations in dataset_variables.values():
                # Compare '@id' values exactly; a substring test on the
                # stringified annotation would also hit URLs that merely
                # start with the one being searched for.
                if concept_urls & _isabout_ids(dataset_annotations.get('isAbout')):
                    matches.append(dataset_url)
                    break

    return matching_datasets
            
        
    
    

In [5]:
# Save the concept-URL -> label mapping as pretty-printed JSON in the current
# working directory. `json` is already imported in the notebook's import cell.
with open("openneuro_context_annotations.json", "w", encoding="utf-8") as outfile:
    json.dump(isAbout_term_labels, outfile, indent=2)

In [6]:
# bring up a new selector for adding a concept to the query
queryTermSelector = widgets.Dropdown(options=list(isAbout_term_labels.values()))
queryTermSelector.observe(dropdown_eventhandler)
# create a little user interface to query using isAbout concepts
addConceptToQuery = widgets.Button(description='Add Query Concept')
addConceptToQuery.on_click(btn_addConceptToQuery)
doANDQuery = widgets.Button(description='Run AND query')
doANDQuery.on_click(ANDQuery)
doORQuery = widgets.Button(description='Run OR query')
doORQuery.on_click(ORQuery)
resetQuery = widgets.Button(description='Reset query')
resetQuery.on_click(btn_reset)
print("Please select concepts from the dropdown to include in a query:")
display(queryTermSelector)
display(addConceptToQuery)
display(doANDQuery)
display(doORQuery)
display(resetQuery)

Please select concepts from the dropdown to include in a query:


Dropdown(options=('age', 'sex', 'participant group', 'handedness assessment', 'gender', 'patients birth date',…

Button(description='Add Query Concept', style=ButtonStyle())

Button(description='Run AND query', style=ButtonStyle())

Button(description='Run OR query', style=ButtonStyle())

Button(description='Reset query', style=ButtonStyle())

Query: ['response inhibition'] 
Query: ['response inhibition', 'age'] 
Running query on datasets for terms: ['response inhibition', 'age']
AND Matching datasets: 
https://openneuro.org/datasets/ds002041
https://openneuro.org/datasets/ds000115
https://openneuro.org/datasets/ds000240
https://openneuro.org/datasets/ds000030
Running query on datasets for terms: ['response inhibition', 'age']
OR Matching datasets:
https://openneuro.org/datasets/ds002041
https://openneuro.org/datasets/ds000115
https://openneuro.org/datasets/ds000240
https://openneuro.org/datasets/ds000030
https://openneuro.org/datasets/ds000122
https://openneuro.org/datasets/ds001838
https://openneuro.org/datasets/ds001460
https://openneuro.org/datasets/ds001652
https://openneuro.org/datasets/ds000113
https://openneuro.org/datasets/ds001608
https://openneuro.org/datasets/ds003399
https://openneuro.org/datasets/ds003352
https://openneuro.org/datasets/ds002422
https://openneuro.org/datasets/ds002842
https://openneuro.org/datas