In [1]:
import requests
import json

### URL to use to search via search engine

In [2]:
# Landing page of the IDR test web client.
INDEX_PAGE = "https://idr-testing.openmicroscopy.org/webclient/?experimenter=-1"
# Search-engine endpoint; ``{type}`` is replaced by the resource type.
SEARCH_ENGINE_URL = (
    "https://idr-testing.openmicroscopy.org"
    "/searchengine/api/v1/resources/{type}/search/"
)
# Same endpoint with the ``key``/``value`` query-string placeholders appended.
KEY_VALUE_SEARCH = "".join([SEARCH_ENGINE_URL, "?key={key}", "&value={value}"])

### URLs to use to search via ``mapr``

In [3]:
# mapr endpoint templates; the ``{...}`` placeholders are filled with ``str.format``.

# Case-insensitive lookup of the annotation values matching ``{value}``.
MAPR_URL = (
    "https://idr-testing.openmicroscopy.org/mapr/api/{key}/"
    "?value={value}&case_sensitive=false&orphaned=true"
)
# Top-level containers (screens and projects) for one annotation value.
SCREENS_PROJECTS_URL = (
    "https://idr-testing.openmicroscopy.org/mapr/api/{key}/"
    "?value={value}"
)
# Plates of the screen ``{screen_id}``.
PLATES_URL = (
    "https://idr-testing.openmicroscopy.org/mapr/api/{key}/plates/"
    "?value={value}&id={screen_id}"
)
# Datasets of the project ``{project_id}``.
DATASETS_URL = (
    "https://idr-testing.openmicroscopy.org/mapr/api/{key}/datasets/"
    "?value={value}&id={project_id}"
)
# Images below the parent node ``{parent_type}`` with id ``{parent_id}``.
IMAGES_URL = (
    "https://idr-testing.openmicroscopy.org/mapr/api/{key}/images/"
    "?value={value}&node={parent_type}&id={parent_id}"
)


In [4]:
# Create the HTTP session shared by every request in this notebook.
# It is deliberately not wrapped in a ``with`` block: the context manager
# closes the session when the block exits, yet the helper functions defined
# in later cells keep calling ``session.get``.
session = requests.Session()
request = requests.Request('GET', INDEX_PAGE)
prepped = session.prepare_request(request)
response = session.send(prepped)
# Fail fast if the index page could not be loaded; ``raise_for_status`` is a
# no-op for successful responses, so no explicit status check is needed.
response.raise_for_status()

In [13]:
# Annotation key as the search engine names it.
KEY = "Gene Symbol"
# Name of the equivalent key on the mapr side.
KEY_MAPR = "gene"
# Values looked up through both back ends.
ITEMS = [
    "pax1",
    "pep",
    "blah",
]

### Helper method to retrieve images using the search engine

In [14]:
def load_using_search_api():
    """Look up every entry of ``ITEMS`` directly through the search-engine API.

    One GET request is issued per item against ``KEY_VALUE_SEARCH`` with the
    resource type ``'image'`` and the key ``KEY``.

    Returns:
        dict mapping each lower-cased item to the list of unique image ids
        found in the response, in first-seen order. An item whose response
        holds no nested ``'results'`` list maps to an empty list.
    """
    results = {}
    for item in ITEMS:
        url = KEY_VALUE_SEARCH.format(type='image', key=KEY, value=item)
        # Named ``payload`` so the imported ``json`` module is not shadowed.
        payload = session.get(url).json()
        # NOTE(review): only this single response is read; if the endpoint
        # paginates, later pages are not fetched -- confirm against the API.
        hits = payload.get('results') or {}
        images = hits['results'] if 'results' in hits else []
        # dict.fromkeys drops duplicates while keeping first-seen order,
        # avoiding a linear scan of the id list for every image.
        results[item.lower()] = list(
            dict.fromkeys(image['id'] for image in images))
    return results

### Helper method to retrieve images using ``mapr``

In [15]:
def get_items():
    """Resolve every entry of ``ITEMS`` to the matching mapr values.

    Each item is looked up through ``MAPR_URL`` using the key ``KEY_MAPR``.

    Returns:
        A ``(items, not_found)`` tuple: ``items`` lists the ``'id'`` of every
        entry of the ``'maps'`` lists returned, in request order;
        ``not_found`` lists the items whose ``'maps'`` list was empty.
    """
    found_ids = []
    missing = []
    for term in ITEMS:
        lookup_url = MAPR_URL.format(key=KEY_MAPR, value=term)
        maps = session.get(lookup_url).json()['maps']
        if not maps:
            missing.append(term)
        found_ids.extend(entry['id'] for entry in maps)
    return found_ids, missing

def parse_annotation(images, json_data, item, name, data_type):
    """Collect the ids of the images found under each container in ``json_data``.

    For every entry of ``json_data[data_type]`` the mapr images endpoint
    (``IMAGES_URL``) is queried, and each image id not already present is
    appended to ``images``.

    Args:
        images: list of image ids, extended in place; existing entries and
            their order are kept.
        json_data: decoded mapr response holding a ``data_type`` list whose
            entries carry an ``'id'``.
        item: annotation value sent as the ``value`` query parameter.
        name: name of the parent screen or project. Not used by this
            function; kept so that existing callers keep working.
        data_type: plural container type such as ``'plates'`` or
            ``'datasets'``; its last character is dropped to build the
            ``node`` query parameter.

    Returns:
        None. The result is delivered through ``images``.
    """
    # Set mirror of ``images`` so the duplicate check does not rescan the
    # whole list for every image.
    seen = set(images)
    parent_type = data_type[:-1]
    for container in json_data[data_type]:
        url = IMAGES_URL.format(key=KEY_MAPR, value=item,
                                parent_type=parent_type,
                                parent_id=container['id'])
        # Named ``payload`` so the imported ``json`` module is not shadowed.
        payload = session.get(url).json()
        for image in payload['images']:
            image_id = image['id']
            if image_id not in seen:
                seen.add(image_id)
                images.append(image_id)
                                
def load_using_mapr():
    """Retrieve the image ids for every entry of ``ITEMS`` by walking mapr.

    The values returned by ``get_items`` are resolved to screens and
    projects, then to plates and datasets, and finally to images via
    ``parse_annotation``.

    Returns:
        dict mapping each lower-cased mapr value to its own list of unique
        image ids. Values that differ only by case share one entry. Every
        search term reported as not found maps to an empty list.
    """
    results = {}
    items, not_found = get_items()
    for item in items:
        # One list per key: sharing a single list across items would make
        # every key report the images of all items.
        images = results.setdefault(item.lower(), [])
        maps = session.get(
            MAPR_URL.format(key=KEY_MAPR, value=item)).json()['maps']
        # NOTE(review): the lookup below does not depend on the map entry,
        # so it is repeated once per entry; kept to preserve the original
        # request pattern. De-duplication in parse_annotation makes the
        # repetition harmless for the result.
        for _ in maps:
            containers = session.get(
                SCREENS_PROJECTS_URL.format(key=KEY_MAPR, value=item)).json()
            for screen in containers['screens']:
                value = screen['extra']['value']
                url = PLATES_URL.format(key=KEY_MAPR, value=value,
                                        screen_id=screen['id'])
                parse_annotation(images, session.get(url).json(), value,
                                 screen['name'], 'plates')
            for project in containers['projects']:
                # Read the value from the project itself, not from the last
                # screen seen. Assumes projects carry the same
                # 'extra'/'value' field as screens -- TODO confirm.
                value = project['extra']['value']
                url = DATASETS_URL.format(key=KEY_MAPR, value=value,
                                          project_id=project['id'])
                parse_annotation(images, session.get(url).json(), value,
                                 project['name'], 'datasets')
    for missing in not_found:
        results.setdefault(missing.lower(), [])
    return results
    

### Search using search engine 

In [16]:
%%time
# Time the search-engine path; ``results`` maps each lower-cased entry of ITEMS to a list of image ids.
results = load_using_search_api()

CPU times: user 79.9 ms, sys: 28.3 ms, total: 108 ms
Wall time: 1.12 s


### Search using ``mapr`` 

In [17]:
%%time
# Time the mapr path; ``results_mapr`` is the dict returned by load_using_mapr().
results_mapr = load_using_mapr()

CPU times: user 367 ms, sys: 75.4 ms, total: 443 ms
Wall time: 3.8 s


### Compare the outputs of the search

The checks below compare the keys (i.e. the searched gene symbols) and the values (i.e. the lists of image ids) returned by the two searches.

In [18]:
def dict_compare(d1, d2):
    """Compare two dicts whose values are lists, ignoring list order.

    Args:
        d1: first dict; values must be lists of mutually comparable items.
        d2: second dict, same shape as ``d1``.

    Returns:
        A tuple ``(added, removed, modified, same)``:
        ``added`` is the set of keys only in ``d1``;
        ``removed`` is the set of keys only in ``d2``;
        ``modified`` maps each shared key whose lists differ to the pair
        ``(d1[key], d2[key])``;
        ``same`` is the set of shared keys whose lists hold the same items.
    """
    d1_keys = set(d1)
    d2_keys = set(d2)
    added = d1_keys - d2_keys
    removed = d2_keys - d1_keys
    modified = {}
    same = set()
    for key in d1_keys & d2_keys:
        # sorted() returns new lists. list.sort() sorts in place and returns
        # None, so comparing its results would treat every pair as equal.
        if sorted(d1[key]) == sorted(d2[key]):
            same.add(key)
        else:
            modified[key] = (d1[key], d2[key])
    return added, removed, modified, same

In [19]:
# d1 is the search-engine result and d2 the mapr result, so ``added`` holds keys
# missing from mapr and ``removed`` holds keys missing from the search engine.
added, removed, modified, same = dict_compare(results, results_mapr)

In [20]:
# Both searches must report the same keys and, for each key, the same image ids.
# The messages name the offending keys so a failure can be diagnosed from the output.
assert len(added) == 0, "keys only returned by the search engine: %s" % sorted(added)
assert len(removed) == 0, "keys only returned by mapr: %s" % sorted(removed)
assert len(modified) == 0, "keys whose image ids differ: %s" % sorted(modified)
assert len(same) == len(ITEMS), "only %d of %d items match" % (len(same), len(ITEMS))