In [17]:
import requests
import json

### URLs used to search via the search engine

In [18]:
# Host serving both the web client and the search engine
_IDR_HOST = "https://idr-testing.openmicroscopy.org"
# Web client landing page
INDEX_PAGE = _IDR_HOST + "/webclient/?experimenter=-1"
# Search engine endpoints; ``{type}`` is the resource type, e.g. "image"
SEARCH_ENGINE_URL = _IDR_HOST + "/searchengine/api/v1/resources/{type}/"
# Values available for ``{key}``
KEYS_SEARCH = SEARCH_ENGINE_URL + "searchvaluesusingkey/?key={key}"
# Resources annotated with the ``{key}``/``{value}`` pair
KEY_VALUE_SEARCH = SEARCH_ENGINE_URL + "search/?key={key}&value={value}"

### URLs to use to search via ``mapr``

In [19]:
# mapr endpoints; ``{key}`` is the mapr key and ``{value}`` the value searched for
_MAPR_API = "https://idr-testing.openmicroscopy.org/mapr/api/{key}/"
# Value lookup, requested with case_sensitive=false and orphaned=true
MAPR_URL = _MAPR_API + "?value={value}&case_sensitive=false&orphaned=true"
# Screens and projects for a value
SCREENS_PROJECTS_URL = _MAPR_API + "?value={value}"
# Plates of a given screen / datasets of a given project
PLATES_URL = _MAPR_API + "plates/?value={value}&id={screen_id}"
DATASETS_URL = _MAPR_API + "datasets/?value={value}&id={project_id}"
# Images of a given parent node (``{parent_type}`` with id ``{parent_id}``)
IMAGES_URL = _MAPR_API + "images/?value={value}&node={parent_type}&id={parent_id}"


In [20]:
# Create the HTTP session shared by all the helper functions of this notebook.
# It is deliberately not opened with a ``with`` block: leaving the block would
# call ``session.close()`` while the cells below keep using ``session``.
session = requests.Session()
request = requests.Request('GET', INDEX_PAGE)
prepped = session.prepare_request(request)
response = session.send(prepped)
# Raises requests.HTTPError for 4xx/5xx responses and does nothing otherwise
response.raise_for_status()

In [21]:
# Annotation key as named by the search engine
# (substituted for ``{key}`` in the search engine URLs)
KEY = "Gene Symbol"
# Equivalent key in mapr (substituted for ``{key}`` in the mapr URLs)
KEY_MAPR = "gene"

### Load all the values for a specific key.
Only non-empty values are considered.

In [22]:
# Helper method to load the possible values for a given key
def load_values_for_given_key():
    """Return the non-empty values the search engine reports for ``KEY``.

    The ``KEYS_SEARCH`` endpoint is queried for the ``image`` resource type
    through the module-level ``session``. Every truthy ``'Value'`` found in
    the ``'data'`` list of the JSON response is kept, in response order.
    """
    url = KEYS_SEARCH.format(type='image', key=KEY)
    payload = session.get(url).json()
    return [entry['Value'] for entry in payload['data'] if entry['Value']]

In [47]:
# All the non-empty values the search engine knows for KEY
values = load_values_for_given_key()

In [48]:
# Sort in place: the searches below run on the slice values[s:e] of this sorted list
values.sort()

In [49]:
# Uncomment to restrict the comparison to a single value, e.g. for a quick check
#values = ['agap5']

### Helper method to retrieve images using the search engine

In [31]:
# Helper method retrieving the result using directly the search api
def load_using_search_api(values):
    results = {}
    for item in values:
        ids = []
        qs1 = {'type': 'image', 'key': KEY, 'value': item}
        url = KEY_VALUE_SEARCH.format(**qs1)  
        json = session.get(url).json()
        if 'results' in json['results']:
            images = json['results']['results']
            for image in images:
                if image['id'] not in ids:
                    ids.append(image['id'])
        results[item.lower()] = ids
    return results

### Helper method to retrieve images using ``mapr``

In [37]:
def get_items(values):
    """Resolve each value to the ids of the mapr entries matching it.

    One ``MAPR_URL`` query (key ``KEY_MAPR``) is sent through the module-level
    ``session`` per entry of ``values``.

    Returns a tuple ``(items, not_found)``: ``items`` collects the ``'id'`` of
    every entry of the ``'maps'`` list of each response, ``not_found`` the
    values whose ``'maps'`` list is empty.
    """
    items = []
    not_found = []
    for value in values:
        url = MAPR_URL.format(key=KEY_MAPR, value=value)
        maps = session.get(url).json()['maps']
        if len(maps) == 0:
            not_found.append(value)
        items.extend(entry['id'] for entry in maps)
    return items, not_found

def parse_annotation(images, json_data, item, name, data_type):
    """Append to ``images`` the ids of the images found under each container.

    Args:
        images: list of image ids, extended in place; ids already present
            are not added again.
        json_data: response holding the containers under ``json_data[data_type]``.
        item: value substituted for ``{value}`` in ``IMAGES_URL``.
        name: name of the parent screen or project; accepted for
            compatibility with existing callers, not used.
        data_type: ``'plates'`` or ``'datasets'``; its singular form (last
            character dropped) is sent as the parent node type.

    Returns:
        None. The result is the mutation of ``images``.
    """
    # Set mirror of ``images`` for constant-time duplicate checks
    seen = set(images)
    parent_type = data_type[:-1]
    for container in json_data[data_type]:
        url = IMAGES_URL.format(key=KEY_MAPR, value=item,
                                parent_type=parent_type,
                                parent_id=container['id'])
        payload = session.get(url).json()
        for image in payload['images']:
            image_id = image['id']
            if image_id not in seen:
                seen.add(image_id)
                images.append(image_id)
                                
def load_using_mapr(values):
    """Look up, via mapr, the images annotated with each value.

    ``get_items`` resolves ``values`` to mapr ids. For each id the screens
    and projects are listed, then their plates and datasets, and
    ``parse_annotation`` collects the image ids found under them.

    Returns a dict mapping each lower-cased id to its own list of distinct
    image ids. Values for which mapr returned nothing map to an empty list.
    """
    results = {}
    items, not_found = get_items(values)
    for item in items:
        # One list per key. Sharing a single list between all the keys would
        # make every key hold the images of every value. Ids differing only
        # by case accumulate into the same list.
        images = results.setdefault(item.lower(), [])
        url1 = MAPR_URL.format(key=KEY_MAPR, value=item)
        maps = session.get(url1).json()['maps']
        # NOTE(review): the entries of ``maps`` are not read; the lookup below
        # is repeated once per entry with the same ``item`` - confirm whether
        # each entry's own id was meant to be queried instead.
        for _ in maps:
            url2 = SCREENS_PROJECTS_URL.format(key=KEY_MAPR, value=item)
            containers = session.get(url2).json()
            for screen in containers['screens']:
                value = screen['extra']['value']
                url3 = PLATES_URL.format(key=KEY_MAPR, value=value,
                                         screen_id=screen['id'])
                parse_annotation(images, session.get(url3).json(), value,
                                 screen['name'], 'plates')
            for project in containers['projects']:
                value = project['extra']['value']
                url3 = DATASETS_URL.format(key=KEY_MAPR, value=value,
                                           project_id=project['id'])
                parse_annotation(images, session.get(url3).json(), value,
                                 project['name'], 'datasets')
    for value in not_found:
        # Never overwrite images already collected under the same key
        results.setdefault(value.lower(), [])
    return results
    

### Search using search engine 

In [50]:
# number of values to search for
s = 0
e = 500

In [51]:
%%time
# Maps each lower-cased value to the image ids found by the search engine
results = load_using_search_api(values[s:e])

CPU times: user 11.6 s, sys: 3.31 s, total: 14.9 s
Wall time: 3min 6s


### Search using ``mapr`` 

In [52]:
%%time
# Same slice of values as the search engine run, this time resolved through mapr
results_mapr = load_using_mapr(values[s:e])

[' ciz1', ' spen', '128up', '14-3-3epsilon', '14-3-3zeta', '140up', '15e2_human', '18w', '26-29-p', '2mit', '312', '4ehp', '5-ht1a', '5-ht1b', '5-ht2', '5-ht2a', '5-ht2b', '5-ht7', '5ptasei', '76p', '7a5', '7b2', '7h3', '825-oak', 'a', 'a10', 'a16', 'a1bg', 'a1bg-as1', 'a1cf', 'a1l167_human', 'a2agn1_human', 'a2bp1', 'a2bp1_human', 'a2ld1', 'a2m', 'a2m-as1', 'a2ml1', 'a2rum1_human', 'a3-3', 'a3galt2', 'a4galt', 'a4gnt', 'a5', 'a6', 'aaas', 'aac1', 'aac11', 'aac3', 'aacs', 'aad10', 'aad14', 'aad15', 'aad16', 'aad3', 'aad4', 'aad6', 'aadac', 'aadacl1', 'aadacl2', 'aadacl3', 'aadacl4', 'aadat', 'aaed1', 'aagab', 'aah1', 'aah3', 'aak1', 'aamdc', 'aamp', 'aanat', 'aap1', 'aar2', 'aars', 'aars2', 'aarsd1', 'aasdh', 'aasdhppt', 'aass', 'aat', 'aat1', 'aat2', 'aatf', 'aatk', 'aatk-as1', 'aats-ala', 'aats-ala-m', 'aats-arg', 'aats-asn', 'aats-asp', 'aats-cys', 'aats-gln', 'aats-glupro', 'aats-gly', 'aats-his', 'aats-ile', 'aats-leu', 'aats-lys', 'aats-met', 'aats-phe', 'aats-pro', 'aats-ser', '

### Compare the outputs of the search

The checks below compare the keys (i.e. the gene names) and the values (i.e. the lists of image IDs) returned by the two search methods.

In [41]:
def dict_compare(d1, d2):
    """Compare two dicts mapping a key to a list of ids.

    The lists are compared regardless of their order and are left untouched.

    Args:
        d1: first dict.
        d2: second dict.

    Returns:
        A tuple ``(added, removed, modified, same)``:
        ``added`` is the set of keys only in ``d1``, ``removed`` the set of
        keys only in ``d2``, ``modified`` a dict mapping each shared key
        whose lists differ to ``(d1[key], d2[key])``, and ``same`` the set of
        shared keys whose lists hold the same elements.
    """
    d1_keys = set(d1.keys())
    d2_keys = set(d2.keys())
    shared_keys = d1_keys.intersection(d2_keys)
    added = d1_keys - d2_keys
    removed = d2_keys - d1_keys
    modified = {}
    same = set()
    for key in shared_keys:
        # sorted() returns a new list; list.sort() returns None, which would
        # make every pair compare equal (and would reorder the inputs)
        if sorted(d1[key]) == sorted(d2[key]):
            same.add(key)
        else:
            modified[key] = (d1[key], d2[key])
    return added, removed, modified, same

In [42]:
# d1 is the search engine output and d2 the mapr output, so ``added`` holds
# keys only in the search engine results and ``removed`` keys only in mapr's
added, removed, modified, same = dict_compare(results, results_mapr)

In [43]:
# Both methods must return the same keys with the same image ids.
# The messages list the offending keys so that a failure can be diagnosed.
assert len(added) == 0, "keys only in the search engine results: %s" % sorted(added)
assert len(removed) == 0, "keys only in the mapr results: %s" % sorted(removed)
assert len(modified) == 0, "keys whose image ids differ: %s" % sorted(modified)
assert len(same) == e - s, "expected %d identical keys, got %d" % (e - s, len(same))