In [1]:
import requests
import json

In [2]:
# Landing page of the web client on the IDR test server.
INDEX_PAGE = (
    "https://idr-testing.openmicroscopy.org"
    "/webclient/?experimenter=-1"
)
# Search-engine endpoint; {type} is filled in with the resource type
# (e.g. 'image') through str.format().
SEARCH_ENGINE_URL = (
    "https://idr-testing.openmicroscopy.org"
    "/searchengine/api/v1/resources/{type}/search/"
)

In [3]:
# URL templates for the mapr API. Every template takes {key} and {value};
# the container-specific ones additionally take the id of the parent object.

# Lookup of the values matching {value}, ignoring case.
MAPR_URL = (
    "https://idr-testing.openmicroscopy.org"
    "/mapr/api/{key}/?value={value}&case_sensitive=false&orphaned=true"
)
# Screens and projects annotated with {value}.
SCREENS_PROJECTS_URL = (
    "https://idr-testing.openmicroscopy.org"
    "/mapr/api/{key}/?value={value}"
)
# Plates of the screen {screen_id} annotated with {value}.
PLATES_URL = (
    "https://idr-testing.openmicroscopy.org"
    "/mapr/api/{key}/plates/?value={value}&id={screen_id}"
)
# Datasets of the project {project_id} annotated with {value}.
DATASETS_URL = (
    "https://idr-testing.openmicroscopy.org"
    "/mapr/api/{key}/datasets/?value={value}&id={project_id}"
)
# Images below the container {parent_id} of kind {parent_type}.
IMAGES_URL = (
    "https://idr-testing.openmicroscopy.org"
    "/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}"
)


In [4]:
# Create the HTTP session shared by every cell below.
# It is deliberately NOT opened with a ``with`` block: leaving such a block
# closes the session, while the helper functions defined later keep using it.
session = requests.Session()

# Load the index page once and fail fast if the server reports an error
# (raise_for_status() does nothing for non-error status codes).
# NOTE(review): presumably this request also initialises the session cookies
# for the following API calls - confirm.
response = session.get(INDEX_PAGE)
response.raise_for_status()

In [5]:
# Search endpoint extended with a key/value query string; {key} and {value},
# like {type}, are substituted later through str.format().
KEY_VALUE_SEARCH = "".join((SEARCH_ENGINE_URL, "?key={key}&value={value}"))

In [25]:
# Annotation key as used in search-engine queries and as used in mapr URLs.
KEY, KEY_MAPR = "Gene Symbol", "gene"
# Values looked up through both services.
GENES = [
    "pax1",
    "pep",
]

In [26]:
def load_using_search_api():
    """Collect image ids per gene by querying the search engine directly.

    One request is sent per entry of ``GENES`` to ``KEY_VALUE_SEARCH``, using
    the resource type ``image`` and the key ``KEY``.

    Returns:
        dict: lower-cased gene -> list of unique image ids, in the order in
        which the service returned them.

    Raises:
        requests.HTTPError: if the search engine answers with an error status.
    """
    results = {}
    for gene in GENES:
        url = KEY_VALUE_SEARCH.format(type='image', key=KEY, value=gene)
        response = session.get(url)
        # Report 4xx/5xx as such instead of failing later on an error body.
        response.raise_for_status()
        payload = response.json()
        ids = []
        seen = set()  # constant-time membership test while keeping list order
        for image in payload['results']['results']:
            image_id = image['id']
            if image_id not in seen:
                seen.add(image_id)
                ids.append(image_id)
        results[gene.lower()] = ids
    return results

In [27]:
def get_genes():
    """Return the ids of all mapr map entries matching the entries of ``GENES``.

    Each gene is looked up through ``MAPR_URL`` with the key ``KEY_MAPR``; the
    ``id`` of every element of the ``maps`` list in the response is collected,
    gene after gene, in response order.
    """
    def _map_ids(symbol):
        # One lookup request per gene symbol.
        lookup = MAPR_URL.format(key=KEY_MAPR, value=symbol)
        payload = session.get(lookup).json()
        return [entry['id'] for entry in payload['maps']]

    return [map_id for symbol in GENES for map_id in _map_ids(symbol)]

def parse_annotation(images, json_data, gene, name, data_type):
    """Add to ``images`` the ids of the images found below the given containers.

    For every container listed under ``json_data[data_type]`` the images
    annotated with ``gene`` are requested through ``IMAGES_URL`` and their ids
    are appended to ``images`` unless already present.

    Args:
        images (list): accumulator of image ids, extended in place.
        json_data (dict): mapr response holding the containers under the key
            ``data_type``; each container provides an ``id``.
        gene (str): value sent to mapr together with the key ``KEY_MAPR``.
        name: name of the parent screen or project. Currently unused; kept so
            that existing callers keep working.
        data_type (str): ``'plates'`` or ``'datasets'``; the name without its
            last character is sent as the ``node`` parameter.

    Returns:
        None. The result is delivered through ``images``.

    Raises:
        requests.HTTPError: if mapr answers with an error status.
    """
    parent_type = data_type[:-1]  # 'plates' -> 'plate', 'datasets' -> 'dataset'
    seen = set(images)  # constant-time membership test; list order is untouched
    for container in json_data[data_type]:
        url = IMAGES_URL.format(key=KEY_MAPR, value=gene,
                                parent_type=parent_type,
                                parent_id=container['id'])
        response = session.get(url)
        # Report 4xx/5xx as such instead of failing later on an error body.
        response.raise_for_status()
        for image in response.json()['images']:
            image_id = image['id']
            if image_id not in seen:
                seen.add(image_id)
                images.append(image_id)
                                
def load_using_mapr():
    """Collect image ids per gene by walking the mapr API.

    For every value returned by ``get_genes()`` the annotated screens and
    projects are requested, then their plates / datasets, and finally the
    images below those (via ``parse_annotation``).

    Each lower-cased gene gets its own list, so values differing only in case
    are merged under one key while different genes never share a list. The
    values are not looked up through ``MAPR_URL`` a second time because
    ``get_genes()`` already resolved them with that same lookup.

    Returns:
        dict: lower-cased gene -> list of unique image ids.

    Raises:
        requests.HTTPError: if a request made here is answered with an error
            status.
    """
    def fetch_json(url):
        # Fetch and decode one mapr response, reporting 4xx/5xx as such.
        response = session.get(url)
        response.raise_for_status()
        return response.json()

    results = {}
    for gene in get_genes():
        images = results.setdefault(gene.lower(), [])
        containers = fetch_json(
            SCREENS_PROJECTS_URL.format(key=KEY_MAPR, value=gene))

        for screen in containers['screens']:
            value = screen['extra']['value']
            plates = fetch_json(PLATES_URL.format(
                key=KEY_MAPR, value=value, screen_id=screen['id']))
            parse_annotation(images, plates, value, screen['name'], 'plates')

        for project in containers['projects']:
            # Read the value from the project itself, not from whichever
            # screen was visited last.
            # NOTE(review): assumes projects carry the same 'extra' entry as
            # screens - confirm against the mapr response.
            value = project['extra']['value']
            datasets = fetch_json(DATASETS_URL.format(
                key=KEY_MAPR, value=value, project_id=project['id']))
            parse_annotation(images, datasets, value, project['name'],
                             'datasets')
    return results
    

In [28]:
%%time
# Timed run of the direct search-engine lookup; ``results`` maps each
# lower-cased gene to the list of image ids found for it.
results = load_using_search_api()

CPU times: user 62.7 ms, sys: 19.9 ms, total: 82.6 ms
Wall time: 906 ms


In [29]:
%%time
# Timed run of the same lookup done through the mapr API, for comparison
# with the search-engine timing above.
results_mapr = load_using_mapr()

CPU times: user 292 ms, sys: 50.4 ms, total: 342 ms
Wall time: 3.83 s


In [30]:
def dict_compare(d1, d2):
    """Compare two dicts whose values are lists, ignoring the order of the lists.

    Args:
        d1 (dict): first mapping, key -> list of sortable items.
        d2 (dict): second mapping, key -> list of sortable items.

    Returns:
        tuple: ``(added, removed, modified, same)`` where
            * ``added`` is the set of keys found only in ``d1``,
            * ``removed`` is the set of keys found only in ``d2``,
            * ``modified`` maps each shared key whose lists differ to the
              pair ``(d1[key], d2[key])``,
            * ``same`` is the set of shared keys whose lists hold the same
              items.
    """
    d1_keys = set(d1)
    d2_keys = set(d2)
    added = d1_keys - d2_keys
    removed = d2_keys - d1_keys
    modified = {}
    same = set()
    for key in d1_keys & d2_keys:
        # sorted() returns new lists, so the comparison is meaningful and the
        # inputs are left untouched (list.sort() returns None).
        if sorted(d1[key]) == sorted(d2[key]):
            same.add(key)
        else:
            modified[key] = (d1[key], d2[key])
    return added, removed, modified, same

In [31]:
# Compare the image ids obtained from the search engine (first argument)
# with those obtained from mapr (second argument), gene by gene.
added, removed, modified, same = dict_compare(results, results_mapr)

In [32]:
# Both services must report the same genes ...
assert not added
assert not removed
# ... with the same image ids for each of them.
assert not modified
assert len(GENES) == len(same)