In [1]:
import pandas as pd

In [2]:
import urllib
import urllib.request

import xmltodict

In [3]:
#samples = pd.read_csv( 'tumor_details.csv' )

In [4]:
#samples.head()

In [5]:
def do_request( url ):
    """Fetch `url` and return its XML body parsed by xmltodict.

    Parameters
    ----------
    url : str
        Address handed unchanged to ``urllib.request.urlopen``.

    Returns
    -------
    The structure produced by ``xmltodict.parse`` for the response body.

    Raises
    ------
    Any exception raised by ``urllib.request.urlopen``, by reading the
    response, or by ``xmltodict.parse`` propagates to the caller.
    """
    # The context manager closes the response even when read() raises.
    with urllib.request.urlopen( url ) as response:
        payload = response.read()
    return xmltodict.parse( payload )

In [6]:
def get_all_samples():
    """Request every Specimen record of the 'Glioblastoma' product.

    The query asks for all rows (``num_rows$eqall``); the parsed response is
    returned exactly as do_request produces it.
    """
    query_url = 'http://api.brain-map.org/api/v2/data/query.xml?criteria=model::Specimen,rma::criteria,products[name$eq%27Glioblastoma%27],rma::options[num_rows$eqall]'
    return do_request( query_url )

In [7]:
# Fetch the specimen listing once and keep the parsed response for later cells.
all_samples_resp = get_all_samples()

In [8]:
# Collect the external name of every specimen in the response, sorted ascending.
all_specimens = [ spec['external-specimen-name'] for spec in all_samples_resp['Response']['specimens']['specimen'] ]
all_specimens.sort()
print( 'Found {} samples'.format( len(all_specimens) ) )

Found 948 samples


In [9]:
def get_sample_detail( specimen='W1-1-2-A.1.01' ):
    """Query the SectionDataSet records of one specimen.

    `specimen` is inserted into the ``external_specimen_name`` criterion of the
    query, which also asks for the related ``genes`` and ``sub_images``. The
    parsed response is returned exactly as do_request produces it.
    """
    url_template = "http://api.brain-map.org/api/v2/data/query.xml?criteria=model::SectionDataSet,rma::criteria,specimen[external_specimen_name$eq'{}'],rma::include,genes,sub_images"
    return do_request( url_template.format(specimen) )

In [26]:
def _as_list( node ):
    """Return an xmltodict child node as a list (None -> [], single item -> [item])."""
    if node is None:
        return []
    if isinstance( node, list ):
        return node
    return [node]


def get_all_section_images( specimen='W1-1-2-A.1.01' ):
    """List the section images recorded for one specimen.

    Calls get_sample_detail( specimen ) and walks every section data set in
    the response, emitting one entry per sub-image.

    Parameters
    ----------
    specimen : str
        External specimen name, passed through to get_sample_detail.

    Returns
    -------
    list of tuple
        ``(label, image_id, section_number)`` tuples sorted by section number.
        ``label`` is the gene acronym of the data set, or ``'H+E'`` when the
        data set has no genes; ``image_id`` and ``section_number`` are ints.
        An empty list is returned when the response holds no data sets.

    Notes
    -----
    Sub-images that lack an ``id`` or ``section-number`` entry are skipped.
    """
    response = get_sample_detail( specimen )['Response']

    # xmltodict yields None for an empty element, a dict for a single child and
    # a list for repeated children, so every level is normalised to a list
    # before iterating. Without this a lone sub-image would be iterated key by
    # key and lost, and a data set with several sub-images could not be indexed.
    data_sets = _as_list( ( response['section-data-sets'] or {} ).get( 'section-data-set' ) )

    out = []
    for data_set in data_sets:
        genes = data_set['genes']
        label = 'H+E' if genes is None else genes['gene']['acronym']

        for sub_image in _as_list( ( data_set['sub-images'] or {} ).get( 'sub-image' ) ):
            try:
                raw_id = sub_image['id']['#text']
                raw_section = sub_image['section-number']['#text']
            except (KeyError, TypeError):
                # Best effort: ignore entries without a usable id / section number.
                continue
            out.append( ( label, int(raw_id), int(raw_section) ) )

    return sorted( out, key=lambda item: item[2] )

In [27]:
# Example: list the section images of one specimen, ordered by section number.
get_all_section_images( specimen='W1-1-2-D.2.01' )

[('H+E', 298107517, 1),
 ('H+E', 298107519, 2),
 ('H+E', 298107507, 17),
 ('H+E', 298107509, 18),
 ('H+E', 298107497, 33),
 ('H+E', 298107499, 34),
 ('H+E', 298107487, 49),
 ('H+E', 298107489, 50),
 ('H+E', 298107477, 65),
 ('H+E', 298107479, 66),
 ('H+E', 299316470, 81),
 ('H+E', 299316472, 82)]

In [12]:
def download_image_from_id( id_str, name=None, feature_map=False ):
    """Download one image from the image_download endpoint to '<name>.jpg'.

    Parameters
    ----------
    id_str :
        Image id, formatted into the download URL.
    name :
        Base name of the output file (without extension). When empty or None
        the id itself is used.
    feature_map : bool
        When true, the ``tumor_feature_annotation`` view is requested instead
        of the default one.
    """
    base_name = name if name else id_str
    view_query = '?view=tumor_feature_annotation' if feature_map else ''
    image_url = "http://api.brain-map.org/api/v2/image_download/{}".format(id_str) + view_query

    urllib.request.urlretrieve( image_url, "{}.jpg".format(base_name) )

In [13]:
# Example: fetch image 292324238 with the tumor_feature_annotation view; it is saved as 292324238.jpg.
download_image_from_id( '292324238',feature_map=True )

In [14]:
from multiprocessing import Pool

def f(x):
    """Return `x` multiplied by itself."""
    squared = x * x
    return squared

if __name__ == '__main__':
    # Map f over a small list using a pool of 5 worker processes and print the result.
    with Pool( processes=5 ) as pool:
        squares = pool.map( f, [1, 2, 3] )
        print( squares )

[1, 4, 9]
