In [None]:
import pandas as pd
import requests
import shutil

def load_table(protein_atlas_tsv_file='/Users/dganguli/Downloads/proteinatlas.tsv'):
    """Read a Protein Atlas TSV file and keep its gene/antibody/Ensembl columns.

    Parameters
    ----------
    protein_atlas_tsv_file : str
        Path to a tab-separated file containing (at least) the columns
        'Gene', 'Antibody' and 'Ensembl'.

    Returns
    -------
    pandas.DataFrame
        Three columns named 'gene', 'antibodies' and 'ensembl_id' (renamed
        from the source columns above). Rows with a missing value in any of
        the three columns are dropped.
    """
    # Source column name -> name used by the rest of the notebook.
    renamed = {'Gene': 'gene', 'Antibody': 'antibodies', 'Ensembl': 'ensembl_id'}
    table = pd.read_table(protein_atlas_tsv_file)
    return (
        table.loc[:, list(renamed)]
        .rename(columns=renamed)
        .dropna()
    )

def antibody_ids_for(ensembl_id, df):
    """Return the numeric antibody IDs listed for one Ensembl gene ID.

    Parameters
    ----------
    ensembl_id : str
        Value to look up in the ``ensembl_id`` column of ``df``.
    df : pandas.DataFrame
        Table with ``ensembl_id`` and ``antibodies`` columns, where
        ``antibodies`` holds a comma-separated string of IDs that start
        with 'CAB' or 'HPA'.

    Returns
    -------
    list of str
        One entry per antibody, with the 'CAB'/'HPA' prefix and any leading
        zeros removed (e.g. 'HPA030741' -> '30741'). Only the first row
        matching ``ensembl_id`` is used.

    Raises
    ------
    IndexError
        If no row of ``df`` has the requested ``ensembl_id``.
    ValueError
        If an antibody ID starts with neither 'CAB' nor 'HPA'.
    """
    matches = df.loc[df.ensembl_id == ensembl_id, 'antibodies']
    if matches.empty:
        # Same exception type that indexing the empty result would raise,
        # but with a message that names the ID that was not found.
        raise IndexError(
            'No antibodies found for Ensembl ID: {}'.format(ensembl_id)
        )

    munged_aids = []
    for aid in matches.values[0].split(','):
        aid = aid.strip()
        for prefix in ('CAB', 'HPA'):
            if aid.startswith(prefix):
                # Slice the prefix off rather than splitting on it, so an ID
                # that happens to contain the prefix text again is not cut short.
                munged_aids.append(aid[len(prefix):].lstrip('0'))
                break
        else:
            raise ValueError('Unrecognized antibody type: {}'.format(aid))

    return munged_aids

def plates(max_plates=2000):
    """Return the candidate plate numbers: 0 up to, but not including, ``max_plates``."""
    plate_numbers = range(0, max_plates)
    return plate_numbers

def wells():
    """Lazily yield the 96 well labels of an 8-row x 12-column plate.

    Labels are a row letter 'A'..'H' followed by a column number 1..12.
    Iteration walks all rows of one column before moving to the next
    column: 'A1', 'B1', ..., 'H1', 'A2', ..., 'H12'.
    """
    for col_number in range(1, 13):
        for row_letter in ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'):
            yield '{}{}'.format(row_letter, col_number)

def fields(max_fields=10):
    """Return the candidate field indices: 0 up to, but not including, ``max_fields``."""
    field_numbers = range(0, max_fields)
    return field_numbers
    
def channels():
    """Return a new list of the four image channel names used in image URLs."""
    channel_names = ("blue", "green", "red", "yellow")
    return list(channel_names)

def make_url(aid, plate, well, field, ch):
    """Build the image URL for one antibody / plate / well / field / channel.

    The result has the form
    ``https://www.proteinatlas.org/images/<aid>/<plate>_<well>_<field>_<ch>.jpg``.
    """
    image_parts = (aid, plate, well, field, ch)
    return "https://www.proteinatlas.org/images/{}/{}_{}_{}_{}.jpg".format(*image_parts)

def download_img(url, path, debug=False):
    """Stream the resource at ``url`` into the file at ``path``.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET.
    path : str
        Destination file; it is opened in binary write mode, so an existing
        file is overwritten.
    debug : bool, optional
        When true, print ``url`` before requesting it.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx or 5xx status.

    Notes
    -----
    The file is written only for a 200 response. Any other non-error status
    returns without writing anything.
    """
    if debug:
        print(url)
    # Use the response as a context manager: with stream=True the connection
    # stays open until the body is consumed or the response is closed.
    with requests.get(url, stream=True) as r:
        if r.status_code != 200:
            r.raise_for_status()
            return
        # Ask urllib3 to undo gzip/deflate transfer encoding while copying.
        r.raw.decode_content = True
        with open(path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)

In [None]:
df = load_table(protein_atlas_tsv_file='/Users/dganguli/Downloads/proteinatlas.tsv')
e_id = "ENSG00000134057"  # Ensembl ID for CCNB1
a_ids = antibody_ids_for(e_id, df)  # antibody IDs for this gene

# Look for downloadable images by probing every
# (antibody, plate, well, field, channel) combination. With the limits below
# that is 2000 * 96 * 10 * 4 = 7,680,000 requests per antibody, so a single
# Session is shared to reuse connections instead of opening one per request.
found_urls = []  # every URL that answered 200, kept so hits are not only printed
with requests.Session() as session:
    for a_id in a_ids:
        for plate in plates(max_plates=2000):
            for well in wells():  # 96 well plates A1->H12
                for field in fields(max_fields=10):
                    for ch in channels():
                        url = make_url(a_id, plate, well, field, ch=ch)
                        r = session.get(url)
                        if r.status_code == 200:
                            found_urls.append(url)
                            print(url)

In [None]:
# Number of rows in the loaded table.
df.shape[0]

In [None]:
# Show the antibody IDs found for the gene. The notebook-level variable is
# `a_ids`; `aids` exists only as a local inside antibody_ids_for.
a_ids