In [None]:
import pandas as pd
import requests
import itertools
import ipywidgets as widgets
from tqdm.notebook import tqdm, trange
from alive_progress import alive_bar
from IPython.display import display, clear_output

In [None]:
class App:
    """Notebook widget: a Start button plus an output area for the query result.

    Clicking the button downloads the strain list from the INFRAFRONTIER Solr
    endpoint, collects the distinct gene symbols, asks the IDR search engine
    for the images annotated with each gene, and displays a table with one
    row per gene that returned a result set, sorted by the number of distinct
    phenotype accessions found.
    """

    INFRAFRONTIER_API = 'https://www.infrafrontier.eu/dib/solr/strainsearch/select?q=*:*&rows=5000'
    INDEX_PAGE = "https://idr.openmicroscopy.org/webclient/?experimenter=-1"
    # The key/value pair is sent through ``params=`` so that requests escapes it.
    KEY_VALUE_SEARCH = "https://idr.openmicroscopy.org/searchengine/api/v1/resources/{type}/search/"
    # Seconds; without a timeout a stalled server would block the callback forever.
    REQUEST_TIMEOUT = 60
    COLUMNS = ["Gene Symbol", "# of phenotypes", "phenotype names", "phenotype ids"]

    def __init__(self):
        """Create the output area, the Start button and the VBox holding both."""
        self.container = widgets.Output()
        self.button = self.create_button()
        self.final_container = widgets.VBox([self.button, self.container])

    def create_button(self):
        """Return a 'Start' button whose click handler is ``on_click_but``."""
        button = widgets.Button(description='Start', tooltip='start the query', disabled=False)
        button.on_click(self.on_click_but)
        return button

    @staticmethod
    def _unique_gene_symbols(docs):
        """Return the distinct gene symbols of *docs* in first-seen order.

        Documents without a ``gene_symbol`` entry are skipped; a bare string is
        treated as a single symbol rather than being split into characters.
        """
        symbols = []
        for doc in docs:
            value = doc.get("gene_symbol")
            if isinstance(value, str):
                symbols.append(value)
            elif isinstance(value, (list, tuple)):
                symbols.extend(value)
        return list(dict.fromkeys(symbols))

    @staticmethod
    def _drop_duplicates_and_missing(values):
        """Deduplicate *values* (order kept) and drop ``None`` / 'nan' entries."""
        return [v for v in dict.fromkeys(values) if v is not None and str(v) != 'nan']

    @staticmethod
    def _extract_phenotypes(images):
        """Return ``(phenotype_ids, phenotype_names)`` gathered from *images*.

        Each image is expected to carry a ``key_values`` list of
        ``{"name": ..., "value": ...}`` dicts. Names are only collected from
        images that have at least one "Phenotype Term Accession" entry.
        Images with missing or empty ``key_values`` are ignored.
        """
        pheno_ids = []
        pheno_names = []
        for image in images:
            pairs = [kv for kv in (image.get("key_values") or []) if isinstance(kv, dict)]
            accessions = [kv.get("value") for kv in pairs
                          if kv.get("name") == "Phenotype Term Accession"]
            if not accessions:
                continue
            pheno_ids.extend(accessions)
            pheno_names.extend(kv.get("value") for kv in pairs
                               if kv.get("name") == "Phenotype Term Name")
        return (App._drop_duplicates_and_missing(pheno_ids),
                App._drop_duplicates_and_missing(pheno_names))

    @staticmethod
    def _build_table(rows):
        """Build the result DataFrame from ``(gene, count, names, ids)`` rows.

        The table is sorted by "# of phenotypes", largest first.
        """
        table = pd.DataFrame(rows, columns=App.COLUMNS)
        return table.sort_values("# of phenotypes", ascending=False)

    def on_click_but(self, _):
        """Run the full query and show the result table in the output area.

        The button is disabled while the query runs and re-enabled afterwards,
        also when the query fails, so that the user can retry.

        Raises:
            requests.HTTPError: if the strain list or the IDR index page
                answers with an error status.
        """
        self.button.disabled = True
        try:
            # Everything that produces output stays inside the Output context,
            # otherwise it is not routed to the widget from a button callback.
            with self.container:
                rows = []
                # The session must stay open for every request issued below.
                with requests.Session() as session:
                    strains = session.get(self.INFRAFRONTIER_API, timeout=self.REQUEST_TIMEOUT)
                    strains.raise_for_status()
                    genes = self._unique_gene_symbols(strains.json()["response"]["docs"])

                    # Visit the index page first, as the original workflow did.
                    index = session.get(self.INDEX_PAGE, timeout=self.REQUEST_TIMEOUT)
                    index.raise_for_status()

                    search_url = self.KEY_VALUE_SEARCH.format(type='image')

                    # this takes ~20 min.
                    self.container.clear_output(wait=False)
                    for gene in tqdm(genes):
                        payload = session.get(
                            search_url,
                            params={'key': 'Gene Symbol', 'value': gene},
                            timeout=self.REQUEST_TIMEOUT,
                        ).json()

                        results = payload.get('results') if isinstance(payload, dict) else None
                        if not isinstance(results, dict) or 'results' not in results:
                            # No result set for this gene: it gets no row.
                            continue

                        pheno_ids, pheno_names = self._extract_phenotypes(results['results'])
                        rows.append((gene, len(pheno_ids), pheno_names, pheno_ids))

                # total of 2488 elements, 1578 without pheno data (910 with)
                display(self._build_table(rows))
        finally:
            self.button.disabled = False

# Instantiate the widget application (Start button plus output area).
app = App()    
# Trailing expression: when this runs as the last statement of a notebook
# cell, the VBox holding the button and the output area is what gets rendered.
app.final_container 
