In [6]:
from abc import ABCMeta, abstractmethod
from urllib.parse import urlencode
import requests
from datetime import datetime
from bs4 import BeautifulSoup as bsoup
from functools import reduce
import time
import xmltodict, json
import ipdb

In [87]:
from threading import Thread

class ThreadWithReturnValue(Thread):
    """Thread that keeps the value returned by its target.

    ``join()`` hands that value back. It returns ``None`` when the thread has
    no target, when the target raised, or when ``join`` timed out before the
    target finished.
    """

    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, daemon=None):
        super().__init__(group=group, target=target, name=name,
                         args=args, kwargs=kwargs, daemon=daemon)
        # Filled in by run(); stays None until the target has returned.
        self._return = None

    def run(self):
        """Call the target and store its result instead of discarding it."""
        try:
            if self._target is not None:
                self._return = self._target(*self._args, **self._kwargs)
        finally:
            # Same clean-up as Thread.run(): drop the references so that an
            # argument pointing back at this thread cannot keep a cycle alive.
            del self._target, self._args, self._kwargs

    def join(self, timeout=None):
        """Wait for the thread like ``Thread.join`` and return the stored result.

        @param timeout: optional number of seconds to wait, as in ``Thread.join``.
        @return: the target's return value, or ``None`` (see class docstring).
        """
        super().join(timeout)
        return self._return

    
class NCBI_Searcher(metaclass=ABCMeta):
    """ 'Interface' que define a utilização da API das databases da NCBI.
    """

    search_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    meta_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
    fetch_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    ncbi_register = {"tool":"Atena", "email":"ddddiegolima@gmail.com"}
    max_pagination = 20
    recursive = True

    def search(self, queryterms: list = None, search_type: str = None,
               start_year: int = None, end_year: int = None,
               max_records: int = None, start_record: int = None,
               author: str = None, journal: str = None, search_url: str = None):
        """
        Realiza uma pesquisa NCBI.
        @param queryterms: list of lists. Terms within the same list are
            separated by an OR. Lists are separated by an AND
        @param search_type: meta_data or querytext.
            meta_data: This field enables a free-text search of all
                configured metadata fields and the abstract.
            querytext: This field enables a free-text search of all
                fields.
        @param start_year: Start value of Publication Year to restrict results by.
        @param end_year: End value of Publication Year to restrict results by.
        @param max_records: The number of records to fetch.
        @param start_record: Sequence number of first record to fetch.
        @param author: An author's name. Searches both first name and last name
            Accepts a list of author names too.
        @param journal: An author's name. Accepts a list of journals too.
        @param search_url: Optionally you can directly specify the URL to 
            query from. Setting this parameter will ignore the other parameters.
        @return: a dictionaries list whose keys are compatible with Documento model.
        """

        term = self._search_term(queryterms, search_type=search_type)
        if author:
            author = [author] if type(author) == str else author
            author = ['%s[Author]' % a for a in author]
            term = "%s AND (%s)" % (term, " OR ".join(author) )

        if journal:
            journal = [journal] if type(journal) == str else journal
            journal = ['"%s"[Journal]' % j for j in journal]
            term = "%s AND (%s)" % (term, " OR ".join(journal) )

        fixed_payload = {"retmode": "json", "datetype": "pdat",
                         "db": self._db, "sort": self._sort_order}
        payload = {"term": term,
                   "retmax": max_records or '', "retstart": start_record or '',
                   "mindate": start_year or '', "maxdate": end_year or ''}
        payload.update(fixed_payload)
        payload.update(self.ncbi_register)
        url = search_url if search_url else "%s?%s" % (self.search_url, urlencode(payload))
        
#         print("URL SEARCH: %s" % url)
        t_00 = time.time()
        response = requests.get(url).json()['esearchresult']
        print('{:15s}{:6.3f}'.format("response",time.time() - t_00))
        quantidade_artigos = int(response['count'])
        if self.recursive:
            print("Artigos encontrados: ",quantidade_artigos)
        # Se o usuário não limitou quantidade de resultados, então traz tudo
        max_records = max_records or quantidade_artigos
        
        retorno = []
        
        ###
        ### BLOCO FANTASMA
        ###
        # Se houver necessidade de paginação...
        if quantidade_artigos > self.max_pagination and max_records > self.max_pagination:
            # self.recursive só sera True se a chamada estiver sendo feita pelo usuário.
            # Isso serve para garantir que cada chamada da função self.search
            # neste bloco só acontecerá com um nível de recursividade.
            if self.recursive:
                self.recursive = False
                
                threads = []
                payload.update({'retmax':self.max_pagination})
                for i,x in enumerate(range(20, quantidade_artigos+1, self.max_pagination)):
                    
                    payload.update({'retstart':x})
                    kwargs = {"search_url": "%s?%s" % (self.search_url, urlencode(payload))}
                    
                    thread = ThreadWithReturnValue(target=self.search, kwargs=kwargs)
                    threads.append(thread)
                    thread.start()
                    
                    if (i+1)%3==0:
                        print("sleeping")
                        time.sleep(2)

                for thread in threads:
                    lista = thread.join()
                    print("thread fetching ",len(lista))
                    retorno.extend(lista)
                    
                self.recursive = True
                
        ###
        ### FIM BLOCO FANTASMA
        ###
                    

        id_list = response['idlist']

        if id_list:
            lista = self._get_article_metadata(*id_list)
            retorno.extend(lista)
        return retorno

    def _search_term(self, queryterms: list, search_type: str = None):
        """Monta o termo de pesquisa completo para mandar para a API."""
        
        if type(queryterms) != list:
            return

        if search_type in ['querytext', None]:
            # Retorna simplesmente a busca concatenando com os OR's e AND's
            return "(%s)" % " AND ".join(["(%s)" % " OR ".join(orses) for orses in queryterms])
        elif search_type != 'meta_data':
            raise Exception('Tipo de pesquisa não faz sentido: %s\nTipos suportados:' % search_type)

        # Retorna concacentando com os OR'S e AND's, mas embutindo também os campos de pesquisa em cada termo
        queryterms = [[self._embutir_fields(orses) for orses in andes] for andes in queryterms]
        return "(%s)" % " AND ".join(["(%s)" % " OR ".join(orses) for orses in queryterms])

    def _embutir_fields(self, term: str):
        """Faz uma transformação, embutindo fields no termo de pesquisa.
        Isso é para poder realizar a pesquisa em apenas alguns campos ao invés de todos.
        Exemplo: sendo self.__fields = ['title', 'abstract'],
        a chamada
        `self._embutir_fields("machine learning")`
        Transforma:
            machine learning ---> (machine learning[title] OR machine learning[abstract])
        """

        return "(%s)" % " OR ".join(["%s[%s]" % (term, field) for field in self._fields])

    @staticmethod
    def deepgetter(obj, attrs, default=None):
        """Faz uma chamada sucessiva da função getattr, para ir pegando os atributos
        de um objeto.
        Exemplo:
        deepgetter(Cidade, 'regiao.pais') é equivalente a fazer Cidade.regiao.pais
        """
        getter = lambda x, y: getattr(x, y, default)
        return reduce(getter, attrs.split('.'), obj)

    @abstractmethod
    def _get_article_metadata(self, *args):
        """Cada subclasse deverá implementar a função que pega o retorno da API e transforma numa lista de dicionários
        no formato do modelo Documento."""
        pass

    @property
    @abstractmethod
    def _fields(self):
        """Cada subclasse deverá definir quais serão os campos de pesquisa de cada termo.
        O retorno deverá ser uma lista de fields.
        Exemplo:
        return ['title', 'abstract']
        """
        pass

    @property
    @abstractmethod
    def _db(self):
        """Cada subclasse deverá definir o seu banco.
        Exemplo:
        return 'pmc'
        """
        pass

    @property
    @abstractmethod
    def _sort_order(self):
        """Cada classe deverá definir o parâmetro sort_order.
        Exemplo:
        return 'Journal'
        """
        pass

    @property
    @abstractmethod
    def _article_url(self):
        """Cada classe deverá definir a URL da página de um artigo."""
        pass


class PMC_Searcher(NCBI_Searcher):
    """Realiza pesquisas na base PMC."""

    @property
    def _fields(self):
        """PMC search fields embedded into each term of a field-restricted search."""
        pmc_fields = [
            'Abstract',
            'Body - Key Terms',
            'MeSH Terms',
            'MeSH Major Topic',
            'Methods - Key Terms',
        ]
        return pmc_fields

    @property
    def _db(self):
        """Database name sent to the E-utilities for PMC searches."""
        database = 'pmc'
        return database

    @property
    def _sort_order(self):
        """Value of the ``sort`` parameter used for PMC searches."""
        ordering = 'relevance'
        return ordering

    @property
    def _article_url(self):
        """Prefix that, followed by the PMC id, forms the article page URL."""
        base_url = 'https://www.ncbi.nlm.nih.gov/pmc/articles/'
        return base_url

    @staticmethod
    def _get_data(p_art):
        """Look for the publication date in the ``<pub-date>`` tags of an article.

        The ``pub-type="epub"`` date is preferred; ``pub-type="ppub"`` is the
        fallback. A missing month is treated as 'Jan'.

        @param p_art: parsed article element offering ``findAll``
            (presumably a BeautifulSoup tag -- confirm against the caller).
        @return: a ``datetime.date`` when year and month can be parsed,
            otherwise the "<year> <month>" string that could not be parsed.
        @raise IndexError: if the article has neither an epub nor a ppub date.
        """
        try:
            pub_date = p_art.findAll("pub-date", {"pub-type": "epub"})[0]
        except IndexError:
            # No electronic publication date: use the print one.
            pub_date = p_art.findAll("pub-date", {"pub-type": "ppub"})[0]

        month = NCBI_Searcher.deepgetter(pub_date, 'month.text', default='Jan')
        data_pub_string = "%s %s" % (pub_date.year.text, month)

        # The month may be numeric ("2017 11") or abbreviated ("2017 Nov").
        for date_format in ("%Y %m", "%Y %b"):
            try:
                return datetime.strptime(data_pub_string, date_format).date()
            except ValueError:
                continue

        return data_pub_string

    @staticmethod
    def _get_unique_id(p_art):
        """Find the unique ID of an article among its ``<article-id>`` tags.

        @param p_art: parsed article element offering ``findAll``
            (presumably a BeautifulSoup tag -- confirm against the caller).
        @return: the DOI when one is present; otherwise the first article id,
            prefixed by its ``pub-id-type``.
        @raise IndexError: if the article has no ``<article-id>`` at all.
        """
        try:
            return p_art.findAll("article-id", {"pub-id-type": "doi"})[0].text
        except IndexError:
            # No DOI in the XML: fall back to whatever ID comes first.
            fallback = p_art.findAll("article-id")[0]
            return "%s%s" % (fallback['pub-id-type'], fallback.text)

    def _get_article_metadata(self, *args):
        """Download the given PMC articles with EFetch and convert them to Documento dicts.

        The XML reply is parsed with xmltodict, where a repeated element is a
        list and a lone element is a plain dict or string; both shapes are
        handled for articles, contributors, keywords and article ids.

        @param args: the PMC ids of the articles to fetch.
        @return: list of dicts with the keys ``resumo``, ``html_url``,
            ``autores``, ``doi``, ``palavras_chaves`` and ``titulo``. Articles
            without a PMC id or without a title are left out.
        @raise requests.HTTPError: if the service answers with an error status.
        """
        id_list = ','.join([str(x) for x in args])

        payload = {"id": id_list, "db": self._db, "retmode": "xml"}
        payload.update(self.ncbi_register)
        url = "%s?%s" % (self.fetch_url, urlencode(payload))
        print("URL META: %s" % url)

        t_05 = time.time()
        r = requests.get(url)
        r.raise_for_status()
        print('{:15s}{:6.3f}'.format("response_M", time.time() - t_05))

        t_02 = time.time()
        # Round-trip through JSON so the parsed XML consists of plain
        # dict/list/str objects, which the exact type checks below rely on.
        d = json.loads(json.dumps(xmltodict.parse(r.content)))
        # A reply holding a single article is a dict, not a list of dicts.
        articles = self._as_list((d['pmc-articleset'] or {}).get('article'))
        print('{:15s}{:6.3f}'.format("parse", time.time() - t_02))

        documentos = []

        t_04 = time.time()
        for article in articles:
            try:
                meta = article['front']['article-meta']
            except (KeyError, TypeError):
                # Without article metadata there is neither id nor title.
                continue

            ### TITLE
            try:
                title = meta['title-group']['article-title']
                if type(title) == dict:
                    # Among the candidates take the longest value (with luck,
                    # that really is the title).
                    title = sorted(title.values(), key=len)[-1]
            except Exception:
                title = ''

            ### AUTHORS
            authors = []
            try:
                # With several contributor groups only the first one is used.
                contrib_group = self._as_list(meta['contrib-group'])[0]
                for contrib in self._as_list(contrib_group['contrib']):
                    try:
                        authors.append("%s %s" % (contrib['name']['given-names'],
                                                  contrib['name']['surname']))
                    except (KeyError, TypeError):
                        # Contributor without a personal name: skip it.
                        pass
            except Exception:
                authors = []

            ### KEYWORDS
            try:
                # With several keyword groups only the first one is used.
                kwd_group = self._as_list(meta['kwd-group'])[0]
                palavras_chave = [k if type(k) == str else k['#text']
                                  for k in self._as_list(kwd_group['kwd'])]
            except Exception:
                palavras_chave = []

            ### DOI / IDs
            doi = self._last_article_id(meta, 'doi')
            pmc_id = self._last_article_id(meta, 'pmc')

            ### ABSTRACT
            try:
                resumo = self._abstract_text(meta['abstract'])
            except Exception:
                # No usable abstract.
                resumo = ''

            if not pmc_id or not title:
                # The link and the title are the minimum for a document to be included.
                continue

            documento = {}
            documento['resumo'] = resumo
            documento['html_url'] = "%s%s" % (self._article_url, pmc_id)
            documento['autores'] = ",".join(authors)
            documento['doi'] = doi
            documento['palavras_chaves'] = ",".join(palavras_chave)
            documento['titulo'] = title

            documentos.append(documento)

        print('{:15s}{:6.3f}'.format("fetch", time.time() - t_04))

        return documentos

    @staticmethod
    def _as_list(value):
        """Return ``value`` as a list: ``None`` -> ``[]``, a lone element -> ``[element]``."""
        if value is None:
            return []
        return value if isinstance(value, list) else [value]

    def _last_article_id(self, meta, id_type):
        """Return the text of the last ``article-id`` of the given type, or ``''``."""
        try:
            found = [article_id['#text'] for article_id in self._as_list(meta['article-id'])
                     if article_id['@pub-id-type'] == id_type]
            return found[-1] or ''
        except Exception:
            return ''

    @staticmethod
    def _abstract_text(abstract):
        """Extract the abstract text from the parsed ``abstract`` element.

        Always returns a value: every shape that is not recognised yields ``''``
        instead of leaving the result undefined.
        """
        if type(abstract) == dict:
            try:
                return abstract['p']['#text']
            except TypeError:
                # 'p' is not a dict: it may be a plain string or a list of paragraphs.
                try:
                    if type(abstract['p']) == str:
                        return abstract['p']
                    elif type(abstract['p']) == list:
                        return abstract['p'][0]['#text']
                except KeyError:
                    try:
                        return abstract['sec'][0]['p']
                    except Exception:
                        return ''
                except Exception:
                    return ''
            except Exception:
                return ''
            return ''

        if type(abstract) == list:
            # NOTE(review): only abstracts typed 'author-highlights' are used
            # here; untyped abstracts are skipped -- confirm this is intended.
            resumo = ''
            for ab in abstract:
                try:
                    abstract_type = ab['@abstract-type']
                except Exception:
                    continue

                if abstract_type == 'author-highlights':
                    if type(ab['p']) == dict:
                        resumo = ab['p']['#text']
                    elif type(ab['p']) == list:
                        resumo = ''
                        for p in ab['p']:
                            try:
                                paragraph = p['#text']
                            except Exception:
                                continue
                            resumo = "%s\n%s" % (resumo, paragraph)
            return resumo

        return ''


class PubMed_Searcher(NCBI_Searcher):
    """Realiza pesquisas na base PubMed."""

    @property
    def _fields(self):
        """PubMed search fields embedded into each term of a field-restricted search."""
        pubmed_fields = ['Text Words']
        return pubmed_fields

    @property
    def _db(self):
        """Database name sent to the E-utilities for PubMed searches."""
        database = 'pubmed'
        return database

    @property
    def _sort_order(self):
        """Value of the ``sort`` parameter for PubMed searches: left empty."""
        ordering = ''
        return ordering

    @property
    def _article_url(self):
        """Prefix that, followed by the PMID, forms the article page URL."""
        base_url = "https://www.ncbi.nlm.nih.gov/pubmed/"
        return base_url

    @staticmethod
    def _get_data(p_art):
        """Look for the publication date inside a ``<PubmedArticle>`` element.

        ``PubDate/Year`` plus ``PubDate/Month`` (default 'Jan') is used when a
        year is present. Otherwise the first eight characters of
        ``PubDate/MedlineDate`` are tried, first on their own (e.g. "2017 Nov"
        out of "2017 Nov-Dec") and then with the default month appended.

        @param p_art: the parsed ``<PubmedArticle>`` element.
        @return: a ``datetime.date`` when a valid date is found; otherwise the
            text of ``PubDate`` as a string, which is expected to carry some
            date information. An empty string when there is no ``PubDate``.
        """
        pub_date = p_art.PubDate
        if pub_date is None:
            return ''

        month = getattr(pub_date.Month, 'text', 'Jan')
        if hasattr(pub_date.Year, "text"):
            candidates = ["%s %s" % (pub_date.Year.text, month)]
        elif hasattr(pub_date.MedlineDate, "text"):
            prefix = pub_date.MedlineDate.text[:8]
            candidates = [prefix.strip(), "%s %s" % (prefix, month)]
        else:
            candidates = []

        for candidate in candidates:
            # The month may be abbreviated ("2017 Nov") or numeric ("2017 11").
            for date_format in ("%Y %b", "%Y %m"):
                try:
                    return datetime.strptime(candidate, date_format).date()
                except ValueError:
                    continue

        return str(pub_date.text)

    @staticmethod
    def _get_unique_id(p_art):
        """Find the unique ID of an article inside a ``<PubmedArticle>`` element.

        @param p_art: the parsed ``<PubmedArticle>`` element.
        @return: the DOI when one is present; otherwise the first ``ArticleId``
            (expected to be the PubMed ID), prefixed by its ``IdType``.
        @raise IndexError: if the article has no ``ArticleId`` at all.
        """
        try:
            return p_art.findAll("ArticleId", {"IdType": "doi"})[0].text
        except IndexError:
            # No DOI in the XML: fall back to whatever ID comes first.
            fallback = p_art.findAll("ArticleId")[0]
            return "%s%s" % (fallback['IdType'], fallback.text)

    def _get_article_metadata(self, *args):
        """Download the given PubMed articles with EFetch and convert them to Documento dicts.

        @param args: the PubMed ids of the articles to fetch.
        @return: list of dicts with the keys ``resumo``, ``html_url``,
            ``autores``, ``doi``, ``palavras_chaves``, ``titulo`` and, when the
            publication date could be parsed, ``data``. When it could not, the
            raw date text is placed on the first line of ``resumo``. Articles
            without a PMID or without a title are left out.
        @raise requests.HTTPError: if the service answers with an error status.
        """
        id_list = ','.join([str(x) for x in args])

        payload = {"id": id_list, "db": self._db, "retmode": "xml"}
        payload.update(self.ncbi_register)
        url = "%s?%s" % (self.fetch_url, urlencode(payload))

        print("URL META: %s" % url)

        t_03 = time.time()
        reply = requests.get(url)
        reply.raise_for_status()
        soup = bsoup(reply.content, "xml")
        print('{:15s}{:6.3f}'.format("parse", time.time() - t_03))

        documentos = []

        for p_art in soup.findAll('PubmedArticle'):
            pmid = getattr(p_art.PMID, 'text', '')
            titulo = getattr(p_art.ArticleTitle, 'text', '')
            if not pmid or not titulo:
                # The link and the title are the minimum for a document to be included.
                continue

            authors = []
            for a in p_art.findAll("Author"):
                # An author entry may lack ForeName/LastName, e.g. when it only
                # carries a CollectiveName.
                full_name = ("%s %s" % (getattr(a.ForeName, 'text', ''),
                                        getattr(a.LastName, 'text', ''))).strip()
                if not full_name:
                    full_name = getattr(a.CollectiveName, 'text', '').strip()
                if full_name:
                    authors.append(full_name)
            keywords = [k.text for k in p_art.findAll("Keyword")]

            documento = {}
            # Only the first AbstractText element is used.
            documento['resumo'] = getattr(p_art.AbstractText, 'text', ' - ')
            documento['html_url'] = "%s%s" % (self._article_url, pmid)
            documento['autores'] = ",".join(authors)
            documento['doi'] = self._get_unique_id(p_art)
            documento['palavras_chaves'] = ",".join(keywords)
            documento['titulo'] = titulo

            data = self._get_data(p_art)
            if isinstance(data, str):
                # Unparsed date: keep the raw text at the top of the abstract.
                if data:
                    documento['resumo'] = "%s\n%s" % (data, documento['resumo'])
            else:
                documento['data'] = data

            documentos.append(documento)

        return documentos

In [4]:
# Search terms related to technology
technology_queryterms = [
    'machine learning', 'deep learning', 'artificial intelligence',
    'neural network', 'scoring system'
]

# Search terms related to the health area
health_queryterms = [
    'coronary artery disease', 'chest pain', 'heart disease', 'MACE',
    'Acute Cardiac Complications'
]

# Full query: (any technology term) AND (any health term).
# Kept under its own name so it is not overwritten by the small test query below.
review_queryterms = [technology_queryterms, health_queryterms]

scimago_journals = [
    "Journal of the American College of Cardiology", "Circulation", "European Heart Journal",
    "Circulation Research", "Nature Biotechnology", "Current Opinion in Biotechnology",
    "Annual Review of Biomedical Engineering", "Circulation: Cardiovascular Interventions",
]

# Names carry no surrounding whitespace: they are embedded verbatim in the
# quoted "<name>"[Journal] filter.
eigenfactor_journals = [
    "Medical image Analysis", "Biomaterials", "Acta Biomaterialia", "Physics in medicine and biology",
    "IEEE TRANSACTIONS ON MEDICAL IMAGING", "COMPUTER METHODS AND PROGRAMS IN BIOMEDICINE",
    "INTERNATIONAL JOURNAL OF CARDIOLOGY", "CARDIOVASCULAR RESEARCH", "HEART RHYTHM",
    "EUROPEAN JOURNAL OF CARDIO-THORACIC SURGERY", "JACC-Cardiovascular Interventions",
    "JOURNAL OF MOLECULAR AND CELLULAR CARDIOLOGY", "JACC-Cardiovascular Imaging",
    "Circulation-Heart Failure", "EUROPEAN JOURNAL OF HEART FAILURE", "EUROPACE",
    "CATHETERIZATION AND CARDIOVASCULAR INTERVENTIONS", "Journal of the American Heart Association",
    "JOURNAL OF THE AMERICAN SOCIETY OF ECHOCARDIOGRAPHY", "Circulation-Cardiovascular Imaging",
]

journal = ["BioMedical Engineering OnLine",
           "Biomedical Engineering"] + scimago_journals + eigenfactor_journals

# Small query used by the search calls in the following cells.
queryterms = [['machine learning'], ['ck mb']]

In [8]:
# PMC search limited to 20 records; show the start of each title.
r_01 = PMC_Searcher().search(max_records=20, queryterms=queryterms)
[doc['titulo'][:30] for doc in r_01]

response        2.058
Artigos encontrados:  223
URL META: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?id=5703994%2C5552265%2C5431941%2C5493079%2C5749823%2C4429500%2C4405523%2C4831781%2C4364461%2C4589532%2C3422834%2C2846872%2C5070523%2C4672312%2C4063215%2C5751801%2C3733825%2C4896250%2C3079734%2C5001212&db=pmc&retmode=xml&tool=Atena&email=ddddiegolima%40gmail.com
response_M     14.294
parse           3.058
fetch          21.235


['Developing a Machine Learning ',
 'Cardiac Function Improvement a',
 'Precision Radiology: Predictin',
 '36th International Symposium o',
 'Integrated genetic and epigene',
 'Abstracts from the 37th Annual',
 'Abstracts from the 38th Annual',
 'Cardiac Troponin Is a Predicto',
 'Abstracts for the 15th Interna',
 'Programmable bio-nano-chip sys',
 'Feature engineering combined w',
 'A study of health effects of l',
 'Learning statistical models of',
 'Poster Session III\nWednesday, ',
 'Identifying Adverse Drug Event',
 'Abstracts from the 9th Biennia',
 'MUNDUS project: MUltimodal Neu',
 'XXIV World Allergy Congress 20',
 'Multiplexed Immunoassay Panel ',
 '25th Annual Computational Neur']

In [10]:
# Same search limited to 40 records.
r_02 = PMC_Searcher().search(max_records=40, queryterms=queryterms)
[doc['titulo'][:30] for doc in r_02]

response        0.843
Artigos encontrados:  223
URL META: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?id=5703994%2C5552265%2C5431941%2C5493079%2C5749823%2C4429500%2C4405523%2C4831781%2C4364461%2C4589532%2C3422834%2C2846872%2C5070523%2C4672312%2C4063215%2C5751801%2C3733825%2C4896250%2C3079734%2C5001212%2C4669991%2C4261149%2C4672311%2C4943498%2C4672310%2C4389287%2C3654146%2C5592441%2C3204938%2C5374552%2C4869311%2C5042923%2C4871254%2C3639733%2C3751474%2C4212306%2C4261147%2C5149586%2C484364%2C5350766&db=pmc&retmode=xml&tool=Atena&email=ddddiegolima%40gmail.com
response_M     46.773
parse           9.446
fetch          120.787


['Developing a Machine Learning ',
 'Cardiac Function Improvement a',
 'Precision Radiology: Predictin',
 '36th International Symposium o',
 'Integrated genetic and epigene',
 'Abstracts from the 37th Annual',
 'Abstracts from the 38th Annual',
 'Cardiac Troponin Is a Predicto',
 'Abstracts for the 15th Interna',
 'Programmable bio-nano-chip sys',
 'Feature engineering combined w',
 'A study of health effects of l',
 'Learning statistical models of',
 'Poster Session III\nWednesday, ',
 'Identifying Adverse Drug Event',
 'Abstracts from the 9th Biennia',
 'MUNDUS project: MUltimodal Neu',
 'XXIV World Allergy Congress 20',
 'Multiplexed Immunoassay Panel ',
 '25th Annual Computational Neur',
 'UEG Week 2015 Poster Presentat',
 'Poster Session III\nWednesday, ',
 'Poster Session II\nTuesday, Dec',
 'Proceedings of the 3rd IPLeiri',
 'Poster Session I\nMonday, Decem',
 'High Field Small Animal Magnet',
 'Abstracts from the 36th Annual',
 '26th Annual Computational Neur',
 'Profiles and M

### BLIRI

In [27]:
# PMC search limited to 20 records; show the start of each title.
r_a = PMC_Searcher().search(max_records=20, queryterms=queryterms)
[doc['titulo'][:30] for doc in r_a]

response        1.589
Artigos encontrados:  223
URL META: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?id=5703994%2C5552265%2C5431941%2C5493079%2C5749823%2C4429500%2C4405523%2C4831781%2C4364461%2C4589532%2C3422834%2C2846872%2C5070523%2C4672312%2C4063215%2C5751801%2C3733825%2C4896250%2C3079734%2C5001212&db=pmc&retmode=xml&tool=Atena&email=ddddiegolima%40gmail.com
response_M      9.921
parse           1.040
fetch           0.003


['Developing a Machine Learning ',
 'Cardiac Function Improvement a',
 'Precision Radiology: Predictin',
 '36th International Symposium o',
 'Integrated genetic and epigene',
 'Abstracts from the 37th Annual',
 'Abstracts from the 38th Annual',
 'Cardiac Troponin Is a Predicto',
 'Abstracts for the 15th Interna',
 'Programmable bio-nano-chip sys',
 'Feature engineering combined w',
 'A study of health effects of l',
 'Learning statistical models of',
 'Poster Session III\nWednesday, ',
 'Identifying Adverse Drug Event',
 'Abstracts from the 9th Biennia',
 'MUNDUS project: MUltimodal Neu',
 'XXIV World Allergy Congress 20',
 'Multiplexed Immunoassay Panel ',
 '25th Annual Computational Neur']

In [25]:
# Search limited to 40 records; only the first 20 titles are displayed.
r_b = PMC_Searcher().search(max_records=40, queryterms=queryterms)
[doc['titulo'][:30] for doc in r_b[:20]]

response        1.016
Artigos encontrados:  223
URL META: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?id=5703994%2C5552265%2C5431941%2C5493079%2C5749823%2C4429500%2C4405523%2C4831781%2C4364461%2C4589532%2C3422834%2C2846872%2C5070523%2C4672312%2C4063215%2C5751801%2C3733825%2C4896250%2C3079734%2C5001212%2C4669991%2C4261149%2C4672311%2C4943498%2C4672310%2C4389287%2C3654146%2C5592441%2C3204938%2C5374552%2C4869311%2C5042923%2C4871254%2C3639733%2C3751474%2C4212306%2C4261147%2C5149586%2C484364%2C5350766&db=pmc&retmode=xml&tool=Atena&email=ddddiegolima%40gmail.com
response_M     18.025
parse           3.223
fetch           0.006


['Developing a Machine Learning ',
 'Cardiac Function Improvement a',
 'Precision Radiology: Predictin',
 '36th International Symposium o',
 'Integrated genetic and epigene',
 'Abstracts from the 37th Annual',
 'Abstracts from the 38th Annual',
 'Cardiac Troponin Is a Predicto',
 'Abstracts for the 15th Interna',
 'Programmable bio-nano-chip sys',
 'Feature engineering combined w',
 'A study of health effects of l',
 'Learning statistical models of',
 'Poster Session III\nWednesday, ',
 'Identifying Adverse Drug Event',
 'Abstracts from the 9th Biennia',
 'MUNDUS project: MUltimodal Neu',
 'XXIV World Allergy Congress 20',
 'Multiplexed Immunoassay Panel ',
 '25th Annual Computational Neur']

In [26]:
# Search limited to 60 records; only the first 20 titles are displayed.
r_c = PMC_Searcher().search(queryterms=queryterms, max_records=60)
# Display the result just fetched (r_c), not the previous cell's r_b.
[a['titulo'][:30] for a in r_c[:20]]

response        1.841
Artigos encontrados:  223
URL META: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?id=5703994%2C5552265%2C5431941%2C5493079%2C5749823%2C4429500%2C4405523%2C4831781%2C4364461%2C4589532%2C3422834%2C2846872%2C5070523%2C4672312%2C4063215%2C5751801%2C3733825%2C4896250%2C3079734%2C5001212%2C4669991%2C4261149%2C4672311%2C4943498%2C4672310%2C4389287%2C3654146%2C5592441%2C3204938%2C5374552%2C4869311%2C5042923%2C4871254%2C3639733%2C3751474%2C4212306%2C4261147%2C5149586%2C484364%2C5350766%2C4682919%2C4272368%2C5592442%2C1683569%2C5575665%2C4870725%2C4244175%2C4061745%2C3044641%2C5009929%2C3060650%2C4444413%2C4944947%2C5042925%2C5615764%2C5010413%2C4189906%2C4284756%2C1913720%2C5310653&db=pmc&retmode=xml&tool=Atena&email=ddddiegolima%40gmail.com
response_M     55.852
parse           4.245
fetch           0.006


['Developing a Machine Learning ',
 'Cardiac Function Improvement a',
 'Precision Radiology: Predictin',
 '36th International Symposium o',
 'Integrated genetic and epigene',
 'Abstracts from the 37th Annual',
 'Abstracts from the 38th Annual',
 'Cardiac Troponin Is a Predicto',
 'Abstracts for the 15th Interna',
 'Programmable bio-nano-chip sys',
 'Feature engineering combined w',
 'A study of health effects of l',
 'Learning statistical models of',
 'Poster Session III\nWednesday, ',
 'Identifying Adverse Drug Event',
 'Abstracts from the 9th Biennia',
 'MUNDUS project: MUltimodal Neu',
 'XXIV World Allergy Congress 20',
 'Multiplexed Immunoassay Panel ',
 '25th Annual Computational Neur']

In [88]:
# Search with no record limit; only the first 20 titles are displayed.
r_d = PMC_Searcher().search(queryterms=queryterms)
[doc['titulo'][:30] for doc in r_d[:20]]

response        1.722
Artigos encontrados:  223
sleeping
response        0.655
URL META: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?id=4669991%2C4261149%2C4672311%2C4943498%2C4672310%2C4389287%2C3654146%2C5592441%2C3204938%2C5374552%2C4869311%2C5042923%2C4871254%2C3639733%2C3751474%2C4212306%2C4261147%2C5149586%2C484364%2C5350766&db=pmc&retmode=xml&tool=Atena&email=ddddiegolima%40gmail.com
response        0.669
URL META: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?id=1501050%2C5461533%2C4701479%2C5650402%2C4070603%2C3324437%2C2683819%2C3204897%2C4070608%2C4244172%2C3533624%2C2872253%2C3533621%2C5687535%2C5766549%2C5637785%2C3481065%2C5330968%2C5029834%2C2219573&db=pmc&retmode=xml&tool=Atena&email=ddddiegolima%40gmail.com
response        0.690
URL META: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?id=4682919%2C4272368%2C5592442%2C1683569%2C5575665%2C4870725%2C4244175%2C4061745%2C3044641%2C5009929%2C3060650%2C4444413%2C4944947%2C5042925%2C5615764

['UEG Week 2015 Poster Presentat',
 'Poster Session III\nWednesday, ',
 'Poster Session II\nTuesday, Dec',
 'Proceedings of the 3rd IPLeiri',
 'Poster Session I\nMonday, Decem',
 'High Field Small Animal Magnet',
 'Abstracts from the 36th Annual',
 '26th Annual Computational Neur',
 'Profiles and Majority Voting-B',
 '37th International Symposium o',
 'Perfusion Pressure Cerebral In',
 'ESICM LIVES 2016: part two',
 'Choline metabolism-based molec',
 '2nd European Headache and Migr',
 'Development and evaluation of ',
 'UEG Week 2014 Poster Presentat',
 'Poster Session I\nMonday, Decem',
 'ACNP 55th Annual Meeting: Post',
 'On display in the Exhibition H',
 'ACTS Abstracts']

In [85]:
from collections import Counter

# Titles occurring more than once in r_d; an empty dict means no duplicates.
counts = Counter(doc['titulo'] for doc in r_d)
{titulo: ocorrencias for titulo, ocorrencias in counts.items() if ocorrencias > 1}

{}

In [89]:
# Number of documents in r_d (each one must carry a 'titulo').
len([doc['titulo'] for doc in r_d])

222