In [1]:
from Bio import Entrez
from Bio import SeqIO
from Bio import Seq
from Bio import Medline
import re
Entrez.email = "goncalocardoso2016@gmail.com" 

# Pesquisa genérica 

In [7]:
gene = "PDE3B Homo sapiens"
diabetes = "PDE3B diabetes Homo sapiens"
cAMP = "PDE3B and cAMP Homo sapiens"
cancro = "PDE3B and malignant tumor Homo sapiens"

In [44]:
handle = Entrez.esearch(db = "pubmed", term = gene, retmax = 10, sort = "relevance")
record = Entrez.read(handle)
handle.close()
id_list = record['IdList']
print(id_list)

['34986332', '34764459', '32089240', '30580553', '22001403', '30275531', '34875679', '31176020', '17368848', '35813615']


In [45]:
handle = Entrez.efetch(db="pubmed", id=id_list, rettype="medline", retmode="text")
records = list(Medline.parse(handle))
print("Artigos", "\n")
for record in records:
    print("PubMedID:", record['PMID'])
    print("Título:", record['TI'])
    print("Autores:", record['AU'])
    print("Source:", record['SO'],'\n')

Artigos 

PubMedID: 34986332
Título: FGF1 and insulin control lipolysis by convergent pathways.
Autores: ['Sancar G', 'Liu S', 'Gasser E', 'Alvarez JG', 'Moutos C', 'Kim K', 'van Zutphen T', 'Wang Y', 'Huddy TF', 'Ross B', 'Dai Y', 'Zepeda D', 'Collins B', 'Tilley E', 'Kolar MJ', 'Yu RT', 'Atkins AR', 'van Dijk TH', 'Saghatelian A', 'Jonker JW', 'Downes M', 'Evans RM']
Source: Cell Metab. 2022 Jan 4;34(1):171-183.e6. doi: 10.1016/j.cmet.2021.12.004. 

PubMedID: 34764459
Título: Connecting copper and cancer: from transition metal signalling to metalloplasia.
Autores: ['Ge EJ', 'Bush AI', 'Casini A', 'Cobine PA', 'Cross JR', 'DeNicola GM', 'Dou QP', 'Franz KJ', 'Gohil VM', 'Gupta S', 'Kaler SG', 'Lutsenko S', 'Mittal V', 'Petris MJ', 'Polishchuk R', 'Ralle M', 'Schilsky ML', 'Tonks NK', 'Vahdat LT', 'Van Aelst L', 'Xi D', 'Yuan P', 'Brady DC', 'Chang CJ']
Source: Nat Rev Cancer. 2022 Feb;22(2):102-113. doi: 10.1038/s41568-021-00417-2. Epub 2021 Nov 11. 

PubMedID: 32089240
Título: Astrag

Extração dos ids referentes ao PMC (se possível)

In [11]:
PMCID = []
for id in id_list:
    try:
        handle = Entrez.efetch(db="pubmed", id=id, retmode="xml") 
        record = Entrez.read(handle)
        record = str(record)
        result = re.findall(r"StringElement\('(PMC[\d]+)',\sattributes={'IdType':\s'pmc'}\)", record)
        PMCID.append(result[-1])
    except: pass

['PMC8863067', 'PMC8810673', 'PMC3225700', 'PMC6521726', 'PMC8947777', 'PMC9261097']


Download dos artigos PMC em formato PDF 

In [17]:
import requests
from bs4 import BeautifulSoup
from requests_html import HTMLSession
from requests.exceptions import ConnectionError


In [43]:
s = HTMLSession()
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'}
for pmc in PMCID:
    try:
        url = "https://pubmed.ncbi.nlm.nih.gov/pmc/articles/"
        r = s.get(url + pmc + "/", headers=headers, timeout=5)
        pdf_url = 'https://www.ncbi.nlm.nih.gov' + r.html.find('a', first=True).attrs['href']
        r = s.get(pdf_url, stream=True)
        with open(id + '.pdf', 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    except ConnectionError as e:
        continue


In [None]:
url = "https://pubmed.ncbi.nlm.nih.gov/pmc/articles/"
r = requests.get(url + "PMC8863067" + "/")

In [41]:
url = "https://pubmed.ncbi.nlm.nih.gov/pmc/articles/"
for x in PMCID:
    r = requests.get(url + x + "/")
    soup = BeautifulSoup(r.text, 'html.parser')
    links = soup.find_all('a')
    i = 0
    for link in links:
        if ('.pdf' in link.get('href', [])):
            i += 1
            print("Downloading file: ", i)
  
            # Get response object for link
            response = requests.get(link.get('href'))
  
            # Write content in pdf file
            pdf = open("pdf"+str(i)+".pdf", 'wb')
            pdf.write(response.content)
            pdf.close()
            print("File ", i, " downloaded")

# cAMP

In [46]:
handle = Entrez.esearch(db = "pubmed", term = cAMP, retmax = 10, sort = "relevance")
record = Entrez.read(handle)
handle.close()
id_list_camp = record['IdList']
print(id_list)

['34986332', '34764459', '32089240', '30580553', '22001403', '30275531', '34875679', '31176020', '17368848', '35813615']


In [48]:
handle = Entrez.efetch(db="pubmed", id=id_list_camp, rettype="medline", retmode="text")
records = list(Medline.parse(handle))
print("Artigos", "\n")
for record in records:
    print("PubMedID:", record['PMID'])
    print("Título:", record['TI'])
    print("Autores:", record['AU'])
    print("Source:", record['SO'],'\n')

Artigos 

PubMedID: 34986332
Título: FGF1 and insulin control lipolysis by convergent pathways.
Autores: ['Sancar G', 'Liu S', 'Gasser E', 'Alvarez JG', 'Moutos C', 'Kim K', 'van Zutphen T', 'Wang Y', 'Huddy TF', 'Ross B', 'Dai Y', 'Zepeda D', 'Collins B', 'Tilley E', 'Kolar MJ', 'Yu RT', 'Atkins AR', 'van Dijk TH', 'Saghatelian A', 'Jonker JW', 'Downes M', 'Evans RM']
Source: Cell Metab. 2022 Jan 4;34(1):171-183.e6. doi: 10.1016/j.cmet.2021.12.004. 

PubMedID: 30580553
Título: n-3 Polyunsaturated fatty acids for the management of alcoholic liver disease: A critical review.
Autores: ['Wang M', 'Ma LJ', 'Yang Y', 'Xiao Z', 'Wan JB']
Source: Crit Rev Food Sci Nutr. 2019;59(sup1):S116-S129. doi: 10.1080/10408398.2018.1544542. Epub 2018 Dec 22. 

PubMedID: 22001403
Título: From PDE3B to the regulation of energy homeostasis.
Autores: ['Degerman E', 'Ahmad F', 'Chung YW', 'Guirguis E', 'Omar B', 'Stenson L', 'Manganiello V']
Source: Curr Opin Pharmacol. 2011 Dec;11(6):676-82. doi: 10.1016/j.

# Diabetes

In [50]:
handle = Entrez.esearch(db = "pubmed", term = diabetes, retmax = 10, sort = "relevance")
record = Entrez.read(handle)
handle.close()
id_list_diab = record['IdList']
print(id_list_diab)

['34986332', '34764459', '32089240', '30275531', '22001403', '34875679', '35773277', '12904862', '33112806', '33237390']


In [51]:
handle = Entrez.efetch(db="pubmed", id=id_list_diab, rettype="medline", retmode="text")
records = list(Medline.parse(handle))
print("Artigos", "\n")
for record in records:
    print("PubMedID:", record['PMID'])
    print("Título:", record['TI'])
    print("Autores:", record['AU'])
    print("Source:", record['SO'],'\n')

Artigos 

PubMedID: 34986332
Título: FGF1 and insulin control lipolysis by convergent pathways.
Autores: ['Sancar G', 'Liu S', 'Gasser E', 'Alvarez JG', 'Moutos C', 'Kim K', 'van Zutphen T', 'Wang Y', 'Huddy TF', 'Ross B', 'Dai Y', 'Zepeda D', 'Collins B', 'Tilley E', 'Kolar MJ', 'Yu RT', 'Atkins AR', 'van Dijk TH', 'Saghatelian A', 'Jonker JW', 'Downes M', 'Evans RM']
Source: Cell Metab. 2022 Jan 4;34(1):171-183.e6. doi: 10.1016/j.cmet.2021.12.004. 

PubMedID: 34764459
Título: Connecting copper and cancer: from transition metal signalling to metalloplasia.
Autores: ['Ge EJ', 'Bush AI', 'Casini A', 'Cobine PA', 'Cross JR', 'DeNicola GM', 'Dou QP', 'Franz KJ', 'Gohil VM', 'Gupta S', 'Kaler SG', 'Lutsenko S', 'Mittal V', 'Petris MJ', 'Polishchuk R', 'Ralle M', 'Schilsky ML', 'Tonks NK', 'Vahdat LT', 'Van Aelst L', 'Xi D', 'Yuan P', 'Brady DC', 'Chang CJ']
Source: Nat Rev Cancer. 2022 Feb;22(2):102-113. doi: 10.1038/s41568-021-00417-2. Epub 2021 Nov 11. 

PubMedID: 32089240
Título: Astrag

# Cancro

In [53]:
handle = Entrez.esearch(db = "pubmed", term = cancro, retmax = 10, sort = "relevance")
record = Entrez.read(handle)
handle.close()
id_list_cancro = record['IdList']
print(id_list_cancro)

['34764459', '35813615', '30530703', '24133626', '10721768', '30218018', '28275299', '24282571', '22904677', '21152070']


In [54]:
handle = Entrez.efetch(db="pubmed", id=id_list_cancro, rettype="medline", retmode="text")
records = list(Medline.parse(handle))
print("Artigos", "\n")
for record in records:
    print("PubMedID:", record['PMID'])
    print("Título:", record['TI'])
    print("Autores:", record['AU'])
    print("Source:", record['SO'],'\n')

Artigos 

PubMedID: 34764459
Título: Connecting copper and cancer: from transition metal signalling to metalloplasia.
Autores: ['Ge EJ', 'Bush AI', 'Casini A', 'Cobine PA', 'Cross JR', 'DeNicola GM', 'Dou QP', 'Franz KJ', 'Gohil VM', 'Gupta S', 'Kaler SG', 'Lutsenko S', 'Mittal V', 'Petris MJ', 'Polishchuk R', 'Ralle M', 'Schilsky ML', 'Tonks NK', 'Vahdat LT', 'Van Aelst L', 'Xi D', 'Yuan P', 'Brady DC', 'Chang CJ']
Source: Nat Rev Cancer. 2022 Feb;22(2):102-113. doi: 10.1038/s41568-021-00417-2. Epub 2021 Nov 11. 

PubMedID: 35813615
Título: Pathogenesis of Primary Aldosteronism: Impact on Clinical Outcome.
Autores: ['Santana LS', 'Guimaraes AG', 'Almeida MQ']
Source: Front Endocrinol (Lausanne). 2022 Jun 23;13:927669. doi: 10.3389/fendo.2022.927669. eCollection 2022. 

PubMedID: 30530703
Título: Anagrelide for Gastrointestinal Stromal Tumor.
Autores: ['Pulkka OP', 'Gebreyohannes YK', 'Wozniak A', 'Mpindi JP', 'Tynninen O', 'Icay K', 'Cervera A', 'Keskitalo S', 'Murumagi A', 'Kulesskiy

# Proteína (isoforma 2)

Usar referências da entrada no genebank da isoforma de interesse para obter mais artigos relevantes

In [55]:
handle = Entrez.efetch(db="protein", rettype="gb", retmode="text", id="NP_000913.2")
seq_record_pro = SeqIO.read(handle, "gb")

In [61]:
ids_isoforma = []
for x in range(len(seq_record_pro.annotations["references"])):
    ids_isoforma.append(seq_record_pro.annotations["references"][x].pubmed_id)
print(ids_isoforma)

['32949251', '33112806', '32967310', '32296183', '31176020', '8706823', '8626720', '8557689', '8562305', '8163498']


In [62]:
handle = Entrez.efetch(db="pubmed", id=ids_isoforma, rettype="medline", retmode="text")
records = list(Medline.parse(handle))
print("Artigos", "\n")
for record in records:
    print("PubMedID:", record['PMID'])
    print("Título:", record['TI'])
    print("Autores:", record['AU'])
    print("Source:", record['SO'],'\n')
    
    

Artigos 

PubMedID: 32949251
Título: Impact of phosphodiesterases PDE3 and PDE4 on 5-hydroxytryptamine receptor4-mediated increase of cAMP in human atrial fibrillation.
Autores: ['Dolce B', 'Christ T', 'Grammatika Pavlidou N', 'Yildirim Y', 'Reichenspurner H', 'Eschenhagen T', 'Nikolaev VO', 'Kaumann AJ', 'Molina CE']
Source: Naunyn Schmiedebergs Arch Pharmacol. 2021 Feb;394(2):291-298. doi: 10.1007/s00210-020-01968-1. Epub 2020 Sep 19. 

PubMedID: 33112806
Título: Phosphodiesterase 2A and 3B variants are associated with primary aldosteronism.
Autores: ['Rassi-Cruz M', 'Maria AG', 'Faucz FR', 'London E', 'Vilela LAP', 'Santana LS', 'Benedetti AFF', 'Goldbaum TS', 'Tanno FY', 'Srougi V', 'Chambo JL', 'Pereira MAA', 'Cavalcante ACBS', 'Carnevale FC', 'Pilan B', 'Bortolotto LA', 'Drager LF', 'Lerario AM', 'Latronico AC', 'Fragoso MCBV', 'Mendonca BB', 'Zerbini MCN', 'Stratakis CA', 'Almeida MQ']
Source: Endocr Relat Cancer. 2021 Jan;28(1):1-13. doi: 10.1530/ERC-20-0384. 

PubMedID: 329673