In [3]:
from Bio import Entrez
from Bio import SeqIO
from Bio import Seq
from Bio import Medline
import re
Entrez.email = "joanaoliveira1000@gmail.com" 

# Pesquisa genérica 

In [1]:
gene = "KCNQ1 Homo sapiens"
diabetes = "KCNQ1 diabetes Homo sapiens"
cancro = "KCNQ1 and malignant tumor Homo sapiens"

In [4]:
handle = Entrez.esearch(db = "pubmed", term = gene, retmax = 10, sort = "relevance")
record = Entrez.read(handle)
handle.close()
id_list = record['IdList']
print(id_list)

['31883792', '33322401', '33504163', '26762596', '35765105', '31593778', '25653179', '28373572', '32667860', '35893051']


In [5]:
handle = Entrez.efetch(db="pubmed", id=id_list, rettype="medline", retmode="text")
records = list(Medline.parse(handle))
print("Artigos", "\n")
for record in records:
    print("PubMedID:", record['PMID'])
    print("Título:", record['TI'])
    print("Autores:", record['AU'])
    print("Source:", record['SO'],'\n')

Artigos 

PubMedID: 31883792
Título: Structural Basis of Human KCNQ1 Modulation and Gating.
Autores: ['Sun J', 'MacKinnon R']
Source: Cell. 2020 Jan 23;180(2):340-347.e9. doi: 10.1016/j.cell.2019.12.003. Epub 2019 Dec 26. 

PubMedID: 33322401
Título: Insights into Cardiac IKs (KCNQ1/KCNE1) Channels Regulation.
Autores: ['Wu X', 'Larsson HP']
Source: Int J Mol Sci. 2020 Dec 11;21(24):9440. doi: 10.3390/ijms21249440. 

PubMedID: 33504163
Título: Suppression-Replacement KCNQ1 Gene Therapy for Type 1 Long QT Syndrome.
Autores: ['Dotzler SM', 'Kim CSJ', 'Gendron WAC', 'Zhou W', 'Ye D', 'Bos JM', 'Tester DJ', 'Barry MA', 'Ackerman MJ']
Source: Circulation. 2021 Apr 6;143(14):1411-1425. doi: 10.1161/CIRCULATIONAHA.120.051836. Epub 2021 Jan 28. 

PubMedID: 26762596
Título: Genetic model.
Autores: ['Zhao F', 'Song M', 'Wang Y', 'Wang W']
Source: J Cell Mol Med. 2016 Apr;20(4):765. doi: 10.1111/jcmm.12751. Epub 2016 Jan 14. 

PubMedID: 35765105
Título: KCNQ1-deficient and KCNQ1-mutant human embr

Extração dos ids referentes ao PMC (se possível)

In [6]:
PMCID = []
for id in id_list:
    try:
        handle = Entrez.efetch(db="pubmed", id=id, retmode="xml") 
        record = Entrez.read(handle)
        record = str(record)
        result = re.findall(r"StringElement\('(PMC[\d]+)',\sattributes={'IdType':\s'pmc'}\)", record)
        PMCID.append(result[-1])
    except: pass

Download dos artigos PMC em formato PDF 

In [7]:
import requests
from bs4 import BeautifulSoup
from requests_html import HTMLSession
from requests.exceptions import ConnectionError


In [8]:
s = HTMLSession()
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'}
for pmc in PMCID:
    try:
        url = "https://pubmed.ncbi.nlm.nih.gov/pmc/articles/"
        r = s.get(url + pmc + "/", headers=headers, timeout=5)
        pdf_url = 'https://www.ncbi.nlm.nih.gov' + r.html.find('a', first=True).attrs['href']
        r = s.get(pdf_url, stream=True)
        with open(id + '.pdf', 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    except ConnectionError as e:
        continue


In [9]:
url = "https://pubmed.ncbi.nlm.nih.gov/pmc/articles/"
r = requests.get(url + "PMC8863067" + "/")

In [10]:
url = "https://pubmed.ncbi.nlm.nih.gov/pmc/articles/"
for x in PMCID:
    r = requests.get(url + x + "/")
    soup = BeautifulSoup(r.text, 'html.parser')
    links = soup.find_all('a')
    i = 0
    for link in links:
        if ('.pdf' in link.get('href', [])):
            i += 1
            print("Downloading file: ", i)
  
            # Get response object for link
            response = requests.get(link.get('href'))
  
            # Write content in pdf file
            pdf = open("pdf"+str(i)+".pdf", 'wb')
            pdf.write(response.content)
            pdf.close()
            print("File ", i, " downloaded")

# Diabetes

In [11]:
handle = Entrez.esearch(db = "pubmed", term = diabetes, retmax = 10, sort = "relevance")
record = Entrez.read(handle)
handle.close()
id_list_diab = record['IdList']
print(id_list_diab)

['28607931', '24558078', '34547194', '32363639', '33795864', '35893051', '25560470', '32695830', '26762596', '20606385']


In [12]:
handle = Entrez.efetch(db="pubmed", id=id_list_diab, rettype="medline", retmode="text")
records = list(Medline.parse(handle))
print("Artigos", "\n")
for record in records:
    print("PubMedID:", record['PMID'])
    print("Título:", record['TI'])
    print("Autores:", record['AU'])
    print("Source:", record['SO'],'\n')

Artigos 

PubMedID: 28607931
Título: Genetic Epidemiology of Type 2 Diabetes in Mexican Mestizos.
Autores: ['Garcia-Chapa EG', 'Leal-Ugarte E', 'Peralta-Leal V', 'Duran-Gonzalez J', 'Meza-Espinoza JP']
Source: Biomed Res Int. 2017;2017:3937893. doi: 10.1155/2017/3937893. Epub 2017 May 18. 

PubMedID: 24558078
Título: The pharmacogenetics of type 2 diabetes: a systematic review.
Autores: ['Maruthur NM', 'Gribble MO', 'Bennett WL', 'Bolen S', 'Wilson LM', 'Balakrishnan P', 'Sahu A', 'Bass E', 'Kao WH', 'Clark JM']
Source: Diabetes Care. 2014;37(3):876-86. doi: 10.2337/dc13-1276. 

PubMedID: 34547194
Título: Congenital hyperinsulinism: recent updates on molecular mechanisms, diagnosis and management.
Autores: ['Giri D', 'Hawton K', 'Senniappan S']
Source: J Pediatr Endocrinol Metab. 2021 Sep 21;35(3):279-296. doi: 10.1515/jpem-2021-0369. Print 2022 Mar 28. 

PubMedID: 32363639
Título: Epigenetic regulation of insulin action and secretion - role in the pathogenesis of type 2 diabetes.
Auto

# Cancro

In [13]:
handle = Entrez.esearch(db = "pubmed", term = cancro, retmax = 10, sort = "relevance")
record = Entrez.read(handle)
handle.close()
id_list_cancro = record['IdList']
print(id_list_cancro)

['31636470', '34827600', '34850551', '23933686', '33994860', '35954238', '35395779', '35129069', '30014247', '28373572']


In [14]:
handle = Entrez.efetch(db="pubmed", id=id_list_cancro, rettype="medline", retmode="text")
records = list(Medline.parse(handle))
print("Artigos", "\n")
for record in records:
    print("PubMedID:", record['PMID'])
    print("Título:", record['TI'])
    print("Autores:", record['AU'])
    print("Source:", record['SO'],'\n')

Artigos 

PubMedID: 31636470
Título: Role of ion channels in gastrointestinal cancer.
Autores: ['Anderson KJ', 'Cormier RT', 'Scott PM']
Source: World J Gastroenterol. 2019 Oct 14;25(38):5732-5772. doi: 10.3748/wjg.v25.i38.5732. 

PubMedID: 34827600
Título: KCNQ1OT1: An Oncogenic Long Noncoding RNA.
Autores: ['Cagle P', 'Qi Q', 'Niture S', 'Kumar D']
Source: Biomolecules. 2021 Oct 29;11(11):1602. doi: 10.3390/biom11111602. 

PubMedID: 34850551
Título: ALKBH5-mediated m6A modification of lncRNA KCNQ1OT1 triggers the development of LSCC via upregulation of HOXA9.
Autores: ['Li Y', 'Yan B', 'Wang X', 'Li Q', 'Kan X', 'Wang J', 'Sun Y', 'Wang P', 'Tian L', 'Liu M']
Source: J Cell Mol Med. 2022 Jan;26(2):385-398. doi: 10.1111/jcmm.17091. Epub 2021 Dec 1. 

PubMedID: 23933686
Título: Serum and glucocorticoid inducible kinase, metabolic syndrome, inflammation, and tumor growth.
Autores: ['Lang F', 'Stournaras C']
Source: Hormones (Athens). 2013 Apr-Jun;12(2):160-71. doi: 10.14310/horm.2002.14

# Proteína (isoforma 1)

Usar referências da entrada no genebank da isoforma de interesse para obter mais artigos relevantes

In [16]:
handle = Entrez.efetch(db="protein", rettype="gb", retmode="text", id="NP_000209.2")
seq_record_pro = SeqIO.read(handle, "gb")

In [17]:
ids_isoforma = []
for x in range(len(seq_record_pro.annotations["references"])):
    ids_isoforma.append(seq_record_pro.annotations["references"][x].pubmed_id)
print(ids_isoforma)

['35870481', '36102229', '35893051', '19617707', '9020845', '8818942', '8528244', '20301579', '20301568', '20301308']


In [18]:
handle = Entrez.efetch(db="pubmed", id=ids_isoforma, rettype="medline", retmode="text")
records = list(Medline.parse(handle))
print("Artigos", "\n")
for record in records:
    if len(record) == 0:
    print("PubMedID:", record['PMID'])
    print("Título:", record['TI'])
    print("Autores:", record['AU'])
    print("Source:", record['SO'],'\n')
    
    

Artigos 

PubMedID: 35870481
Título: Purification and membrane interactions of human KCNQ1(100-370) potassium ion channel.
Autores: ['Dixit G', 'Stowe RB', 'Bates A', 'Jaycox CK', 'Escobar JR', 'Harding BD', 'Drew DL Jr', 'New CP', 'Sahu ID', 'Edelmann RE', 'Dabney-Smith C', 'Sanders CR', 'Lorigan GA']
Source: Biochim Biophys Acta Biomembr. 2022 Nov 1;1864(11):184010. doi: 10.1016/j.bbamem.2022.184010. Epub 2022 Jul 21. 

PubMedID: 36102229
Título: To Modify or Not to Modify: Allele-Specific Effects of 3'UTR-KCNQ1 Single Nucleotide Polymorphisms on Clinical Phenotype in a Long QT 1 Founder Population Segregating a Dominant-Negative Mutation.
Autores: ['Winbo A', 'Diamant UB', 'Persson J', 'Jensen SM', 'Rydberg A']
Source: J Am Heart Assoc. 2022 Sep 20;11(18):e025981. doi: 10.1161/JAHA.122.025981. Epub 2022 Sep 14. 

PubMedID: 35893051
Título: KCNJ11 and KCNQ1 Gene Polymorphisms and Placental Expression in Women with Gestational Diabetes Mellitus.
Autores: ['Majcher S', 'Ustianowski P',

KeyError: 'SO'