In [1]:
# 9.2.1.efetch_example / Entrez.efetch() 메서드로 NCBI 데이터 읽어오기

from Bio import Entrez

Entrez.email = 'qnfrdms2003@gmail.com'
handle = Entrez.efetch(db = 'nucleotide', id = 'NC_002058.3', rettype = 'gb', retmode = 'text')
print(handle.read())

LOCUS       NC_002058               7440 bp ss-RNA     linear   VRL 13-AUG-2018
DEFINITION  Poliovirus, complete genome.
ACCESSION   NC_002058 NC_001428 NC_014336
VERSION     NC_002058.3
DBLINK      BioProject: PRJNA485481
KEYWORDS    RefSeq; coat protein; complementary DNA; genome; polyprotein.
SOURCE      Enterovirus C
  ORGANISM  Enterovirus C
            Viruses; Riboviria; Orthornavirae; Pisuviricota; Pisoniviricetes;
            Picornavirales; Picornaviridae; Ensavirinae; Enterovirus.
REFERENCE   1  (bases 1 to 742)
  AUTHORS   Kafasla,P., Morgner,N., Robinson,C.V. and Jackson,R.J.
  TITLE     Polypyrimidine tract-binding protein stimulates the poliovirus IRES
            by modulating eIF4G binding
  JOURNAL   EMBO J. 29 (21), 3710-3722 (2010)
   PUBMED   20859255
REFERENCE   2  (bases 1 to 742)
  AUTHORS   Vogt,D.A. and Andino,R.
  TITLE     An RNA element at the 5'-end of the poliovirus genome functions as
            a general promoter for RNA synthesis
  JOURNAL   PLoS Path

In [4]:
# 9.2.2.entrez_read_example / Entrez.read() 메서드로 XML 데이터 읽기

from Bio import Entrez

Entrez.email = 'qnfrdms2003@gmail.com'
handle = Entrez.efetch(db = 'nucleotide', id = 'NC_002058.3', rettype = 'gb', retmode = 'xml')
records = Entrez.read(handle)
for record in records:
    print(record['GBSeq_locus'])
    print(record['GBSeq_definition'])
    print(record['GBSeq_strandedness'], record['GBSeq_moltype'])
    print(record['GBSeq_length'], 'bp')
    print(len(record['GBSeq_references']), 'journals')

NC_002058
Poliovirus, complete genome
single RNA
7440 bp
24 journals


In [5]:
# 9.2.2.entrez_parse_example / Entrez.parse() 메서드로 XML 데이터 읽기

from Bio import Entrez

Entrez.email = 'qnfrdms2003@gmail.com'
handle = Entrez.efetch(db = 'nucleotide', id = 'NC_002058.3', rettype = 'gb', retmode = 'xml')
records = Entrez.parse(handle)
for record in records:
    for journal in record['GBSeq_references']:
        print(journal['GBReference_title'])

Polypyrimidine tract-binding protein stimulates the poliovirus IRES by modulating eIF4G binding
An RNA element at the 5'-end of the poliovirus genome functions as a general promoter for RNA synthesis
Replication of poliovirus requires binding of the poly(rC) binding protein to the cloverleaf as well as to the adjacent C-rich spacer sequence between the cloverleaf and the internal ribosomal entry site
Interaction of translation initiation factor eIF4B with the poliovirus internal ribosome entry site
Poly (rC) binding protein 2 forms a ternary complex with the 5'-terminal sequences of poliovirus RNA and the viral 3CD proteinase
Cis-element, oriR, involved in the initiation of (-) strand poliovirus RNA: a quasi-globular multi-domain RNA structure maintained by tertiary ('kissing') interactions
Poly(rC) binding protein 2 binds to stem-loop IV of the poliovirus RNA 5' noncoding region: identification by automated liquid chromatography-tandem mass spectrometry
The cellular polypeptide p57 (p

In [6]:
# 9.3.entrez_example_1 / Entrez.einfo() 메서드로 Entrez의 데이터베이스 목록 출력하기

from Bio import Entrez

Entrez.email = 'qnfrdms2003@gmail.com'
handle = Entrez.einfo()
result = handle.read()
print(result)

b'<?xml version="1.0" encoding="UTF-8" ?>\n<!DOCTYPE eInfoResult PUBLIC "-//NLM//DTD einfo 20190110//EN" "https://eutils.ncbi.nlm.nih.gov/eutils/dtd/20190110/einfo.dtd">\n<eInfoResult>\n<DbList>\n\n\t<DbName>pubmed</DbName>\n\t<DbName>protein</DbName>\n\t<DbName>nuccore</DbName>\n\t<DbName>ipg</DbName>\n\t<DbName>nucleotide</DbName>\n\t<DbName>structure</DbName>\n\t<DbName>genome</DbName>\n\t<DbName>annotinfo</DbName>\n\t<DbName>assembly</DbName>\n\t<DbName>bioproject</DbName>\n\t<DbName>biosample</DbName>\n\t<DbName>blastdbinfo</DbName>\n\t<DbName>books</DbName>\n\t<DbName>cdd</DbName>\n\t<DbName>clinvar</DbName>\n\t<DbName>gap</DbName>\n\t<DbName>gapplus</DbName>\n\t<DbName>grasp</DbName>\n\t<DbName>dbvar</DbName>\n\t<DbName>gene</DbName>\n\t<DbName>gds</DbName>\n\t<DbName>geoprofiles</DbName>\n\t<DbName>homologene</DbName>\n\t<DbName>medgen</DbName>\n\t<DbName>mesh</DbName>\n\t<DbName>nlmcatalog</DbName>\n\t<DbName>omim</DbName>\n\t<DbName>orgtrack</DbName>\n\t<DbName>pmc</DbName>\n

In [7]:
# 9.3.entrez.einfo_example / Entrez.einfo() 메서드로 Entrez의 데이터베이스 개수 출력하기

from Bio import Entrez

Entrez.email = 'qnfrdms2003@gmail.com'
handle = Entrez.einfo()
record = Entrez.read(handle)

print(record)

print(len(record['DbList']))

{'DbList': ['pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'structure', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'gap', 'gapplus', 'grasp', 'dbvar', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'mesh', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'proteinclusters', 'pcassay', 'protfam', 'pccompound', 'pcsubstance', 'seqannot', 'snp', 'sra', 'taxonomy', 'biocollections', 'gtr']}
41


In [9]:
# 9.4.entrez.esearch_example / Entrez.esearch() 메서드로 Entrez의 데이터베이스 개수 출력하기

from Bio import Entrez

Entrez.email = 'qnfrdms2003@gmail.com'
handle = Entrez.esearch(db = 'pubmed', term = 'metagenome')
record = Entrez.read(handle)
print(record['Count'])

33441


In [14]:
# EX
# 1
from Bio import Entrez

Entrez.email = 'qnfrdms2003@gmail.com'
handle = Entrez.efetch(db = 'nucleotide', id = 'NC_001367.1', rettype = 'gb', retmode = 'xml')
records = Entrez.read(handle)
for record in records:
    print(record['GBSeq_locus'])
    print(record['GBSeq_definition'])
    print(record['GBSeq_strandedness'], record['GBSeq_moltype'])
    print(record['GBSeq_length'], 'bp')
    print(len(record['GBSeq_references']), 'journals')

NC_001367
Tobacco mosaic virus, complete genome
single RNA
6395 bp
3 journals


In [18]:
# 2
from Bio import Entrez

Entrez.email = 'qnfrdms2003@gmail.com'
handle = Entrez.esearch(db = 'pubmed', term = 'bioinformatics')
record = Entrez.read(handle)
print(record['Count'])

484132
