In [None]:
# Ch08-1 Accessing GenBank and Navigating the NCBI Databases

In [None]:
# 1. Import modules and configure email
import os

from Bio import Entrez, SeqIO

# Placeholder address: replace it with your own before sending queries to NCBI.
Entrez.email = 'put@your.email.here'
# 2. Make output dir
# Pure-Python equivalent of `mkdir -p output`: works without a POSIX shell and
# does not fail when the directory already exists.
os.makedirs('output', exist_ok=True)

In [None]:
# 3. Find the CRT gene of Plasmodium
# The query lives in one place so the initial search and the follow-up search
# below can never drift apart.
search_term = 'CRT[Gene Name] AND "Plasmodium falciparum"[Organism]'


def _esearch_nucleotide(**extra_params):
    """Search the 'nucleotide' database for `search_term` and return the parsed result.

    Any keyword arguments (e.g. ``retmax``) are forwarded to ``Entrez.esearch``.
    The handle is always closed, even if parsing the response fails.
    """
    handle = Entrez.esearch(db='nucleotide', term=search_term, **extra_params)
    try:
        return Entrez.read(handle)
    finally:
        handle.close()


rec_list = _esearch_nucleotide()
# When the first response holds fewer ids than the total hit count
# (RetMax < Count), repeat the search asking for every hit.
if int(rec_list['RetMax']) < int(rec_list['Count']):
    rec_list = _esearch_nucleotide(retmax=rec_list['Count'])

In [None]:
# 4. Retrieve records
id_list = rec_list['IdList']
# Nothing to fetch if the search came back empty; stop here with a clear message
# instead of sending a request without ids.
if not id_list:
    raise ValueError('The search returned no ids; there is nothing to fetch')
# Request the GenBank format explicitly as plain text: the records are parsed as
# 'gb' flat files afterwards, and this mirrors the explicit retmode used for the
# PubMed fetch later in the notebook.
hdl = Entrez.efetch(db='nucleotide', id=id_list, rettype='gb', retmode='text')

In [None]:
# 5. Read and parse results
# list() consumes the whole parser output, so the handle is no longer needed
# afterwards; close it whether or not parsing succeeds.
try:
    recs = list(SeqIO.parse(hdl, 'gb'))
finally:
    hdl.close()

In [None]:
# 6. Review the record
target_name = 'KM288867'
# Pick the first record with the wanted name. Without an explicit "not found"
# check, a missing record would leave `rec` pointing at whatever record came
# last (or undefined when `recs` is empty) and the following cells would
# silently analyse the wrong data.
rec = next((candidate for candidate in recs if candidate.name == target_name), None)
if rec is None:
    raise LookupError('Record %s not found among %d fetched records' % (target_name, len(recs)))
print(rec.name)
print(rec.description)

In [None]:
# 7. Extract features
# Walk over every feature of the selected record: genes show their 'gene'
# qualifier, exons show their coordinates and strand, anything else is dumped
# whole under a "not processed" banner.
for feat in rec.features:
    kind = feat.type
    if kind == 'gene':
        print(feat.qualifiers['gene'])
        continue
    if kind == 'exon':
        span = feat.location
        print(span.start, span.end, span.strand)
        continue
    print('not processed:\n%s' % feat)

In [None]:
# 8. Review annotations
# Print each annotation of the selected record as key=value, one per line.
for entry in rec.annotations.items():
    # Each entry is a (key, value) pair, which fills the two %s slots directly.
    print('%s=%s' % entry)

In [None]:
# 9.  Access the Sequence
# Report how many residues the selected record's sequence contains.
seq_length = len(rec.seq)
print(seq_length)

In [None]:
# 10.  There's More... (Pubmed search)
from Bio import Medline

# A record without any references simply yields nothing to look up instead of
# raising KeyError.
refs = rec.annotations.get('references', [])
for ref in refs:
    # References that carry no PubMed id cannot be looked up in PubMed.
    if not ref.pubmed_id:
        continue
    print(ref.pubmed_id)
    handle = Entrez.efetch(db='pubmed', id=[ref.pubmed_id], rettype='medline', retmode='text')
    try:
        # Dump every field of each MEDLINE record returned for this id.
        for med_rec in Medline.parse(handle):
            for k, v in med_rec.items():
                print('%s: %s' % (k, v))
    finally:
        # One handle is opened per reference; close each one so they do not
        # pile up while the loop runs.
        handle.close()

In [None]:
## End of Notebook ##