In [1]:
import os

from Bio import Entrez, Medline, SeqIO, Seq

In [2]:
# Contact address sent along with Entrez requests.
# Set the ENTREZ_EMAIL environment variable to override the notebook's default,
# so the notebook can be shared or re-run by someone else without editing this cell.
Entrez.email = os.environ.get('ENTREZ_EMAIL') or 'kimhippowork@gmail.com'

In [3]:
# List the databases reachable through Entrez and print the parsed reply.
handle = Entrez.einfo()
try:
    rec = Entrez.read(handle)
finally:
    # Release the response handle even if parsing fails.
    handle.close()
print(rec)

{'DbList': ['pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'structure', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'gap', 'gapplus', 'grasp', 'dbvar', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'mesh', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'proteinclusters', 'pcassay', 'protfam', 'pccompound', 'pcsubstance', 'seqannot', 'snp', 'sra', 'taxonomy', 'biocollections', 'gtr']}


In [4]:
# Search the nucleotide database for Plasmodium falciparum CRT entries.
search_term = 'CRT[Gene Name] AND "Plasmodium falciparum"[Organism]'

handle = Entrez.esearch(db='nucleotide', term=search_term)
try:
    rec_list = Entrez.read(handle)
finally:
    handle.close()

# 'RetMax' and 'Count' come back as strings; compare them as integers so the
# check is numeric rather than lexicographic (as strings, '20' < '1000' is False).
total_hits = int(rec_list['Count'])
if int(rec_list['RetMax']) < total_hits:
    # The first reply was truncated: repeat the search asking for every hit.
    handle = Entrez.esearch(db='nucleotide', term=search_term, retmax=total_hits)
    try:
        rec_list = Entrez.read(handle)
    finally:
        handle.close()

In [5]:
# Request the records for every ID returned by the search, in 'gb' format.
# The open response is left in `handle` for the parsing step.
id_list = rec_list['IdList']
handle = Entrez.efetch(
    db='nucleotide',
    id=id_list,
    rettype='gb',
    retmax=rec_list['Count'],
)

In [6]:
# Parse every record from the efetch response, then release the handle.
try:
    recs = list(SeqIO.parse(handle, 'gb'))
finally:
    handle.close()

# Select the record of interest. Fail loudly if it is missing instead of
# silently continuing with whichever record happened to come last.
rec = next((candidate for candidate in recs if candidate.name == 'KM288867'), None)
if rec is None:
    raise LookupError("record 'KM288867' was not found among the fetched records")

In [7]:
# Show the selected record's name and description, one per line.
print(rec.name, rec.description, sep='\n')

KM288867
Plasmodium falciparum clone PF3D7_0709000 chloroquine resistance transporter (CRT) gene, complete cds


In [8]:
# Walk the record's features: print the gene qualifier for 'gene' features,
# the start/end/strand for 'exon' features, and dump every other feature as-is.
for feature in rec.features:
    if feature.type == 'exon':
        loc = feature.location
        print(f'Exon : {loc.start}, {loc.end}, {loc.strand}')
    elif feature.type == 'gene':
        print(feature.qualifiers['gene'])
    else:
        print(f'not precessed :\n{feature}')

not precessed :
type: source
location: [0:10000](+)
qualifiers:
    Key: clone, Value: ['PF3D7_0709000']
    Key: db_xref, Value: ['taxon:5833']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Plasmodium falciparum']

['CRT']
not precessed :
type: mRNA
location: join{[2751:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:7598](+)}
qualifiers:
    Key: gene, Value: ['CRT']
    Key: product, Value: ['chloroquine resistance transporter']

not precessed :
type: 5'UTR
location: [2751:3452](+)
qualifiers:
    Key: gene, Value: ['CRT']

not precessed :
type: primer_bind
location: [2935:2958](+)
qualifiers:

not precessed :
type: primer_bind
location: [3094:3121](+)
qualifiers:

not precessed :
type: CDS
location: join{[3452:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5

In [9]:
# Show the first 50 bases of the record as DNA, as RNA, and translated.
sequence = rec.seq[:50]
rna = sequence.transcribe()

# 50 is not a multiple of 3, so the slice ends in a partial codon.
# Trim to whole codons explicitly before translating, so translate() is never
# handed a partial codon. The printed translation is the same as before.
whole_codons = rna[:len(rna) - len(rna) % 3]

print(f'       DNA : {sequence}')
print(f'       RNA : {rna}')
print(f'amino acid : {whole_codons.translate()}')

       DNA : ATATGTAAAACCAAAATAAATTAAACAGAATTTATTTTTAAAAGATTTAT
       RNA : AUAUGUAAAACCAAAAUAAAUUAAACAGAAUUUAUUUUUAAAAGAUUUAU
amino acid : ICKTKIN*TEFIFKRF




In [11]:
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import functools