In [1]:
from Bio import Entrez, SeqIO
# Contact address that Biopython attaches to the Entrez requests made below.
# NOTE(review): this is a personal address committed to the notebook — consider
# loading it from an environment variable before sharing.
Entrez.email = 'mdiaz67@gatech.edu'

## Chloroquine resistance transporter (CRT) gene in Plasmodium falciparum

### Query the NCBI database using Biopython and collect the record IDs needed for retrieval. In this case, more than 400 records matched the query.

In [3]:
# Search the NCBI nucleotide database for P. falciparum CRT gene records and
# collect every matching record ID in `rec_list['IdList']`.
SEARCH_TERM = 'CRT[Gene Name] AND "Plasmodium falciparum"[Organism]'

handle = Entrez.esearch(db='nucleotide', term=SEARCH_TERM)
rec_list = Entrez.read(handle)
handle.close()

# Entrez.read returns 'RetMax' and 'Count' as strings; comparing them directly
# is a lexicographic comparison ('20' < '1000' is False), which would skip the
# re-query for some result counts. Compare as integers instead.
if int(rec_list['RetMax']) < int(rec_list['Count']):  # override max results returned of 20
    handle = Entrez.esearch(db='nucleotide', term=SEARCH_TERM, retmax=rec_list['Count'])
    rec_list = Entrez.read(handle)
    handle.close()

### Download the matching nucleotide sequences from GenBank. Converting the iterator to a list makes it easier to analyze the records multiple times.

In [6]:
# Fetch the GenBank flat-file records for every ID found by the search and
# parse them into an in-memory list so they can be iterated more than once.
id_list = rec_list['IdList']
hdl = Entrez.efetch(db='nucleotide', id=id_list, rettype='gb', retmode='text')
try:
    recs = list(SeqIO.parse(hdl, 'gb'))  # read and parse result as a list
finally:
    # Release the network handle even if parsing fails part-way through.
    hdl.close()

### Concentrate on a single record

In [8]:
# Select the record to study by its GenBank locus name.
TARGET_NAME = 'KM288867'

rec = next((candidate for candidate in recs if candidate.name == TARGET_NAME), None)
if rec is None:
    # The original loop fell through silently when nothing matched, leaving
    # `rec` bound to the last downloaded record. Keep that fallback so the
    # following cells still run, but make it visible to the reader.
    if not recs:
        raise LookupError('no GenBank records were downloaded; cannot select a record')
    rec = recs[-1]
    print('WARNING: %s not found in %d downloaded records; using %s instead'
          % (TARGET_NAME, len(recs), rec.name))
print(rec.name)
print(rec.description)

MW275057
Plasmodium falciparum isolate OM-17-304 chloroquine resistance transporter (crt) gene, partial cds


### The feature of interest, the CRT gene, is isolated and can be examined further.

In [9]:
# Walk through the annotated features of the selected record: print the gene
# name for 'gene' features, the start/end/strand for 'exon' features, and dump
# every other feature verbatim so nothing is skipped without notice.
for feature in rec.features:
    if feature.type not in ('gene', 'exon'):
        print('not processed:\n%s' % feature)
        continue
    if feature.type == 'exon':
        loc = feature.location
        print(loc.start, loc.end, loc.strand)
    else:
        print(feature.qualifiers['gene'])

not processed:
type: source
location: [0:3096](+)
qualifiers:
    Key: collected_by, Value: ['AFRIMS']
    Key: country, Value: ['Cambodia']
    Key: db_xref, Value: ['taxon:5833']
    Key: isolate, Value: ['OM-17-304']
    Key: isolation_source, Value: ['leukocyte-depleted venous blood']
    Key: lat_lon, Value: ['14.17 N 103.63 E']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Plasmodium falciparum']

['crt']
not processed:
type: mRNA
location: join{[<0:91](+), [268:537](+), [716:889](+), [1061:1194](+), [1347:1419](+), [1542:1618](+), [1714:1797](+), [1924:1975](+), [2112:2169](+), [2317:2410](+), [2603:2648](+), [2795:2850](+), [3019:>3096](+)}
qualifiers:
    Key: gene, Value: ['crt']
    Key: product, Value: ['chloroquine resistance transporter']

not processed:
type: CDS
location: join{[<0:91](+), [268:537](+), [716:889](+), [1061:1194](+), [1347:1419](+), [1542:1618](+), [1714:1797](+), [1924:1975](+), [2112:2169](+), [2317:2410](+), [2603:2648](+), [279