In [31]:
from Bio import Entrez
import xmltodict
import time

# initialize some default parameters
Entrez.email = 'c.ke@lumc.nl' # provide your email address
db = 'snp'                              # set search to dbVar database
paramEutils = { 'usehistory':'Y' }        # Use Entrez search history to cache results

# generate query to Entrez eSearch
eSearch = Entrez.esearch(db=db, term='9[All Fields] AND 150524[All Fields]', **paramEutils) #A1 is G

# get eSearch result as dict object
eSresult = Entrez.read(eSearch)


In [32]:
eSresult

{'Count': '3', 'RetMax': '3', 'RetStart': '0', 'QueryKey': '1', 'WebEnv': 'MCID_6164c9fb9516da5c014756b3', 'IdList': ['57398577', '41321045', '4025669'], 'TranslationSet': [], 'TranslationStack': [{'Term': '9[All Fields]', 'Field': 'All Fields', 'Count': '46577772', 'Explode': 'N'}, {'Term': '150524[All Fields]', 'Field': 'All Fields', 'Count': '14', 'Explode': 'N'}, 'AND'], 'QueryTranslation': '9[All Fields] AND 150524[All Fields]'}

In [33]:
# review results 
for k in eSresult:
    print (k, ":", eSresult[k])
    
#Output: Web environment (&WebEnv) and query key (&query_key) parameters specifying the location on the Entrez history server of the list of UIDs matching the Entrez query
#https://www.ncbi.nlm.nih.gov/books/NBK25500/#chapter1.Storing_Search_Results

Count : 3
RetMax : 3
RetStart : 0
QueryKey : 1
WebEnv : MCID_6164c9fb9516da5c014756b3
IdList : ['57398577', '41321045', '4025669']
TranslationSet : []
TranslationStack : [{'Term': '9[All Fields]', 'Field': 'All Fields', 'Count': '46577772', 'Explode': 'N'}, {'Term': '150524[All Fields]', 'Field': 'All Fields', 'Count': '14', 'Explode': 'N'}, 'AND']
QueryTranslation : 9[All Fields] AND 150524[All Fields]


In [34]:
# get the WebEnv session cookie, and the QueryKey:

webenv = eSresult["WebEnv"]
query_key = eSresult["QueryKey"]
total_count = int(eSresult["Count"])
query_key = eSresult["QueryKey"]
retmax = 1 # return 2 rs per batch example

In [36]:
Entrez.efetch(db="snp",#rettype="uilist", #available types [uilist | xml (use retmode=xml))
             retmode="text",
             query_key= )

range(0, 3)

In [44]:
# sample codes adopted with modifications from http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc139.
fetch_count = 0
for start in range(0, total_count, retmax):
    end = min(total_count, start+retmax)
    print("Going to download record %i to %i" % (start+1, end))
    attempt = 0
    #fetch_count += 1
    while (attempt < 3):
        attempt += 1
        try:
            fetch_handle = Entrez.efetch(db="snp",
                                         #rettype="uilist", #available types [uilist | xml (use retmode=xml))
                                         retmode="xml",
                                         rettype="chr",
                                         retstart=start,
                                         retmax=retmax,
                                         webenv=webenv,
                                         query_key=query_key )
        except HTTPError as err:
            if 500 <= err.code <= 599:
                print("Received error from server %s" % err)
                print("Attempt %i of 3" % attempt)
                time.sleep(15)
            else:
                raise
    if (fetch_handle):
        #print(fetch_handle)            
        data = xmltodict.parse(fetch_handle.read())
        print(data)
        fetch_handle.close()

Going to download record 1 to 1
OrderedDict([('ExchangeSet', OrderedDict([('@xmlns:xsi', 'https://www.w3.org/2001/XMLSchema-instance'), ('@xmlns', 'https://www.ncbi.nlm.nih.gov/SNP/docsum'), ('@xsi:schemaLocation', 'https://www.ncbi.nlm.nih.gov/SNP/docsum ftp://ftp.ncbi.nlm.nih.gov/snp/specs/docsum_eutils.xsd'), ('DocumentSummary', OrderedDict([('@uid', '57398577'), ('SNP_ID', '4025669'), ('ALLELE_ORIGIN', None), ('GLOBAL_MAFS', OrderedDict([('MAF', [OrderedDict([('STUDY', 'KOREAN'), ('FREQ', 'G=0.30902/898')]), OrderedDict([('STUDY', 'Korea1K'), ('FREQ', 'G=0.30887/564')]), OrderedDict([('STUDY', 'NorthernSweden'), ('FREQ', 'G=0.26667/160')]), OrderedDict([('STUDY', 'Qatari'), ('FREQ', 'G=0.20833/45')]), OrderedDict([('STUDY', 'SGDP_PRJ'), ('FREQ', 'G=0.13693/66')]), OrderedDict([('STUDY', 'TOMMO'), ('FREQ', 'G=0.30246/5068')]), OrderedDict([('STUDY', 'ALFA'), ('FREQ', 'G=0.33071/1809')])])])), ('GLOBAL_POPULATION', None), ('GLOBAL_SAMPLESIZE', '0'), ('SUSPECTED', None), ('CLINICAL_SI

In [41]:
fetch_handle

<_io.TextIOWrapper encoding='UTF-8'>