In [19]:
import requests   # best library to manage HTTP transactions
import json       # library to convert JSON to Python data structures
from time import sleep
import csv        # library to read/write/parse CSV files

# NCBI asks every E-utilities client to identify itself: the tool name and a
# contact email address should accompany all requests.
# Reference: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
tool_name = "VanderPub"  # name this application reports to NCBI; pick your own
email_address = "steve.baskauf@vanderbilt.edu"  # replace with your own email address

# ESearch endpoint and the media type requested from it.
fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
accept_mime = "application/json"

def retrieve_pubmed_data(fetch_url, accept_mime, email_address, tool_name, search_string,
                         retmax=5000, timeout=30):
    """Send one search request to the PubMed database and return the parsed reply.

    Parameters
    ----------
    fetch_url : str
        URL the GET request is sent to.
    accept_mime : str
        Value of the ``Accept`` request header.
    email_address : str
        Contact address sent as the ``email`` query parameter.
    tool_name : str
        Application name sent as the ``tool`` query parameter.
    search_string : str
        Query sent as the ``term`` query parameter.
    retmax : int or str, optional
        Value sent as the ``retmax`` query parameter (default 5000). Pass a
        small number such as 2 when testing.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up
        (default 30), so that a stalled connection cannot hang the notebook.

    Returns
    -------
    The decoded JSON body of the response, or an empty dict when the server
    answers 404 or the body is not valid JSON (for example an HTML error page).

    Raises
    ------
    requests.exceptions.RequestException
        Connection failures and timeouts are propagated to the caller.
    """
    header_dict = {'Accept': accept_mime}
    param_dict = {
        'tool': tool_name,
        'email': email_address,
        'db': 'pubmed',
        'retmode': 'json',
        'usehistory': 'n',
        'retmax': str(retmax),
        'term': search_string
        }
    try:
        response = requests.get(fetch_url, headers=header_dict, params=param_dict,
                                timeout=timeout)
        #print(response.url)
        if response.status_code == 404:
            pub_data = {} # return an empty dict if the constructed URL won't dereference
        else:
            try:
                pub_data = response.json()
            except ValueError:
                # Body was not JSON; treat it like the 404 case instead of
                # crashing with a decoding error.
                pub_data = {}
            #print(json.dumps(pub_data, indent = 2)) # uncomment to look at returned JSON
    finally:
        # See https://www.ncbi.nlm.nih.gov/books/NBK25497/ for usage guidelines.
        # An API key is required for more than 3 requests per second.
        # Pausing in a finally clause keeps the throttle in place even when the
        # request fails, so a caller that retries does not get blocked.
        sleep(0.4)
    return pub_data


In [25]:
# Query for the initial search: COVID-19 related terms combined with the
# 2020/10/06[pdat] filter.
search_string = '(coronavirus disease 2019 OR covid-19 OR Sars-CoV-2 OR severe acute respiratory syndrome coronavirus 2) AND 2020/10/06[pdat]'

# Send the query and keep the parsed reply for the cells that follow.
data = retrieve_pubmed_data(
    fetch_url=fetch_url,
    accept_mime=accept_mime,
    email_address=email_address,
    tool_name=tool_name,
    search_string=search_string,
)
print('done')

done


In [26]:
# Pull the list of matching IDs out of the search reply. The lookups are
# defensive because retrieve_pubmed_data() returns an empty dict when the
# request URL gives a 404; in that case there is no 'esearchresult' key and
# the cell reports zero results instead of failing with a KeyError.
idlist = data.get('esearchresult', {}).get('idlist', [])
print(len(idlist))
print(idlist)

115
['33022901', '33022856', '33022845', '33022834', '33022818', '33022810', '33022806', '33022800', '33022790', '33022781', '33022779', '33022765', '33022757', '33022755', '33022754', '33022182', '33022134', '33022124', '33022106', '33022093', '33022076', '33022023', '33022019', '33022016', '33022015', '33022014', '33022013', '33022004', '33021985', '33021927', '33021925', '33021901', '33021896', '33021869', '33021868', '33021856', '33021847', '33021846', '33021812', '33021761', '33021757', '33021739', '33021729', '33021718', '33021717', '33021713', '33021704', '33021703', '33021683', '33021673', '33021653', '33021631', '33021506', '33021464', '33021463', '33021445', '33021422', '33021400', '33021366', '33021342', '33021333', '33021332', '33021207', '33021206', '33021147', '33021144', '33021131', '33021102', '33021094', '33021051', '33020999', '33020967', '33020845', '33020836', '33020822', '33020815', '33019958', '33019956', '33019951', '33019916', '33019914', '33019894', '33019889',