In [None]:
import requests   # best library to manage HTTP transactions
import json       # library to convert JSON to Python data structures
from time import sleep
from datetime import date
import datetime
import csv        # library to read/write/parse CSV files

# NCBI identification requirements:
# a tool name and a contact email address should be sent with every request
# see https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
email_address = 'steve.baskauf@vanderbilt.edu' # put your email address here
tool_name = 'VanderPub' # give your application a name here

# MIME type passed to retrieve_pubmed_data, which sends it as the HTTP Accept header
accept_mime = 'application/json'
# URL of the E-utilities ESearch endpoint that the searches are sent to
fetch_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'

def retrieve_pubmed_data(fetch_url, accept_mime, email_address, tool_name, search_string):
    """Run one PubMed search against the NCBI E-utilities API and return the parsed JSON.

    Parameters
    ----------
    fetch_url : str
        URL of the E-utilities endpoint to query.
    accept_mime : str
        Value sent in the HTTP ``Accept`` header.
    email_address : str
        Contact email sent as the ``email`` parameter (NCBI identification).
    tool_name : str
        Application name sent as the ``tool`` parameter (NCBI identification).
    search_string : str
        Query sent as the ``term`` parameter.

    Returns
    -------
    dict
        The decoded JSON response. An empty dict is returned when the server
        answers with an HTTP error status or the body is not valid JSON.

    Notes
    -----
    Always pauses 0.4 second after the request so that successive calls stay
    under the limit of 3 requests per second allowed without an API key.
    Network-level failures (including the timeout) propagate as exceptions.
    """
    header_dict = {
        'Accept': accept_mime,
        }
    param_dict = {
        'tool': tool_name,
        'email': email_address,
        'db': 'pubmed',
        'retmode': 'json',
        'usehistory': 'n',
        'retmax': '10000',
        #'retmax': '2', # limit to 2 results when testing
        'term': search_string,
        }
    # A timeout keeps a stalled connection from hanging the whole run indefinitely.
    response = requests.get(fetch_url, headers=header_dict, params=param_dict, timeout=30)
    #print(response.url)

    pub_data = {} # stays empty if the request failed or the body can't be decoded
    if response.ok:
        try:
            pub_data = response.json()
        except ValueError:
            # The body was not JSON (e.g. an HTML error page); treat it as a failed request.
            pub_data = {}

    # See https://www.ncbi.nlm.nih.gov/books/NBK25497/ for usage guidelines.
    # An API key is required for more than 3 requests per second.
    sleep(0.4) # wait 0.4 second before hitting the API again to avoid getting blocked
    return pub_data

def writeDictsToCsv(table, filename, fieldnames):
    """Write a sequence of dictionaries to a UTF-8 CSV file with a header row.

    table: iterable of dicts, one per output row
    filename: path of the CSV file to create (overwritten if it exists)
    fieldnames: list of column names, in the order they should be written
    """
    # newline='' lets the csv module control the line endings itself
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        dict_writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(table)


In [None]:
# Year whose publication dates are being surveyed
year = 2020

# Day-of-year number for today (January 1 is day 1)
today_day_of_year = int(date.today().strftime('%j'))
print('day number today: ', today_day_of_year)

# Day-of-year number for March 11 of the surveyed year
march_11 = datetime.date(year, 3, 11)
march_11_day_of_year = int(march_11.strftime('%j'))
print('March 11 day number: ', march_11_day_of_year)

In [None]:
# Count the PubMed hits for each publication date from March 11 of `year` through today
# and save the daily counts to covid_results.csv.
results_list = []

# The end date is clamped to December 31 of `year`: today's day number belongs to the
# current calendar year, so using it directly with dates built in `year` gives the wrong
# range whenever this is run in a later year.
first_date = datetime.date(year, 1, 1) + datetime.timedelta(days=march_11_day_of_year - 1)
last_date = min(date.today(), datetime.date(year, 12, 31))

for day_offset in range((last_date - first_date).days + 1):
    iterated_date = first_date + datetime.timedelta(days=day_offset)
    iterated_date_string = iterated_date.strftime("%Y/%m/%d")

    # settings for the search: COVID-19 terms restricted to a single publication date
    search_string = '(coronavirus disease 2019 OR covid-19 OR Sars-CoV-2 OR severe acute respiratory syndrome coronavirus 2) AND ' + iterated_date_string + '[pdat]'

    # get the results
    data = retrieve_pubmed_data(fetch_url, accept_mime, email_address, tool_name, search_string)

    # A failed request comes back without an id list; skip that day rather than
    # aborting with a KeyError and losing the counts collected so far.
    idlist = data.get('esearchresult', {}).get('idlist')
    if idlist is None:
        print(iterated_date_string, 'no results returned; skipping')
        print()
        continue

    print(iterated_date_string, len(idlist))
    print()
    results_list.append({'date': iterated_date_string, 'count': len(idlist)})

writeDictsToCsv(results_list, 'covid_results.csv', ['date', 'count'])
print('done')

# The cells below were for testing a single search; don't run them

In [None]:
# Build the query for a single test date: the COVID-19 terms limited to one publication date
covid_terms = '(coronavirus disease 2019 OR covid-19 OR Sars-CoV-2 OR severe acute respiratory syndrome coronavirus 2)'
test_date_string = '2020/10/06'
search_string = covid_terms + ' AND ' + test_date_string + '[pdat]'

# Send the search to PubMed and keep the parsed response
data = retrieve_pubmed_data(
    fetch_url,
    accept_mime,
    email_address,
    tool_name,
    search_string,
)
print('done')

In [None]:
# Pull the list of PubMed IDs out of the search response, then show how many
# were returned followed by the IDs themselves
esearch_result = data['esearchresult']
idlist = esearch_result['idlist']
print(len(idlist), idlist, sep='\n')