In [1]:
# Dependencies
from datetime import date, timedelta, datetime
import requests
import pandas as pd
from time import sleep
from api_keys import nyt_apikey

In [2]:
# Helper for stepping backwards through bestseller list dates roughly one month at a time.
# The name reflects that the NYT book review is published on Sundays.
def getPreviousSunday(d):
    """Return the date 4 or 5 whole weeks before ``d``.

    Stepping back in whole weeks keeps the weekday unchanged, so a Sunday
    stays a Sunday. The result always falls in the calendar month
    preceding ``d``: four weeks is enough unless ``d`` is on or after the
    28th, in which case five weeks are subtracted.

    Parameters
    ----------
    d : datetime.date
        Reference date (anything exposing ``.day`` and supporting
        subtraction of a ``timedelta``).

    Returns
    -------
    datetime.date
        ``d`` shifted back by 28 days (``d.day < 28``) or 35 days
        (``d.day >= 28``).
    """
    weeks_back = 5 if d.day >= 28 else 4
    return d - timedelta(weeks=weeks_back)

In [None]:
# Most recent list date to request (April 9th, 2023 in this configuration).
# Change this value to pull data from a different year.
d = date(2023, 4, 9)

# The list starts with that date; earlier dates are appended after it,
# so search_dates runs from newest to oldest
search_dates = [d]

# Step back three more times, giving four monthly dates in total
# (April, March, February and January 2023 for the start date above).
# Change the range to cover a different number of months.
for step in range(3):
    d = getPreviousSunday(d)
    search_dates += [d]

In [None]:
# Encoded names (URL slugs) of the NYT bestseller lists to request.
# Per the original selection criteria, these are lists that
# 1) haven't been discontinued; and 2) were introduced in 2011 or earlier.

# Lists whose slug names a fiction/nonfiction split
_fiction_nonfiction_lists = [
    'combined-print-and-e-book-fiction',
    'combined-print-and-e-book-nonfiction',
    'hardcover-fiction',
    'hardcover-nonfiction',
    'trade-fiction-paperback',
    'paperback-nonfiction',
]

# Remaining category lists
_other_lists = [
    'advice-how-to-and-miscellaneous',
    'picture-books',
    'young-adult-hardcover',
    'series-books',
]

# Final request order: fiction/nonfiction lists first, then the rest
nyt_lists = _fiction_nonfiction_lists + _other_lists

In [3]:
# Accumulates one dict per book for every (date, list) pair that is retrieved successfully
book_data = []

# Print notification that data retrieval process has started
print("Beginning data retrieval....")

# Loop through search dates
for search_date in search_dates:

    # Loop through NYT bestseller lists
    for blist in nyt_lists:

        # Endpoint for this list on this date; the API key is supplied through
        # `params` below rather than being formatted into the URL string
        url = f"https://api.nytimes.com/svc/books/v3/lists/{search_date}/{blist}.json"

        # Everything that can fail for a single list lives inside the try, so one
        # bad request (network error, HTTP error, non-JSON body, unexpected payload)
        # is reported and skipped instead of aborting the whole run
        try:

            # A timeout keeps a stalled connection from hanging the notebook forever
            http_response = requests.get(url, params={'api-key': nyt_apikey}, timeout=30)

            # Turn 4xx/5xx replies (e.g. rate limiting) into an exception
            http_response.raise_for_status()
            response = http_response.json()

            # Pull the name and date of the bestseller list
            list_name = response['results']['list_name']
            list_date = response['results']['published_date']

            # Build all rows for this list first, so a malformed entry cannot leave
            # a partially-recorded list behind in book_data
            list_rows = [
                {
                    'Title': book['title'],
                    'Author': book['author'],
                    'Publisher': book['publisher'],
                    'Primary ISBN10': book['primary_isbn10'],
                    'Primary ISBN13': book['primary_isbn13'],
                    'NYT List': list_name,
                    'NYT List Published Date': list_date,
                    'Weeks on NYT List': book['weeks_on_list']
                }
                for book in response['results']['books']
            ]
            book_data.extend(list_rows)

        # If the API call is unsuccessful, make a note of the list & date and skip
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as error:
            # Only the exception type is shown: messages raised by requests may
            # include the request URL, which would expose the API key in the output
            print("Error in " + blist, search_date, "-", type(error).__name__)

        # Pause before making another call, as the API request typically fails with
        # instantaneous, repeated requests. This runs after failures as well, so a
        # rejected call is not immediately followed by another one.
        sleep(20)

# Indicate that data retrieval is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning data retrieval....
-----------------------------
Data Retrieval Complete      
-----------------------------


In [4]:
# Column layout of the output table; these match the keys of the dicts in book_data.
# Passing them explicitly keeps the schema stable even if no book was retrieved
# (an empty book_data would otherwise produce a frame with no columns at all).
book_columns = [
    'Title',
    'Author',
    'Publisher',
    'Primary ISBN10',
    'Primary ISBN13',
    'NYT List',
    'NYT List Published Date',
    'Weeks on NYT List'
]

# Store book information in pandas dataframe
book_df = pd.DataFrame(book_data, columns=book_columns)
book_df

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List
0,FINDERS KEEPERS,Stephen King,Scribner,1501100130,9781501100130,Combined Print and E-Book Fiction,2015-06-21,1
1,THE GIRL ON THE TRAIN,Paula Hawkins,Riverhead,0698185390,9780698185395,Combined Print and E-Book Fiction,2015-06-21,21
2,IN THE UNLIKELY EVENT,Judy Blume,Knopf,1101875054,9781101875056,Combined Print and E-Book Fiction,2015-06-21,1
3,ALL THE LIGHT WE CANNOT SEE,Anthony Doerr,Scribner,1476746583,9781476746586,Combined Print and E-Book Fiction,2015-06-21,38
4,RADIANT ANGEL,Nelson DeMille,Grand Central,145558231X,9781455582310,Combined Print and E-Book Fiction,2015-06-21,2
...,...,...,...,...,...,...,...,...
955,THE SELECTION,Kiera Cass,HarperTeen,0062060015,9780062060013,Series Books,2015-01-25,22
956,HEROES OF OLYMPUS,Rick Riordan,Disney Publishing Worldwide,1423146735,9781423146735,Series Books,2015-01-25,87
957,GIVER QUARTET,Lois Lowry,Houghton Mifflin Harcourt Publishing,0544336267,9780544336261,Series Books,2015-01-25,71
958,WINGS OF FIRE,Tui T. Sutherland,Scholastic,0545685346,9780545685344,Series Books,2015-01-25,4


In [5]:
# Write the collected bestseller data to a CSV file in the working directory.
# No `index` argument is passed, so the row index is written as the first,
# unnamed column of the file.
output_csv = 'NYT_Bestsellers_2023.csv'
book_df.to_csv(output_csv)