In [66]:
# Dependencies
import requests
import pandas as pd
import pprint as pp
import numpy as np
from datetime import date, timedelta, datetime
from time import sleep
from api_keys import nyt_apikey, gbooks_apikey

In [62]:
# Define a function that returns calendar dates of Sundays in the past, since the NYT Book Review is published on Sundays 
# The first argument, d, is the start date. The second argument, n, is the number of months prior to the start date. 
def getPreviousSunday(d, n):
    d -= timedelta(days = 7*4*n)
    return d

In [34]:
# URL for first NYT API call. This API call will give us information about all of NYT's bestseller lists  
url = f"https://api.nytimes.com/svc/books/v3/lists/names.json?api-key={nyt_apikey}"

# Make API call and store response in json
response = requests.get(url).json()

# Print response
pp.pprint(response)

{'copyright': 'Copyright (c) 2023 The New York Times Company.  All Rights '
              'Reserved.',
 'num_results': 59,
 'results': [{'display_name': 'Combined Print & E-Book Fiction',
              'list_name': 'Combined Print and E-Book Fiction',
              'list_name_encoded': 'combined-print-and-e-book-fiction',
              'newest_published_date': '2023-04-09',
              'oldest_published_date': '2011-02-13',
              'updated': 'WEEKLY'},
             {'display_name': 'Combined Print & E-Book Nonfiction',
              'list_name': 'Combined Print and E-Book Nonfiction',
              'list_name_encoded': 'combined-print-and-e-book-nonfiction',
              'newest_published_date': '2023-04-09',
              'oldest_published_date': '2011-02-13',
              'updated': 'WEEKLY'},
             {'display_name': 'Hardcover Fiction',
              'list_name': 'Hardcover Fiction',
              'list_name_encoded': 'hardcover-fiction',
              'newest_publi

In [33]:
cdate = date(2023,4,9)
search_dates = []

for x in range(1, 13):
    search_dates.append(getPreviousSunday(cdate, x))

search_dates

[datetime.date(2023, 3, 12),
 datetime.date(2023, 2, 12),
 datetime.date(2023, 1, 15),
 datetime.date(2022, 12, 18),
 datetime.date(2022, 11, 20),
 datetime.date(2022, 10, 23),
 datetime.date(2022, 9, 25),
 datetime.date(2022, 8, 28),
 datetime.date(2022, 7, 31),
 datetime.date(2022, 7, 3),
 datetime.date(2022, 6, 5),
 datetime.date(2022, 5, 8)]

In [80]:
# List of encoded NYT bestseller list names
# These are the lists that 1) haven't been discontinued; and 2) were introduced in 2011 or earlier
nyt_lists = [
    'combined-print-and-e-book-fiction',
    'combined-print-and-e-book-nonfiction',
    'hardcover-fiction',
    'hardcover-nonfiction',
    'trade-fiction-paperback',
    'paperback-nonfiction',
    'advice-how-to-and-miscellaneous',
    'picture-books',
    'series-books',
    'young-adult-hardcover'
]

# Set the first date to search for
first_date = date(2022,10,23)

# Build a list of dates going back in time from the start date
search_dates = []
search_dates.append(first_date)
for x in range(1, 10):
    search_dates.append(getPreviousSunday(first_date, x))

# Empty list into which we will append the book information contained in the API response 
book_data = []

print("Beginning data retrieval....")

# Loop through dates
for search_date in search_dates:

    # Loop through NYT bestseller lists
    for blist in nyt_lists:

        # Create URL string for API calls
        url = f"https://api.nytimes.com/svc/books/v3/lists/{search_date}/{blist}.json?api-key={nyt_apikey}"

        # Make API request and store response in json
        response = requests.get(url).json()

        # Error handling
        try:

            # Pull the name and date of the bestseller list
            list_name = response['results']['list_name']
            list_date = response['results']['published_date']

            # Loop through the books within the bestseller list
            for book in response['results']['books']:

                # Append book-related information
                book_data.append({
                    'Title': book['title'],
                    'Author': book['author'],
                    'Publisher': book['publisher'],
                    'Primary ISBN10': book['primary_isbn10'],
                    'Primary ISBN13': book['primary_isbn13'],
                    'NYT List': list_name,
                    'NYT List Published Date': list_date, 
                    'Weeks on NYT List': book['weeks_on_list']
                })
            
            # Pause for 30 seconds. Without the pause the API request fails.  
            sleep(30)

        # If the API call is unsuccessful, make a note of the list & date and skip
        except KeyError:
            print("Error in " + blist, search_date)
            pass

# Indicate that Data Loading is complete 
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

# Store book information in pandas dataframe
book_df = pd.DataFrame(book_data)
book_df

Beginning data retrieval....
-----------------------------
Data Retrieval Complete      
-----------------------------


Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List
0,RIGHTEOUS PREY,John Sandford,Putnam,0593422481,9780593422489,Combined Print and E-Book Fiction,2022-10-23,1
1,VERITY,Colleen Hoover,Grand Central,1538724731,9781538724736,Combined Print and E-Book Fiction,2022-10-23,44
2,IT ENDS WITH US,Colleen Hoover,Atria,1501110365,9781501110368,Combined Print and E-Book Fiction,2022-10-23,69
3,MAD HONEY,Jodi Picoult and Jennifer Finney Boylan,Ballantine,1984818384,9781984818386,Combined Print and E-Book Fiction,2022-10-23,1
4,FAIRY TALE,Stephen King,Scribner,1668002175,9781668002179,Combined Print and E-Book Fiction,2022-10-23,5
...,...,...,...,...,...,...,...,...
1295,STAMPED,Jason Reynolds and Ibram X. Kendi,"Little, Brown",0316453692,9780316453691,Young Adult Hardcover,2022-02-13,81
1296,THESE VIOLENT DELIGHTS,Chloe Gong,Margaret K. McElderry,1534457690,9781534457690,Young Adult Hardcover,2022-02-13,36
1297,YOU'LL BE THE DEATH OF ME,Karen M. McManus,Delacorte,0593175867,9780593175866,Young Adult Hardcover,2022-02-13,9
1298,HERE'S TO US,Becky Albertalli and Adam Silvera,Quill Tree,0063071630,9780063071636,Young Adult Hardcover,2022-02-13,5


In [81]:
book_df.to_csv('Jan-Oct2022.csv')

In [41]:
# Check for duplicates
duplicates = book_df[book_df.duplicated(['Title', 'Author'], keep=False) == True]
duplicates

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23
1,LESSONS IN CHEMISTRY,Bonnie Garmus,Doubleday,038554734X,9780385547345,Combined Print and E-Book Fiction,2023-04-09,20
2,DAISY JONES & THE SIX,Taylor Jenkins Reid,Ballantine,1524798649,9781524798642,Combined Print and E-Book Fiction,2023-04-09,12
3,IT ENDS WITH US,Colleen Hoover,Atria,1501110365,9781501110368,Combined Print and E-Book Fiction,2023-04-09,93
4,COUNTDOWN,James Patterson and Brendan DuBois,"Little, Brown",031645737X,9780316457378,Combined Print and E-Book Fiction,2023-04-09,1
5,I WILL FIND YOU,Harlan Coben,Grand Central,1538748363,9781538748367,Combined Print and E-Book Fiction,2023-04-09,2
6,HELLO BEAUTIFUL,Ann Napolitano,Dial,0593243730,9780593243732,Combined Print and E-Book Fiction,2023-04-09,2
7,THE SEVEN HUSBANDS OF EVELYN HUGO,Taylor Jenkins Reid,Washington Square/Atria,1501161938,9781501161933,Combined Print and E-Book Fiction,2023-04-09,90
8,VERITY,Colleen Hoover,Grand Central,1538724731,9781538724736,Combined Print and E-Book Fiction,2023-04-09,68
11,SMOLDER,Laurell K. Hamilton,Berkley,1984804510,9781984804518,Combined Print and E-Book Fiction,2023-04-09,1


In [42]:
# Pull a specific book from the duplicates list for easier reading
# It looks like books that are both hardcover and audio are responsible for the majority of duplicates   
x = book_df[book_df['Title'] == 'IT STARTS WITH US']
x

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225.0,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23
122,IT STARTS WITH US,Colleen Hoover,Simon & Schuster Audio,,9781797145082,Audio Fiction,2023-03-12,0


In [43]:
# Create a new dataframe with duplicates removed
books_clean = book_df.drop_duplicates(subset=['Title', 'Author'], keep='first')
books_clean.reset_index(inplace=True, drop=True)
books_clean

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23
1,LESSONS IN CHEMISTRY,Bonnie Garmus,Doubleday,038554734X,9780385547345,Combined Print and E-Book Fiction,2023-04-09,20
2,DAISY JONES & THE SIX,Taylor Jenkins Reid,Ballantine,1524798649,9781524798642,Combined Print and E-Book Fiction,2023-04-09,12
3,IT ENDS WITH US,Colleen Hoover,Atria,1501110365,9781501110368,Combined Print and E-Book Fiction,2023-04-09,93
4,COUNTDOWN,James Patterson and Brendan DuBois,"Little, Brown",031645737X,9780316457378,Combined Print and E-Book Fiction,2023-04-09,1
...,...,...,...,...,...,...,...,...
152,BETTER THAN THE MOVIES,Lynn Painter,Simon & Schuster,1534467637,9781534467637,Young Adult Paperback Monthly,2023-03-12,0
153,WE WERE LIARS,E. Lockhart,Ember,0385741278,9780385741279,Young Adult Paperback Monthly,2023-03-12,0
154,YOU'D BE HOME NOW,Kathleen Glasgow,Ember,0525708073,9780525708070,Young Adult Paperback Monthly,2023-03-12,0
155,HIGHLY SUSPICIOUS AND UNFAIRLY CUTE,Talia Hibbert,Joy Revolution,0593482352,9780593482353,Young Adult Paperback Monthly,2023-03-12,0


In [69]:
books_clean['Publisher'].value_counts()

Scholastic                  14
VIZ Media                   12
Grand Central                7
Penguin Group                6
Ballantine                   5
                            ..
Top Shelf Productions        1
IDW Publishing               1
Pantheon                     1
Seven Seas Entertainment     1
Joy Revolution               1
Name: Publisher, Length: 78, dtype: int64

In [45]:
# Copy books_clean (avoids a red warning message) and add columns for Google Book reviews and review count
gbooks_df = books_clean.copy()
gbooks_df['Average Google Review'] = ''
gbooks_df['Google Review Count'] = ''
gbooks_df

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List,Average Google Review,Google Review Count
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23,,
1,LESSONS IN CHEMISTRY,Bonnie Garmus,Doubleday,038554734X,9780385547345,Combined Print and E-Book Fiction,2023-04-09,20,,
2,DAISY JONES & THE SIX,Taylor Jenkins Reid,Ballantine,1524798649,9781524798642,Combined Print and E-Book Fiction,2023-04-09,12,,
3,IT ENDS WITH US,Colleen Hoover,Atria,1501110365,9781501110368,Combined Print and E-Book Fiction,2023-04-09,93,,
4,COUNTDOWN,James Patterson and Brendan DuBois,"Little, Brown",031645737X,9780316457378,Combined Print and E-Book Fiction,2023-04-09,1,,
...,...,...,...,...,...,...,...,...,...,...
152,BETTER THAN THE MOVIES,Lynn Painter,Simon & Schuster,1534467637,9781534467637,Young Adult Paperback Monthly,2023-03-12,0,,
153,WE WERE LIARS,E. Lockhart,Ember,0385741278,9780385741279,Young Adult Paperback Monthly,2023-03-12,0,,
154,YOU'D BE HOME NOW,Kathleen Glasgow,Ember,0525708073,9780525708070,Young Adult Paperback Monthly,2023-03-12,0,,
155,HIGHLY SUSPICIOUS AND UNFAIRLY CUTE,Talia Hibbert,Joy Revolution,0593482352,9780593482353,Young Adult Paperback Monthly,2023-03-12,0,,


In [54]:
# Loop through dataframe
for index, row in gbooks_df.iterrows():
    
    # Search parameter is ISBN13
    isbn = gbooks_df.loc[index,'Primary ISBN13']
    
    # URL for API request
    gurl = f"https://www.googleapis.com/books/v1/volumes?q=isbn:{isbn}&key={gbooks_apikey}"
    
    # Make request
    response = requests.get(gurl).json()

    try:
        
    # Pull rating and rating count from response
    rating = response['items'][0]['volumeInfo']['averageRating']
    review_count = response['items'][0]['volumeInfo']['ratingsCount']

    # Add rating and review count to gbooks dataframe
    gbooks_df.loc[index, 'Average Google Review'] = rating
    gbooks_df.loc[index, 'Google Review Count'] = review_count

    # If no average Rating, pass
    except KeyError:
        pass
    

KeyError: 'averageRating'

In [42]:
gbooks_df

NameError: name 'gbooks_df' is not defined

In [67]:
# Sort dataframe by weeks on nyt bestseller list
gbooks_df['Google Review Count'] = pd.to_numeric(gbooks_df['Google Review Count'])
gbooks_sorted = gbooks_df.sort_values(by='Google Review Count', ascending=False)
gbooks_sorted.head(50)

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List,Average Google Review,Google Review Count
133,MISS PEREGRINE'S HOME FOR PECULIAR CHILDREN,Ransom Riggs and Cassandra Jean,Quirk Books,1594746036,9781594746031,Young Adult,2015-08-23,108,3.5,3924.0
68,PERSEPOLIS,Marjane Satrapi,Pantheon,037571457X,9780375714573,Paperback Graphic Books,2017-01-29,81,4.0,3913.0
93,WORLD WAR Z,Max Brooks,Broadway,0307346617,9780307346612,Combined Print Fiction,2013-05-12,0,3.5,3842.0
39,THE GUERNSEY LITERARY AND POTATO PEEL PIE SOCIETY,Mary Ann Shaffer and Annie Barrows,Dial,0440337976,9780440337973,E-Book Fiction,2017-01-29,2,4.0,3808.0
87,BEAUTIFUL RUINS,Jess Walter,Harper Perennial,0061928178,9780061928178,Combined Print Fiction,2013-05-12,0,3.5,3659.0
108,THIRTEEN REASONS WHY,Jay Asher,Razorbill,159514188X,9781595141880,Paperback Books,2012-12-09,72,3.5,3656.0
63,DRAMA,Raina Telgemeier,Scholastic,0545326990,9780545326995,Paperback Graphic Books,2017-01-29,179,4.0,3442.0
92,A GAME OF THRONES,George RR Martin,Bantam,0553573403,9780553573404,Combined Print Fiction,2013-05-12,0,4.0,3116.0
44,A MAN CALLED OVE,Fredrik Backman,Atria,,9781476738031,E-Book Fiction,2017-01-29,3,4.5,3040.0
102,THE PERKS OF BEING A WALLFLOWER,Stephen Chbosky,MTV Books/Gallery Books,0671027344,9780671027346,Paperback Books,2012-12-09,25,4.0,2818.0


In [68]:
gbooks_df['Publisher'].value_counts()

Scholastic                  14
VIZ Media                   12
Grand Central                7
Penguin Group                6
Ballantine                   5
                            ..
Top Shelf Productions        1
IDW Publishing               1
Pantheon                     1
Seven Seas Entertainment     1
Joy Revolution               1
Name: Publisher, Length: 78, dtype: int64

In [49]:
isbn = '9781101569184'
gurl = f"https://www.googleapis.com/books/v1/volumes?q=isbn:{isbn}&key={gbooks_apikey}"
response = requests.get(gurl).json()
pp.pprint(response)

{'items': [{'accessInfo': {'accessViewStatus': 'SAMPLE',
                           'country': 'US',
                           'embeddable': True,
                           'epub': {'acsTokenLink': 'http://books.google.com/books/download/The_Fault_in_Our_Stars-sample-epub.acsm?id=UzqVUdEtLDwC&format=epub&output=acs4_fulfillment_token&dl_type=sample&source=gbs_api',
                                    'isAvailable': True},
                           'pdf': {'isAvailable': False},
                           'publicDomain': False,
                           'quoteSharingAllowed': False,
                           'textToSpeechPermission': 'ALLOWED',
                           'viewability': 'PARTIAL',
                           'webReaderLink': 'http://play.google.com/books/reader?id=UzqVUdEtLDwC&hl=&source=gbs_api'},
            'etag': 'wtYho2VOJdQ',
            'id': 'UzqVUdEtLDwC',
            'kind': 'books#volume',
            'saleInfo': {'buyLink': 'https://play.google.com/store