In [3]:
# Dependencies
import requests
import pandas as pd
import pprint as pp
from api_keys import nyt_apikey, gbooks_apikey

In [6]:
# List of encoded NYT bestseller list names
nyt_lists = [
    'combined-print-and-e-book-fiction',
    'hardcover-fiction',
    'mass-market-paperback',
    'e-book-fiction',
    'hardcover-graphic-books',
    'paperback-graphic-books',
    'manga',
    'combined-print-fiction',
    'paperback-books',
    'audio-fiction',
    'graphic-books-and-manga',
]

# Endpoint template for the "current" edition of a list; the encoded list name is filled in per request
NYT_LIST_URL = "https://api.nytimes.com/svc/books/v3/lists/current/{}.json"

# Seconds to wait for a response before giving up on a list
NYT_TIMEOUT = 10

# Empty list for book information (reset here so re-running the cell does not double up rows)
book_data = []

# (list name, reason) for every list that could not be loaded, kept for later inspection
skipped_lists = []

print("Beginning data retrieval....")

# Loop through NYT bestseller lists
for blist in nyt_lists:

    try:
        # The API key goes through `params` so requests handles the URL encoding
        reply = requests.get(
            NYT_LIST_URL.format(blist),
            params={'api-key': nyt_apikey},
            timeout=NYT_TIMEOUT,
        )

        # Turn 4xx/5xx replies (unknown list, bad key, rate limit) into an exception
        reply.raise_for_status()
        response = reply.json()

        results = response['results']
        list_name = results['list_name']
        list_date = results['published_date']

        # Build every row for this list before touching book_data, so a
        # malformed entry cannot leave a half-loaded list behind
        list_rows = [
            {
                'Title': book['title'],
                'Author': book['author'],
                'Publisher': book['publisher'],
                'Primary ISBN10': book['primary_isbn10'],
                'Primary ISBN13': book['primary_isbn13'],
                'NYT List': list_name,
                'NYT List Published Date': list_date,
                'Weeks on NYT List': book['weeks_on_list']
            }
            for book in results['books']
        ]

    # Best effort: a list that cannot be fetched or parsed is skipped, but the
    # skip is reported instead of being silently swallowed
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as err:
        # Only the status code / exception class is shown: the full exception
        # text can contain the request URL, which includes the API key
        status = getattr(getattr(err, 'response', None), 'status_code', None)
        reason = type(err).__name__ if status is None else f"HTTP {status}"
        skipped_lists.append((blist, reason))
        print(f"Skipping list '{blist}': {reason}")
        continue

    book_data.extend(list_rows)

# Indicate that Data Loading is complete 
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning data retrieval....
-----------------------------
Data Retrieval Complete      
-----------------------------


In [7]:
# Turn the collected list of per-book dicts into a dataframe (one row per book entry)
book_df = pd.DataFrame(data=book_data)
book_df

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23
1,LESSONS IN CHEMISTRY,Bonnie Garmus,Doubleday,038554734X,9780385547345,Combined Print and E-Book Fiction,2023-04-09,20
2,DAISY JONES & THE SIX,Taylor Jenkins Reid,Ballantine,1524798649,9781524798642,Combined Print and E-Book Fiction,2023-04-09,12
3,IT ENDS WITH US,Colleen Hoover,Atria,1501110365,9781501110368,Combined Print and E-Book Fiction,2023-04-09,93
4,COUNTDOWN,James Patterson and Brendan DuBois,"Little, Brown",031645737X,9780316457378,Combined Print and E-Book Fiction,2023-04-09,1
...,...,...,...,...,...,...,...,...
130,"TOMORROW, AND TOMORROW, AND TOMORROW",Gabrielle Zevin,Random House Audio,0593591631,9780593591635,Audio Fiction,2023-03-12,0
131,DAISY JONES & THE SIX,Taylor Jenkins Reid,Random House Audio,,9781984845306,Audio Fiction,2023-03-12,0
132,MAD HONEY,Jodi Picoult and Jennifer Finney Boylan,Random House Audio,0593614100,9780593614105,Audio Fiction,2023-03-12,0
133,THE LAST ORPHAN,Gregg Hurwitz,Macmillan Audio,,9781250788085,Audio Fiction,2023-03-12,0


In [8]:
# Select every row whose (Title, Author) pair occurs more than once;
# keep=False flags all occurrences, not just the repeats
duplicates = book_df.loc[book_df.duplicated(subset=['Title', 'Author'], keep=False)]
duplicates

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23
1,LESSONS IN CHEMISTRY,Bonnie Garmus,Doubleday,038554734X,9780385547345,Combined Print and E-Book Fiction,2023-04-09,20
2,DAISY JONES & THE SIX,Taylor Jenkins Reid,Ballantine,1524798649,9781524798642,Combined Print and E-Book Fiction,2023-04-09,12
3,IT ENDS WITH US,Colleen Hoover,Atria,1501110365,9781501110368,Combined Print and E-Book Fiction,2023-04-09,93
4,COUNTDOWN,James Patterson and Brendan DuBois,"Little, Brown",031645737X,9780316457378,Combined Print and E-Book Fiction,2023-04-09,1
5,I WILL FIND YOU,Harlan Coben,Grand Central,1538748363,9781538748367,Combined Print and E-Book Fiction,2023-04-09,2
6,HELLO BEAUTIFUL,Ann Napolitano,Dial,0593243730,9780593243732,Combined Print and E-Book Fiction,2023-04-09,2
7,THE SEVEN HUSBANDS OF EVELYN HUGO,Taylor Jenkins Reid,Washington Square/Atria,1501161938,9781501161933,Combined Print and E-Book Fiction,2023-04-09,90
8,VERITY,Colleen Hoover,Grand Central,1538724731,9781538724736,Combined Print and E-Book Fiction,2023-04-09,68
11,SMOLDER,Laurell K. Hamilton,Berkley,1984804510,9781984804518,Combined Print and E-Book Fiction,2023-04-09,1


In [9]:
# Inspect a single duplicated title to make the pattern easier to see.
# Most duplicates appear to come from the same book being listed in more than
# one format (e.g. a hardcover/print edition and an audio edition).
x = book_df.loc[book_df['Title'].eq('IT STARTS WITH US')]
x

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225.0,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23
122,IT STARTS WITH US,Colleen Hoover,Simon & Schuster Audio,,9781797145082,Audio Fiction,2023-03-12,0


In [10]:
# Create a new dataframe with duplicates removed.
# For each (Title, Author) pair only the first occurrence is kept, then the
# index is renumbered from 0. reset_index is chained rather than applied with
# inplace=True so that books_clean is a fresh frame and not a mutated selection
# of book_df, and re-running the cell always gives the same result.
books_clean = (
    book_df
    .drop_duplicates(subset=['Title', 'Author'], keep='first')
    .reset_index(drop=True)
)
books_clean

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23
1,LESSONS IN CHEMISTRY,Bonnie Garmus,Doubleday,038554734X,9780385547345,Combined Print and E-Book Fiction,2023-04-09,20
2,DAISY JONES & THE SIX,Taylor Jenkins Reid,Ballantine,1524798649,9781524798642,Combined Print and E-Book Fiction,2023-04-09,12
3,IT ENDS WITH US,Colleen Hoover,Atria,1501110365,9781501110368,Combined Print and E-Book Fiction,2023-04-09,93
4,COUNTDOWN,James Patterson and Brendan DuBois,"Little, Brown",031645737X,9780316457378,Combined Print and E-Book Fiction,2023-04-09,1
...,...,...,...,...,...,...,...,...
111,OUT OF MY MIND,Sharon M. Draper,Atheneum,1416971718,9781416971719,Paperback Books,2012-12-09,24
112,ENCORE IN DEATH,J.D. Robb,Macmillan Audio,,9781250878267,Audio Fiction,2023-03-12,0
113,FAIRY TALE,Stephen King,Simon & Schuster Audio,,9781797145297,Audio Fiction,2023-03-12,0
114,THE LAST ORPHAN,Gregg Hurwitz,Macmillan Audio,,9781250788085,Audio Fiction,2023-03-12,0


In [19]:
# Work on an independent copy of books_clean (this avoids a pandas warning when
# adding columns) and create empty-string placeholder columns for the
# Google Books rating and rating count
gbooks_df = books_clean.copy()
for new_column in ('Google Review', 'Google Review Count'):
    gbooks_df[new_column] = ''
gbooks_df

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List,Google Review,Google Review Count
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23,,
1,LESSONS IN CHEMISTRY,Bonnie Garmus,Doubleday,038554734X,9780385547345,Combined Print and E-Book Fiction,2023-04-09,20,,
2,DAISY JONES & THE SIX,Taylor Jenkins Reid,Ballantine,1524798649,9781524798642,Combined Print and E-Book Fiction,2023-04-09,12,,
3,IT ENDS WITH US,Colleen Hoover,Atria,1501110365,9781501110368,Combined Print and E-Book Fiction,2023-04-09,93,,
4,COUNTDOWN,James Patterson and Brendan DuBois,"Little, Brown",031645737X,9780316457378,Combined Print and E-Book Fiction,2023-04-09,1,,
...,...,...,...,...,...,...,...,...,...,...
111,OUT OF MY MIND,Sharon M. Draper,Atheneum,1416971718,9781416971719,Paperback Books,2012-12-09,24,,
112,ENCORE IN DEATH,J.D. Robb,Macmillan Audio,,9781250878267,Audio Fiction,2023-03-12,0,,
113,FAIRY TALE,Stephen King,Simon & Schuster Audio,,9781797145297,Audio Fiction,2023-03-12,0,,
114,THE LAST ORPHAN,Gregg Hurwitz,Macmillan Audio,,9781250788085,Audio Fiction,2023-03-12,0,,


In [None]:
# Give the rating column a more precise name: 'Google Review' -> 'Average Google Review'
# NOTE(review): the Google Books lookup loop writes to 'Average Google Review',
# so this cell needs to run before it.
gbooks_df = gbooks_df.rename({'Google Review': 'Average Google Review'}, axis='columns')


In [32]:
# Endpoint for Google Books volume search
GBOOKS_URL = "https://www.googleapis.com/books/v1/volumes"

# Seconds to wait for each Google Books response before giving up on that book
GBOOKS_TIMEOUT = 10

# Loop through dataframe, one lookup per book.
# The (index, ISBN13) pairs are snapshotted into a list first because the loop
# body writes back into gbooks_df.
for index, isbn in list(gbooks_df['Primary ISBN13'].items()):

    try:
        # Search parameter is ISBN13; query and key go through `params` so
        # requests takes care of the URL encoding
        reply = requests.get(
            GBOOKS_URL,
            params={'q': f"isbn:{isbn}", 'key': gbooks_apikey},
            timeout=GBOOKS_TIMEOUT,
        )

        # Surface HTTP errors (quota exceeded, bad key, ...) instead of letting
        # them masquerade as "book has no rating"
        reply.raise_for_status()
        response = reply.json()

        # Pull rating and rating count from the first matching volume.
        # Both are read before anything is written so a book never ends up
        # with only one of the two columns filled in.
        volume_info = response['items'][0]['volumeInfo']
        rating = volume_info['averageRating']
        review_count = volume_info['ratingsCount']

    # No matching volume, or the volume has no rating data: leave the row as is
    except (KeyError, IndexError):
        continue

    # Network / HTTP / decoding problem: report it and move on to the next book
    # so one bad request does not abort the whole run
    except (requests.exceptions.RequestException, ValueError) as err:
        # Only the status code / exception class is shown: the full exception
        # text can contain the request URL, which includes the API key
        status = getattr(getattr(err, 'response', None), 'status_code', None)
        reason = type(err).__name__ if status is None else f"HTTP {status}"
        print(f"Google Books lookup failed for ISBN {isbn}: {reason}")
        continue

    # Add rating and review count to gbooks dataframe
    gbooks_df.loc[index, 'Average Google Review'] = rating
    gbooks_df.loc[index, 'Google Review Count'] = review_count
    

In [33]:
# Display gbooks_df so the 'Average Google Review' and 'Google Review Count'
# columns can be inspected
gbooks_df

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List,Average Google Review,Google Review Count
0,IT STARTS WITH US,Colleen Hoover,Atria,1668001225,9781668001226,Combined Print and E-Book Fiction,2023-04-09,23,4,1
1,LESSONS IN CHEMISTRY,Bonnie Garmus,Doubleday,038554734X,9780385547345,Combined Print and E-Book Fiction,2023-04-09,20,4,61
2,DAISY JONES & THE SIX,Taylor Jenkins Reid,Ballantine,1524798649,9781524798642,Combined Print and E-Book Fiction,2023-04-09,12,4,104
3,IT ENDS WITH US,Colleen Hoover,Atria,1501110365,9781501110368,Combined Print and E-Book Fiction,2023-04-09,93,4.5,5
4,COUNTDOWN,James Patterson and Brendan DuBois,"Little, Brown",031645737X,9780316457378,Combined Print and E-Book Fiction,2023-04-09,1,4,1
...,...,...,...,...,...,...,...,...,...,...
111,OUT OF MY MIND,Sharon M. Draper,Atheneum,1416971718,9781416971719,Paperback Books,2012-12-09,24,,
112,ENCORE IN DEATH,J.D. Robb,Macmillan Audio,,9781250878267,Audio Fiction,2023-03-12,0,,
113,FAIRY TALE,Stephen King,Simon & Schuster Audio,,9781797145297,Audio Fiction,2023-03-12,0,3.5,24
114,THE LAST ORPHAN,Gregg Hurwitz,Macmillan Audio,,9781250788085,Audio Fiction,2023-03-12,0,,


In [34]:
# Order the books from most to fewest weeks on the NYT bestseller list
gbooks_sorted = gbooks_df.sort_values('Weeks on NYT List', ascending=False)
gbooks_sorted

Unnamed: 0,Title,Author,Publisher,Primary ISBN10,Primary ISBN13,NYT List,NYT List Published Date,Weeks on NYT List,Average Google Review,Google Review Count
107,THE BOOK THIEF,Markus Zusak,Knopf,0375842209,9780375842207,Paperback Books,2012-12-09,272,4.5,2495
64,SMILE,Raina Telgemeier,Scholastic,0545132061,9780545132060,Paperback Graphic Books,2017-01-29,240,4.5,111
54,BATMAN: THE KILLING JOKE,Alan Moore and Brian Bolland,DC Comics,1401216676,9781401216672,Hardcover Graphic Books,2017-01-29,239,4,62
63,DRAMA,Raina Telgemeier,Scholastic,0545326990,9780545326995,Paperback Graphic Books,2017-01-29,179,4,3442
109,THE ABSOLUTELY TRUE DIARY OF A PART-TIME INDIAN,Sherman Alexie.,"Little, Brown",0316013692,9780316013697,Paperback Books,2012-12-09,152,,
...,...,...,...,...,...,...,...,...,...,...
90,GONE GIRL,Gillian Flynn,Crown,030758836X,9780307588364,Combined Print Fiction,2013-05-12,0,3.5,969
91,THE BEST OF ME,Nicholas Sparks,Grand Central,0446547638,9780446547635,Combined Print Fiction,2013-05-12,0,4,79
92,A GAME OF THRONES,George RR Martin,Bantam,0553573403,9780553573404,Combined Print Fiction,2013-05-12,0,4,3116
93,WORLD WAR Z,Max Brooks,Broadway,0307346617,9780307346612,Combined Print Fiction,2013-05-12,0,3.5,3845
