In [24]:
import datetime
import json
import os
import sys
from urllib.parse import quote

import pandas as pd
import requests

def add_modules():
    """
    Locate the nearest `modules` directory and make it importable.

    The search begins in the current working directory and climbs toward
    the file system root, looking at no more than five ancestor
    directories.  When a `modules` directory is found and its absolute
    path is not yet on `sys.path`, the path is appended.

    Params: None

    Return: None
    """
    candidate = "."
    # Climb at most five levels, stopping as soon as `modules` shows up.
    for _ in range(5):
        if os.path.isdir(os.path.join(candidate, "modules")):
            break
        candidate = os.path.join(candidate, "..")
    target = os.path.abspath(os.path.join(candidate, "modules"))
    # The fifth ancestor is never tested inside the loop, so confirm the
    # directory really exists before touching the search path.
    if os.path.isdir(target) and target not in sys.path:
        sys.path.append(target)

# Extend sys.path before the import below, so a `modules` directory (if one was located) is searchable.
add_modules()
# NOTE(review): `util` is presumably provided by that `modules` directory — confirm.
import util

In [25]:
def buildURLlist(apiKey, bookList):
    """
    This function builds the url used to retrieve the books currently on a best sellers list

    Parameters: apiKey, a string holding the api key that authorizes the request
                bookList, a string naming the specific Best Sellers List (ex. Hardcover Fiction)

    Returns: url, a string containing the lists.json endpoint followed by the `list` and `api-key`
             query parameters, both percent-encoded
    """
    urlBody = "https://api.nytimes.com/svc/books/v3/lists.json"
    # Percent-encode each value so that spaces, `&`, `=` or `#` inside a list
    # name (or key) cannot split or truncate the query string.
    encodedList = quote(str(bookList), safe="")
    encodedKey = quote(str(apiKey), safe="")
    return f"{urlBody}?list={encodedList}&api-key={encodedKey}"

In [26]:
def buildURLdate(apikey, bookList, date):
    """
    This function builds the url used to retrieve the books that were on a best sellers list at the date specified

    Parameters: apikey, a string holding the api key that authorizes the request
                bookList, a string naming the specific Best Sellers List (ex. Hardcover Fiction)
                date, a string in year-month-day format where year is 4 characters and month and day are 2 characters

    Returns: url, a string of the form .../lists/{date}/{bookList}.json?api-key=..., with every interpolated
             value percent-encoded
    """
    # `date` and `bookList` become path segments, so `/`, `?` and `#` must be
    # escaped as well; safe="" makes quote() encode the slash too.
    encodedDate = quote(str(date), safe="")
    encodedList = quote(str(bookList), safe="")
    encodedKey = quote(str(apikey), safe="")
    urlBody = f"https://api.nytimes.com/svc/books/v3/lists/{encodedDate}/{encodedList}.json"
    return f"{urlBody}?api-key={encodedKey}"

In [27]:
def buildURLhistory(apiKey, isbn):
    """
    This function builds the url used to retrieve best-seller history data about a specific book identified by its ISBN

    Parameters: apiKey, a string holding the api key that authorizes the request
                isbn, a string of the specific book's ISBN

    Returns: url, a string containing the best-sellers/history.json endpoint followed by the `isbn` and
             `api-key` query parameters, both percent-encoded
    """
    urlBody = "https://api.nytimes.com/svc/books/v3/lists/best-sellers/history.json"
    # Percent-encode the values so stray characters cannot alter the query string.
    encodedIsbn = quote(str(isbn), safe="")
    encodedKey = quote(str(apiKey), safe="")
    return f"{urlBody}?isbn={encodedIsbn}&api-key={encodedKey}"

In [28]:
def buildURL(apiKey, bookList = None, date = None, isbn = None):
    """
    This function uses the given parameters to decide which url to build

    Parameters: apiKey, a string holding the api key that authorizes the request
                bookList, a string naming the specific Best Sellers List (ex. Hardcover Fiction)
                date, a string in year-month-day format where year is 4 characters and month and day are 2 characters
                isbn, a string of the specific book's ISBN

    Returns: url, a string chosen as follows:
                 bookList and date given -> the dated-list url (buildURLdate)
                 bookList given alone    -> the current-list url (buildURLlist)
                 isbn given, no bookList -> the history url (buildURLhistory)
             None when neither bookList nor isbn is given; a date on its own is not
             enough to build any url
    """
    if bookList is not None:
        if date is not None:
            return buildURLdate(apiKey, bookList, date)
        return buildURLlist(apiKey, bookList)
    if isbn is not None:
        return buildURLhistory(apiKey, isbn)
    # Reached with no arguments, or with only `date`: previously the date-only
    # case produced a history url containing the literal text "isbn=None".
    return None

In [29]:
def getListBook(apikey, bookL):
    """
    This function requests the given Best Sellers list and returns the `results` part of the JSON response

    Parameters: apikey, a string holding the api key that authorizes the request
                bookL, a string naming the specific Best Sellers List (ex. Hardcover Fiction)

    Returns: the value stored under the 'results' key of the decoded response
             (presumably one entry per book currently on the list; verify against the API)

    Side effects: prints the HTTP status code of the response
    """
    url = buildURL(apikey, bookList = bookL)
    headD = {
        # Corrected media type: the original sent the misspelled "applications/json".
        "Accept": "application/json"
    }

    resp = requests.get(url, headers=headD)
    # The status is only reported, not enforced; a response without a
    # 'results' key surfaces as a KeyError on the return line below.
    print(resp.status_code)
    ds = resp.json()
    return ds['results']

In [30]:
def getID(apikey, bookList):
    """
    Collect the title, author and ISBN 13 of every book on the given Best Sellers list

    Parameters: apikey, a string holding the api key that authorizes the request
                bookList, a string naming the specific Best Sellers List (ex. Hardcover Fiction)

    Returns: A list of 3-item lists [title, author, ISBN 13], one per book that has at least one ISBN entry;
             books with an empty 'isbns' list are left out
    """
    rows = []
    for entry in getListBook(apikey, bookList):
        details = entry['book_details'][0]
        isbnEntries = entry['isbns']
        if len(isbnEntries) == 0:
            # Nothing to identify this book by, so skip it.
            continue
        rows.append([details['title'], details['author'], isbnEntries[0]['isbn13']])
    return rows

In [31]:
def getAllID(apikey, maxRetries = 3):
    """
    This function iterates through all of the Best Sellers lists that are updated weekly and gets the title, author,
    and ISBN 13 data for each book currently on the lists.

    Parameters: apikey, a string holding the api key that authorizes the request
                maxRetries, how many extra times a list is requested again when it comes back empty (default 3)

    Returns: A list of [title, author, ISBN 13] lists covering every book currently on any weekly updated
             Best Seller list
    """
    all_ids = []
    for listName in getAllLists(apikey):
        # getAllLists appends None when it could not read the list names;
        # None is not a list that can be requested.
        if listName is None:
            continue
        books = getID(apikey, listName)
        retries = 0
        # Retry an empty answer a bounded number of times. The original
        # retried without limit, which never terminates for a list that
        # legitimately has no books with ISBNs.
        while books == [] and retries < maxRetries:
            books = getID(apikey, listName)
            retries += 1
        all_ids.extend(books)

    return all_ids

In [32]:
def getBookRank(apiKey, isbnNum):
    """
    This function looks up a book by ISBN and reports the list name and weeks-on-list value of its first
    rank-history entry

    Parameters: apiKey, a string holding the api key that authorizes the request
                isbnNum, a string of the specific book's ISBN

    Returns: A tuple (list name, weeks on list) read from the first 'ranks_history' entry of the first result,
             or None when the response has no results or the first result has no rank history

    Raises: AssertionError when the HTTP status code is not 200
    """
    u = buildURL(apiKey, isbn=isbnNum)
    headD = {
        # Corrected media type: the original sent the misspelled "applications/json".
        "Accept": "application/json"
    }
    r = requests.get(u, headers=headD)
    # The message deliberately omits the url, which embeds the api key.
    assert r.status_code == 200, f"unexpected HTTP status {r.status_code}"

    results = r.json()['results']
    if len(results) == 0:
        return None

    history = results[0]['ranks_history']
    # A book can be known to the API without any recorded ranks; indexing
    # [0] on the empty list would raise IndexError.
    if len(history) == 0:
        return None

    lastRank = history[0]
    return lastRank['list_name'], lastRank['weeks_on_list']
    

In [33]:
def getAllLists(apikey):
    """
    This function gets the names of all of the Best Sellers lists that are updated weekly

    Parameters: apikey, a string holding the api key that authorizes the request

    Returns: A list containing the 'list_name' of every entry whose 'updated' field equals "WEEKLY".
             If the response does not have the expected structure, None is appended to whatever
             names were collected so far (so the result may be [None])
    """
    totalListURL = f"https://api.nytimes.com/svc/books/v3/lists/names.json?api-key={apikey}"
    headD = {
        "Accept": "application/json"
    }
    # Pass the dict by keyword: the second positional argument of
    # requests.get is `params`, so the original sent it as a query string
    # instead of as request headers.
    result = requests.get(totalListURL, headers=headD)
    r = result.json()
    allList = []
    try:
        for d in r['results']:
            if d["updated"] == "WEEKLY":
                allList.append(d['list_name'])
    except (KeyError, TypeError):
        # Missing key or unexpected JSON shape; keep the None marker callers
        # already receive, without also swallowing unrelated exceptions.
        allList.append(None)
    return allList

In [34]:
def _sundaysInYear(year):
    '''
    Return every Sunday of the given year as 'YYYY-MM-DD' strings, in ascending order.
    '''
    day = datetime.date(year, 1, 1)
    # date.weekday() numbers Monday as 0 and Sunday as 6; advance to the
    # first Sunday on or after January 1st.
    day += datetime.timedelta(days=(6 - day.weekday()) % 7)
    oneWeek = datetime.timedelta(weeks=1)
    sundays = []
    while day.year == year:
        sundays.append(day.isoformat())
        day += oneWeek
    return sundays


def get2020Sundays():
    '''
    This function returns the list of sundays in 2020.

    The dates are computed from the calendar. The hand-written table used
    previously produced Saturdays (e.g. 2020-01-04) for every month except
    September.

    Parameters: None

    Return: list of the 52 sundays in 2020 as 'YYYY-MM-DD' strings, from
            2020-01-05 through 2020-12-27 in ascending order.
    '''
    return _sundaysInYear(2020)

In [35]:
def getBooksbyDate(apikey, week, bstList):
    """
    Fetch the books that were on one Best Sellers list for one week

    Parameters: apikey, a string holding the api key that authorizes the request
                week, a string of the specific week in year-month-day format
                bstList, a string naming the specific Best Sellers List (ex. Hardcover Fiction)

    Returns: The value at ['results']['books'] of the JSON response when the HTTP status is 200;
             an empty list for any other status
    """
    requestURL = buildURL(apikey, bookList=bstList, date=week)
    response = requests.get(requestURL, headers={"Accept": "application/json"})
    # Any non-200 answer is treated as "no books" rather than an error.
    if response.status_code != 200:
        return []
    payload = response.json()
    return payload['results']['books']

In [53]:
def getDateID(apiKey, week, bslist):
    """
    Build one row of identifying information for each book that was on `bslist` during the given week

    Parameters: apiKey, a string holding the api key that authorizes the request
                week, a string of the specific week in year-month-day format
                bslist, a string naming the specific Best Sellers List (ex. Hardcover Fiction)

    Returns: A list of 5-item lists, one per book, holding in order the title, author, primary ISBN 13,
             the list name passed in as `bslist`, and the book's 'weeks_on_list' value
    """
    return [
        [
            entry['title'],
            entry['author'],
            entry['primary_isbn13'],
            bslist,
            entry['weeks_on_list'],
        ]
        for entry in getBooksbyDate(apiKey, week, bslist)
    ]

In [37]:
def dfYear(apiKey, yearList):
    """
    Gather the id information of every book on every weekly updated Best Sellers list for each date in yearList

    The dates are visited in reverse order. For each date every list name returned by getAllLists is
    queried, and the rows from getDateID (title, author, ISBN 13, list name, weeks on list) are
    accumulated. A progress line is printed after each date.

    Parameters: apiKey, a string holding the api key that authorizes the request
                yearList, a list of date strings (the sundays of a specific year)

    Returns: A list of lists with the id information of each book found, across all dates and lists
    """
    listNames = getAllLists(apiKey)
    collected = []
    # The counter numbers dates in processing order, i.e. starting from the
    # last entry of yearList.
    for weekNumber, sunday in enumerate(reversed(yearList), start=1):
        for listName in listNames:
            collected.extend(getDateID(apiKey, sunday, listName))
        print("Week: ", weekNumber)
    return collected

In [55]:
def tidyTable(table):
    """
    Label the columns of a book table, keep one row per ISBN, and index the result by ISBN

    The input is not modified; all work happens on a copy. When an ISBN occurs more than once,
    the first row carrying it is the one kept.

    Parameters: table, a dataframe with five columns of book information in the order
                title, author, ISBN, list, weeks on list

    Returns: A new dataframe indexed by "ISBN" with columns "Title", "Author", "List", "Weeks on List"
    """
    labelled = table.copy()
    labelled.columns = ["Title", "Author", "ISBN", "List", "Weeks on List"]
    return labelled.drop_duplicates(subset='ISBN').set_index('ISBN')

In [54]:
def createCSV(table, filename, datadir):
    """
    Write a dataframe, including its index, to a csv file named `filename` inside `datadir`

    Parameters: table, a pandas dataframe
                filename, a string for the name of the output file
                datadir, a string containing the directory where the file should be placed

    Returns: None
    """
    destination = os.path.join(datadir, filename)
    table.to_csv(destination, index=True)

In [51]:
def main():
    """
    Run the whole pipeline: obtain the api key, collect the 2020 Best Sellers rows, tidy them, and
    save the tidied dataframe as "New York Times Book List.csv" in the current working directory.

    The key is read from the 'api-key' entry of whatever util.read_creds("NYT", ".", "Bookcreds.json")
    returns.

    Parameters: None

    Returns: None
    """
    credentials = util.read_creds("NYT", ".", "Bookcreds.json")
    apiKey = credentials['api-key']

    rows = dfYear(apiKey, get2020Sundays())
    tidied = tidyTable(pd.DataFrame(rows))

    createCSV(tidied, "New York Times Book List.csv", os.getcwd())

In [52]:
# Run the full pipeline; dfYear (called from main) prints one "Week:  N" progress line per date processed.
main()

Week:  1
Week:  2
Week:  3
Week:  4
Week:  5
Week:  6
Week:  7
Week:  8
Week:  9
Week:  10
Week:  11
Week:  12
Week:  13
Week:  14
Week:  15
Week:  16
Week:  17
Week:  18
Week:  19
Week:  20
Week:  21
Week:  22
Week:  23
Week:  24
Week:  25
Week:  26
Week:  27
Week:  28
Week:  29
Week:  30
Week:  31
Week:  32
Week:  33
Week:  34
Week:  35
Week:  36
Week:  37
Week:  38
Week:  39
Week:  40
Week:  41
Week:  42
Week:  43
Week:  44
Week:  45
Week:  46
Week:  47
Week:  48
Week:  49
Week:  50
Week:  51
Week:  52
