In [2]:
import requests
import json
import pandas as pd
import numpy as np
import time
from helper_functions import aggregate, dictlist_to_df

## Example list information
I'm trying to get bestseller information from the nyt API. They have a variety of lists, and metadata on each list that they have. The metadata is stored in the '/lists/names.json' endpoint. Lets view this metadata first (it will help with looping functionality later when we need details from each sub-list) 

In [18]:
book_lists = requests.get('https://api.nytimes.com/svc/books/v3/lists/names.json?api-key=76quqESu9qYqfKpEfnJou4kpY1uJ9pMV')

In [19]:
listoflists = json.loads(book_lists.text)['results']

In [20]:
listoflists[0]

{'list_name': 'Combined Print and E-Book Fiction',
 'display_name': 'Combined Print & E-Book Fiction',
 'list_name_encoded': 'combined-print-and-e-book-fiction',
 'oldest_published_date': '2011-02-13',
 'newest_published_date': '2020-03-01',
 'updated': 'WEEKLY'}

In [21]:
aggregate(listoflists,'list_name')[:5]

['Combined Print and E-Book Fiction',
 'Combined Print and E-Book Nonfiction',
 'Hardcover Fiction',
 'Hardcover Nonfiction',
 'Trade Fiction Paperback']

In [22]:
# alternativley we can just read the dictionary:
lists_df = pd.DataFrame(listoflists)
lists = lists_df['list_name_encoded'].to_list()

In [23]:
lists

['combined-print-and-e-book-fiction',
 'combined-print-and-e-book-nonfiction',
 'hardcover-fiction',
 'hardcover-nonfiction',
 'trade-fiction-paperback',
 'mass-market-paperback',
 'paperback-nonfiction',
 'e-book-fiction',
 'e-book-nonfiction',
 'hardcover-advice',
 'paperback-advice',
 'advice-how-to-and-miscellaneous',
 'hardcover-graphic-books',
 'paperback-graphic-books',
 'manga',
 'combined-print-fiction',
 'combined-print-nonfiction',
 'chapter-books',
 'childrens-middle-grade',
 'childrens-middle-grade-e-book',
 'childrens-middle-grade-hardcover',
 'childrens-middle-grade-paperback',
 'paperback-books',
 'picture-books',
 'series-books',
 'young-adult',
 'young-adult-e-book',
 'young-adult-hardcover',
 'young-adult-paperback',
 'animals',
 'audio-fiction',
 'audio-nonfiction',
 'business-books',
 'celebrities',
 'crime-and-punishment',
 'culture',
 'education',
 'espionage',
 'expeditions-disasters-and-adventures',
 'fashion-manners-and-customs',
 'food-and-fitness',
 'games-a

## Get information from NYT API
Here we retreive information from a specific list. the current hardcover bestsellers.

In [24]:
a = requests.get('https://api.nytimes.com/svc/books/v3/lists/current/hardcover-fiction.json?api-key='+NYT_KEY)

In [25]:
dir(a)

['__attrs__',
 '__bool__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__nonzero__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_content',
 '_content_consumed',
 '_next',
 'apparent_encoding',
 'close',
 'connection',
 'content',
 'cookies',
 'elapsed',
 'encoding',
 'headers',
 'history',
 'is_permanent_redirect',
 'is_redirect',
 'iter_content',
 'iter_lines',
 'json',
 'links',
 'next',
 'ok',
 'raise_for_status',
 'raw',
 'reason',
 'request',
 'status_code',
 'text',
 'url']

In [26]:
b = json.loads(a.text)

In [27]:
b['results']['books'][0]

{'rank': 1,
 'rank_last_week': 4,
 'weeks_on_list': 76,
 'asterisk': 0,
 'dagger': 0,
 'primary_isbn10': '0735219095',
 'primary_isbn13': '9780735219090',
 'publisher': 'Putnam',
 'description': 'In a quiet town on the North Carolina coast in 1969, a young woman who survived alone in the marsh becomes a murder suspect.',
 'price': 0,
 'title': 'WHERE THE CRAWDADS SING',
 'author': 'Delia Owens',
 'contributor': 'by Delia Owens',
 'contributor_note': '',
 'book_image': 'https://s1.nyt.com/du/books/images/9780735219090.jpg',
 'book_image_width': 328,
 'book_image_height': 495,
 'amazon_product_url': 'https://www.amazon.com/Where-Crawdads-Sing-Delia-Owens/dp/0735219095?tag=NYTBS-20',
 'age_group': '',
 'book_review_link': '',
 'first_chapter_link': '',
 'sunday_review_link': '',
 'article_chapter_link': '',
 'isbns': [{'isbn10': '0735219095', 'isbn13': '9780735219090'},
  {'isbn10': '0735219117', 'isbn13': '9780735219113'},
  {'isbn10': '0525640371', 'isbn13': '9780525640370'},
  {'isbn10

In [28]:
books = []
for i in b['results']['books']:
    books.append(i['title'])

In [29]:
books

['WHERE THE CRAWDADS SING',
 'AMERICAN DIRT',
 'GOLDEN IN DEATH',
 'THE SILENT PATIENT',
 'THE DUTCH HOUSE',
 'CROOKED RIVER',
 'THE GUARDIANS',
 'SUCH A FUN AGE',
 'A LONG PETAL OF THE SEA',
 'WEATHER',
 'THE GIVER OF STARS',
 'DEAR EDWARD',
 'LOST',
 'THE INSTITUTE',
 'WHEN YOU SEE ME']

##  Getting all lists from a time period

In [30]:
lists_df.head()

Unnamed: 0,display_name,list_name,list_name_encoded,newest_published_date,oldest_published_date,updated
0,Combined Print & E-Book Fiction,Combined Print and E-Book Fiction,combined-print-and-e-book-fiction,2020-03-01,2011-02-13,WEEKLY
1,Combined Print & E-Book Nonfiction,Combined Print and E-Book Nonfiction,combined-print-and-e-book-nonfiction,2020-03-01,2011-02-13,WEEKLY
2,Hardcover Fiction,Hardcover Fiction,hardcover-fiction,2020-03-01,2008-06-08,WEEKLY
3,Hardcover Nonfiction,Hardcover Nonfiction,hardcover-nonfiction,2020-03-01,2008-06-08,WEEKLY
4,Paperback Trade Fiction,Trade Fiction Paperback,trade-fiction-paperback,2020-03-01,2008-06-08,WEEKLY


In [31]:
lists_df['book_list']=np.nan

In [32]:
lists_df.head()

Unnamed: 0,display_name,list_name,list_name_encoded,newest_published_date,oldest_published_date,updated,book_list
0,Combined Print & E-Book Fiction,Combined Print and E-Book Fiction,combined-print-and-e-book-fiction,2020-03-01,2011-02-13,WEEKLY,
1,Combined Print & E-Book Nonfiction,Combined Print and E-Book Nonfiction,combined-print-and-e-book-nonfiction,2020-03-01,2011-02-13,WEEKLY,
2,Hardcover Fiction,Hardcover Fiction,hardcover-fiction,2020-03-01,2008-06-08,WEEKLY,
3,Hardcover Nonfiction,Hardcover Nonfiction,hardcover-nonfiction,2020-03-01,2008-06-08,WEEKLY,
4,Paperback Trade Fiction,Trade Fiction Paperback,trade-fiction-paperback,2020-03-01,2008-06-08,WEEKLY,


In [33]:
# This doesn't work, I have to figure out why.
lists_df.loc[lists_df.list_name_encoded=='hardcover-fiction','book_list'] = ['test',"this"]

ValueError: Must have equal len keys and value when setting with an iterable

In [34]:
lists_df.head()

Unnamed: 0,display_name,list_name,list_name_encoded,newest_published_date,oldest_published_date,updated,book_list
0,Combined Print & E-Book Fiction,Combined Print and E-Book Fiction,combined-print-and-e-book-fiction,2020-03-01,2011-02-13,WEEKLY,
1,Combined Print & E-Book Nonfiction,Combined Print and E-Book Nonfiction,combined-print-and-e-book-nonfiction,2020-03-01,2011-02-13,WEEKLY,
2,Hardcover Fiction,Hardcover Fiction,hardcover-fiction,2020-03-01,2008-06-08,WEEKLY,
3,Hardcover Nonfiction,Hardcover Nonfiction,hardcover-nonfiction,2020-03-01,2008-06-08,WEEKLY,
4,Paperback Trade Fiction,Trade Fiction Paperback,trade-fiction-paperback,2020-03-01,2008-06-08,WEEKLY,


In [35]:
lists1 = lists[:2]
lists1

['combined-print-and-e-book-fiction', 'combined-print-and-e-book-nonfiction']

In [36]:
# Retrive information 
books_dict2 = {}
for item in lists:
    request_url="https://api.nytimes.com/svc/books/v3/lists/current/"+item+".json?api-key=76quqESu9qYqfKpEfnJou4kpY1uJ9pMV"
    results = requests.get(request_url)
    results = json.loads(results.text)
    print("status:", results['status'])
    try:
        book_list = aggregate(results['results']['books'],'title')
        books_dict2[item]=book_list
        print(item, "added")
    except KeyError:
        print(item, "NOT added")
    time.sleep(5)

status: OK
combined-print-and-e-book-fiction added
status: OK
combined-print-and-e-book-nonfiction added
status: OK
hardcover-fiction added
status: OK
hardcover-nonfiction added
status: OK
trade-fiction-paperback added
status: OK
mass-market-paperback added
status: OK
paperback-nonfiction added
status: OK
e-book-fiction added


KeyError: 'status'

In [None]:
lists

In [None]:
books_dict['hardcover-political-books']

In [None]:

request_url="https://api.nytimes.com/svc/books/v3/lists/current/hardcover-political-books.json?api-key=76quqESu9qYqfKpEfnJou4kpY1uJ9pMV"
results = requests.get(request_url)
results = json.loads(results.text)

In [None]:
aggregate(results['results']['books'],'title')

In [None]:
results['status']

In [None]:
df = pd.DataFrame(columns=['test'])
df['test']=lists

In [None]:
df

In [None]:
Wikipedia API Information: https://www.mediawiki.org/wiki/API:Main_page
Wikipedia page of interest: https://en.wikipedia.org/wiki/List_of_best-selling_books

Basic Features:

- Title
- Author
    - Gender
    - Nationality
    - Languages Spoken By author (at time of writing)
- Genre
- Publisher
- Published year
- Published in 
    - (country, state city?)
- Pages
- Sales Data
- Original Language

Possible Engineered Features:

- popularity
- Combined Rating Weighed from different sources
- NLP
    - publisher site information
    - book author information
    - Reviews (from one or several sources)
    - Book wiki page
    - Author wiki page
    - Reddit information
    - books reddit search results + sentiment analysis on results
- does reddit exist
- does wiki exist
- does fandom exist
- twitter mentions
- google mentions 
- NN on book cover
