## NPR Bookscraping: Method 1
### Using pd.read_json @ https://apps.npr.org/best-books/2022.json 

In [1]:
%%time
import pandas as pd

df = pd.DataFrame()

for year in range(2013,2023):
    url = f'https://apps.npr.org/best-books/{year}.json'
    df = pd.concat([df, pd.read_json(url)])
    
df = df.drop(columns=['dimensions','cover','id'])

# Saving to excel
df.to_excel('Bookscraping_NPR_2013-2022.xlsx')

df

CPU times: user 802 ms, sys: 299 ms, total: 1.1 s
Wall time: 1.95 s


Unnamed: 0,title,author,tags
0,A Constellation Of Vital Phenomena: A Novel,Anthony Marra,"[staff picks, realistic fiction, for history l..."
1,A Corner Of White,Jaclyn Moriarty,"[young adult, sci fi, fantasy & speculative fi..."
2,A Duke Never Yields,Juliana Gray,"[love stories, for history lovers, let’s talk ..."
3,A Natural History Of Dragons: A Memoir By Lady...,Marie Brennan,"[sci fi, fantasy & speculative fiction, for hi..."
4,A Reader's Book Of Days: True Tales From The L...,Tom Nissley,"[staff picks, for history lovers, nonfiction, ..."
...,...,...,...
397,Shine Bright: A Very Personal History of Black...,Danyel Smith,"[nonfiction, staff picks, biography & memoir, ..."
398,"Dilla Time: The Life and Afterlife of J Dilla,...",Dan Charnas,"[nonfiction, staff picks, biography & memoir, ..."
399,Punks: New & Selected Poems,John Keene,"[short stories, essays & poetry, identity & cu..."
400,Demon Copperhead: A Novel,Barbara Kingsolver,"[realistic fiction, the states we’re in, famil..."


## NPR Bookscraping: Method 2
## Using Selenium

In [2]:
%%time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

book_list = []

#current range of books run from 2013 to 2022
for year in range(2013,2023):
    
    # setting chrome browser as webdriver
    driver = webdriver.Chrome()
    url = f'https://apps.npr.org/best-books/#view=list&year={year}'
    driver.get(url)

    # metadata class contains book information
    books = driver.find_elements(By.CLASS_NAME, "metadata")

    for book in books:
        # book titles are in the "title" class, authors are in "author" class
        title = book.find_element(By.CLASS_NAME, 'title').text
        author = book.find_element(By.CLASS_NAME, 'author').text.replace("by ","")
        book_dict = {
            'Title':title,
            'Author':author,
            'Year':year
        }
        book_list.append(book_dict)
    
df = pd.DataFrame(book_list)

df

CPU times: user 7.5 s, sys: 746 ms, total: 8.25 s
Wall time: 4min 32s


Unnamed: 0,Title,Author,Year
0,A Constellation Of Vital Phenomena: A Novel,Anthony Marra,2013
1,A Corner Of White,Jaclyn Moriarty,2013
2,A Duke Never Yields,Juliana Gray,2013
3,A Natural History Of Dragons: A Memoir By Lady...,Marie Brennan,2013
4,A Reader's Book Of Days: True Tales From The L...,Tom Nissley,2013
...,...,...,...
3250,You Gotta Be You: How to Embrace This Messy Li...,Brandon Kyle Goodman,2022
3251,You Made a Fool of Death with Your Beauty: A N...,Akwaeke Emezi,2022
3252,You're Invited,Amanda Jayatissa,2022
3253,Young Mungo,Douglas Stuart,2022
