# Web scraping of NYT's 100 best books of 2022

In [114]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [36]:
# Download the NYT notable-books page and parse its HTML.
url = 'https://www.nytimes.com/interactive/2022/11/22/books/notable-books.html'
# requests waits indefinitely by default; a timeout keeps this cell from
# hanging forever if the server never answers.
page = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page as if
# it were the book list.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

In [130]:
# Getting book titles: collect every <h4> tag carrying the 'book-title' class
book_titles_html = soup.find_all('h4', attrs={'class': 'book-title'})
# keep only the text content of each title tag
book_titles = [title_tag.text for title_tag in book_titles_html]

In [129]:
# Getting author names: collect every <div> matched by the 'book-author hover' class value
author_list_html = soup.find_all('div', attrs={'class': 'book-author hover'})
# keep only the text content of each author tag
author_list = [author_tag.text for author_tag in author_list_html]

In [115]:
# Getting links to book reviews
book_list = soup.find_all('div', attrs={'class': 'text-elements'})
# Look up the first <a> of each 'text-elements' block once, skip blocks that
# have no anchor at all, and keep the href of the remaining ones.
link_list = [
    anchor.get('href')
    for anchor in (block.find('a') for block in book_list)
    if anchor is not None
]

In [154]:
# Getting review blurb for each book
book_reviews = soup.find_all('div', attrs={'class': 'text-elements'})
# Look up the first <p> of each 'text-elements' block once, skip blocks that
# have no paragraph at all, and keep the text of the remaining ones.
reviews_list = [
    paragraph.text
    for paragraph in (block.find('p') for block in book_reviews)
    if paragraph is not None
]

In [164]:
# Putting everything into a df
book_dict = {
    'Title':book_titles,
    'Author':author_list,
    'Blurb':reviews_list,
    'Review link':link_list}
# The four lists are scraped independently, and the blurb/link cells skip
# elements that lack a <p>/<a>, so their lengths can diverge. Check up front
# and report the per-column lengths rather than relying on pandas' generic
# length error, which does not say which column is off.
column_lengths = {column: len(values) for column, values in book_dict.items()}
if len(set(column_lengths.values())) > 1:
    raise ValueError(
        'Scraped columns have different lengths: {}'.format(column_lengths))
df_NYT_books = pd.DataFrame(data=book_dict)
# Persist the table (index included) next to the notebook.
df_NYT_books.to_excel('bookscraping_NYT2022.xlsx')
# Last expression of the cell: display the frame.
df_NYT_books

Unnamed: 0,Title,Author,Blurb,Review link
0,Best Barbarian: Poems,Roger Reeves,Reeves’s terrific second poetry collection eru...,https://www.nytimes.com/2022/04/15/books/revie...
1,The Hurting Kind: Poems,Ada Limón,"Again and again in this poetry collection, her...",https://www.nytimes.com/2022/05/10/books/revie...
2,Now Do You Know Where You Are: Poems,Dana Levin,Levin’s poetry collection is about many things...,https://www.nytimes.com/2022/04/15/books/revie...
3,Afterlives,Abdulrazak Gurnah,The Nobel laureate’s latest novel to be publis...,https://www.nytimes.com/2022/08/18/books/revie...
4,Avalon,Nell Zink,"In Zink’s sixth novel, a girl named Bran harbo...",https://www.nytimes.com/2022/05/17/books/revie...
...,...,...,...,...
95,The Trayvon Generation,Elizabeth Alexander,The poet and scholar traces the effects of sys...,https://www.nytimes.com/2022/03/31/books/eliza...
96,Under the Skin: The Hidden Toll of Racism on A...,Linda Villarosa,Through case histories and independent reporti...,https://www.nytimes.com/2022/06/08/books/revie...
97,Walking the Bowl: A True Story of Murder and S...,Chris Lockhart and Daniel Mulilo Chama,This work of narrative ethnography by an anthr...,https://www.nytimes.com/2022/02/13/books/revie...
98,We Don’t Know Ourselves: A Personal History of...,Fintan O’Toole,"O’Toole, a prolific journalist and critic, did...",https://www.nytimes.com/2022/03/15/books/revie...
