In [41]:
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [42]:
# Fetch and parse the first catalogue page. The resulting module-level `soup`
# is read by getTotalPages() and getAllCategories().
URL = "https://books.toscrape.com/catalogue/page-1.html"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
page = requests.get(URL, timeout=10)
# Fail here with a clear HTTP error instead of later, when a selector
# unexpectedly matches nothing on an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

In [43]:
# Globals
# Module-level accumulators that the fill*/extract* helpers append to.
# Names of the categories; appended to by fillCategoryList().
listCategories = []
# One dict per scraped book; appended to by extractAllBooks().
listBooks = []

In [44]:
# Locators
# CSS selectors passed to select()/select_one() by the helper functions.
# Link inside a book entry; getProductName reads its `title`, getURLProduct its `href`.
BOOKLOCATOR = "li article.product_pod h3 a"
# Star-rating paragraph; getRatingProduct reads the second entry of its `class` attribute.
RATINGLOCATOR = "li article.product_pod p.star-rating"
# Price paragraph; getProductPrice parses its text.
PRICELOCATOR = "li article.product_pod div.product_price p.price_color"
# Pager element whose last whitespace-separated token getTotalPages converts to int.
NUMBERPAGESLOCATOR = "div.container-fluid div.page_inner div.row div.col-sm-8 section div div ul.pager li.current"
# Category links in the sidebar; used by getAllCategories.
CATEGORIESLOCATOR = "aside.sidebar div.side_categories ul.nav-list ul li a"
# One list item per book on a catalogue page; used by getAllBooks.
BOOKSLOCATOR = "div.page_inner div.row div section ol.row li"
# Cover image of a book entry; getProductPicURL reads its `src`.
PICLOCATOR = "li article.product_pod div.image_container a img"

In [59]:
# functions
def getProductName(book):
    """Return the title of a book entry.

    The title is read from the `title` attribute of the element matched by
    BOOKLOCATOR inside `book` (an item as returned by getAllBooks).
    """
    titleLink = book.select_one(BOOKLOCATOR)
    return titleLink.attrs['title']

def getURLProduct(book):
    """Return the `href` of a book entry's link, exactly as written in the page.

    The link is the element matched by BOOKLOCATOR inside `book`; the value is
    not resolved against any base URL.
    """
    productLink = book.select_one(BOOKLOCATOR)
    return productLink.attrs['href']

def ratingNumber(rating):
    """Convert a rating word ('One' .. 'Five') into the integer 1 .. 5.

    The comparison is exact (case-sensitive); any other value yields None.
    """
    ratingWords = ('One', 'Two', 'Three', 'Four', 'Five')
    # The position in the tuple (counted from 1) is the numeric rating.
    for value, word in enumerate(ratingWords, start=1):
        if rating == word:
            return value
    return None

def getRatingProduct(book):
    """Return the numeric rating of a book entry, or None if it is not recognised.

    The rating word is taken from the second entry of the `class` attribute of
    the element matched by RATINGLOCATOR and converted with ratingNumber().
    """
    ratingClasses = book.select_one(RATINGLOCATOR).attrs['class']
    ratingWord = ratingClasses[1]
    return ratingNumber(ratingWord)

def getProductPrice(book):
    """Return the price of a book entry as a float.

    The text of the element matched by PRICELOCATOR is parsed after dropping
    its first character (presumably a one-character currency symbol).
    """
    priceText = book.select_one(PRICELOCATOR).text
    amountText = priceText[1:]
    return float(amountText)

def getTotalPages():
    """Return the total number of catalogue pages as an int.

    Reads the module-level `soup`, takes the text of the element matched by
    NUMBERPAGESLOCATOR and converts its last whitespace-separated token.
    """
    pagerTokens = soup.select_one(NUMBERPAGESLOCATOR).text.split()
    lastToken = pagerTokens[-1]
    return int(lastToken)

def getAllCategories():
    """Return every element of the module-level `soup` matched by CATEGORIESLOCATOR."""
    categoryLinks = soup.select(CATEGORIESLOCATOR)
    return categoryLinks

def fillCategoryList():
    """Append the stripped text of every category element to listCategories.

    The elements come from getAllCategories(); the global list is extended in
    place and nothing is returned.
    """
    listCategories.extend(
        categoryLink.text.strip() for categoryLink in getAllCategories()
    )


def getAllBooks(soup):
    """Return every element of the given parsed page matched by BOOKSLOCATOR.

    `soup` is the parsed page passed by the caller (it shadows the module-level
    `soup` on purpose, so any page can be queried).
    """
    bookItems = soup.select(BOOKSLOCATOR)
    return bookItems

def getProductPicURL(book, base_url="https://books.toscrape.com/catalogue/"):
    """Return the absolute URL of a book entry's cover image.

    The `src` of the element matched by PICLOCATOR is resolved against
    `base_url` with urljoin, so a `../`-relative path is handled by standard
    URL resolution rather than by slicing off a fixed number of characters.

    Args:
        book: a book entry as returned by getAllBooks.
        base_url: URL of the directory the page was served from. The default
            matches the catalogue pages fetched by fillListBooks, for which
            `../media/...` resolves to `https://books.toscrape.com/media/...`.

    Returns:
        The absolute image URL as a string.
    """
    relative = book.select_one(PICLOCATOR).attrs['src']
    return urljoin(base_url, relative)

def extractAllBooks(allBooks):
    """Append one record per book entry in `allBooks` to the global listBooks.

    Each record is a dict with the keys 'bookName', 'bookURL', 'bookRating',
    'bookPrice' and 'bookPic', filled by the corresponding get* helpers.
    """
    for entry in allBooks:
        # Values are evaluated top to bottom, i.e. in the same order as the keys.
        record = {
            'bookName' : getProductName(entry),
            'bookURL' : getURLProduct(entry),
            'bookRating' : getRatingProduct(entry),
            'bookPrice' : getProductPrice(entry),
            'bookPic' : getProductPicURL(entry)
        }
        listBooks.append(record)
        

def fillListBooks(totalPages, timeout=10):
    """Download catalogue pages 1..totalPages and append their books to listBooks.

    Each page is fetched, parsed with BeautifulSoup and handed to
    extractAllBooks(), which appends to the global listBooks. A progress line
    is printed before every request.

    Args:
        totalPages: number of the last catalogue page to fetch (inclusive).
        timeout: seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: if a page answers with an error status.
        requests.Timeout: if a page does not answer within `timeout` seconds.
    """
    # A single session reuses the connection for all pages instead of opening
    # a new one per request.
    with requests.Session() as session:
        for i in range(1, totalPages+1):
            print(f"Extracting from Page {i}\n")
            URL = f"https://books.toscrape.com/catalogue/page-{i}.html"
            page = session.get(URL, timeout=timeout)
            # Stop on an error response rather than silently parsing an error
            # page that contains no books.
            page.raise_for_status()
            soup = BeautifulSoup(page.content, 'html.parser')
            allBooks = getAllBooks(soup)
            extractAllBooks(allBooks)
        
def getPricesRangeBooks(x , y):
    """Print every book in listBooks whose price lies in the closed range [x, y].

    A header line naming the bounds is printed first; each matching book
    record (a dict) is then printed on its own line. Nothing is returned.

    Args:
        x: lower price bound (inclusive).
        y: upper price bound (inclusive).
    """
    # Header spelling corrected ("between").
    print(f'- Books with price between {x} and {y}')
    for book in listBooks:
        if x <= book['bookPrice'] <= y:
            print(book)

def getBookLetter(A):
    """Print every book in listBooks whose title starts with the character `A`.

    A header line is printed first; each matching book record (a dict) is then
    printed on its own line. The comparison is case-sensitive. Nothing is
    returned.

    Args:
        A: the single character the title must begin with.
    """
    print(f'- Books starting with letter {A}')
    for book in listBooks:
        # Slicing instead of indexing: an empty title yields '' rather than
        # raising IndexError.
        if book['bookName'][:1] == A:
            print(book)

def getBookRating(rating):
    """Print every book in listBooks whose 'bookRating' equals `rating`.

    A header line is printed first, followed by one line per matching record.
    Nothing is returned.
    """
    print(f'- Books with rating {rating}')
    matching = (entry for entry in listBooks if entry['bookRating'] == rating)
    for entry in matching:
        print(entry)


In [46]:
# call functions
totalPages = getTotalPages()
# Empty the accumulator first so that re-running this cell does not append
# every book a second time.
listBooks.clear()
fillListBooks(totalPages)

Extracting from Page 1

Extracting from Page 2

Extracting from Page 3

Extracting from Page 4

Extracting from Page 5

Extracting from Page 6

Extracting from Page 7

Extracting from Page 8

Extracting from Page 9

Extracting from Page 10

Extracting from Page 11

Extracting from Page 12

Extracting from Page 13

Extracting from Page 14

Extracting from Page 15

Extracting from Page 16

Extracting from Page 17

Extracting from Page 18

Extracting from Page 19

Extracting from Page 20

Extracting from Page 21

Extracting from Page 22

Extracting from Page 23

Extracting from Page 24

Extracting from Page 25

Extracting from Page 26

Extracting from Page 27

Extracting from Page 28

Extracting from Page 29

Extracting from Page 30

Extracting from Page 31

Extracting from Page 32

Extracting from Page 33

Extracting from Page 34

Extracting from Page 35

Extracting from Page 36

Extracting from Page 37

Extracting from Page 38

Extracting from Page 39

Extracting from Page 40

Extractin

In [60]:
# Print three example reports over listBooks, each preceded by a separator
# line of 100 asterisks.
print(100*'*')
getPricesRangeBooks(5.5,10.5)  # books priced from 5.5 to 10.5, bounds included
print(100*'*')
getBookLetter('A')  # books whose title begins with 'A'
print(100*'*')
getBookRating(4)  # books rated 4

****************************************************************************************************
- Books with price bettween 5.5 and 10.5
{'bookName': 'Patience', 'bookURL': 'patience_916/index.html', 'bookRating': 3, 'bookPrice': 10.16, 'bookPic': 'https://books.toscrape.com/media/cache/01/72/01726c619a05114dca75bd840095016d.jpg'}
{'bookName': 'The Lucifer Effect: Understanding How Good People Turn Evil', 'bookURL': 'the-lucifer-effect-understanding-how-good-people-turn-evil_758/index.html', 'bookRating': 1, 'bookPrice': 10.4, 'bookPic': 'https://books.toscrape.com/media/cache/a6/c8/a6c8256b123493472591c5855c7de704.jpg'}
{'bookName': 'Greek Mythic History', 'bookURL': 'greek-mythic-history_698/index.html', 'bookRating': 5, 'bookPrice': 10.23, 'bookPic': 'https://books.toscrape.com/media/cache/36/cf/36cf56c7bdf35aadbcc6f05a8e8d8fcb.jpg'}
{'bookName': 'NaNo What Now? Finding your editing process, revising your NaNoWriMo book and building a writing career through publishing and beyon