In [1]:
import re
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
url = "https://books.toscrape.com/catalogue/page-1.html"

# A timeout keeps the cell from hanging indefinitely if the site is unreachable,
# and raise_for_status() fails loudly on 4xx/5xx instead of parsing an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
response

<Response [200]>

In [3]:
# Name the parser explicitly: without it bs4 picks whichever parser happens to
# be installed (and emits GuessedAtParserWarning), so results can vary by machine.
soup = BeautifulSoup(response.content, "html.parser")

In [4]:
# Collect the product cards (<article class="product_pod"> elements) and count them.
books = soup.find_all("article", attrs={"class": "product_pod"})
len(books)

20

In [5]:
# Peek at the first card: the last CSS class of its star-rating <p> is the rating word.
(
    books[0]
    .find_all("p", attrs={"class": "star-rating"})[0]["class"][-1]
)

'Three'

In [None]:
# Functions to extract product info from a book card on a catalogue listing page

def get_book_rating(book):
    """Return the star rating of a book card as an integer from 1 to 5.

    The rating is encoded as a word among the CSS classes of the card's
    ``<p class="star-rating ...">`` tag, e.g. ``["star-rating", "Three"]``.

    Args:
        book: Element for one book card; must provide ``find_all``.

    Returns:
        int: The rating, 1 through 5.

    Raises:
        ValueError: If the card has no star-rating tag, or none of its classes
            is a recognised rating word. Unknown values are no longer reported
            as 5, which would let such books pass a ``min_rating`` filter.
    """
    ratings_by_word = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}

    rating_tags = book.find_all("p", attrs={"class": "star-rating"})
    if not rating_tags:
        raise ValueError("book has no star-rating element")

    css_classes = rating_tags[0]["class"]
    # Some parsers hand back the class attribute as one space-separated string
    if isinstance(css_classes, str):
        css_classes = css_classes.split()

    # Start from the end, where the rating word normally sits, but accept it
    # in any position so the lookup does not depend on class order.
    for css_class in reversed(css_classes):
        if css_class in ratings_by_word:
            return ratings_by_word[css_class]

    raise ValueError(f"unrecognised star rating classes: {css_classes!r}")
    

def get_book_price(book):
    """Return the price shown on a book card as a float.

    The text of the first ``<p class="price_color">`` tag is read and its
    numeric part extracted. Anything around the number is ignored, so this
    tolerates a mis-decoded pound sign ("Â£"), other currency symbols and
    surrounding whitespace; thousands separators (",") are removed.

    Args:
        book: Element for one book card; must provide ``find_all``.

    Returns:
        float: The price without its currency symbol.

    Raises:
        ValueError: If the price text contains no number.
    """
    price_text = book.find_all("p", attrs={"class": "price_color"})[0].get_text()

    # Digits, optional thousands separators, optional decimal part
    number = re.search(r"\d[\d,]*(?:\.\d+)?", price_text)
    if number is None:
        raise ValueError(f"no price found in {price_text!r}")
    return float(number.group().replace(",", ""))


def get_book_title(book):
    """Return the book title, read from the ``title`` attribute of the card's last ``<a>`` tag."""
    last_anchor = book.find_all("a")[-1]
    return last_anchor["title"]

def get_book_href(book, base_url="https://books.toscrape.com/catalogue/"):
    """Return the absolute URL of a book's detail page.

    The ``href`` of the card's last ``<a>`` tag is resolved against
    ``base_url`` with ``urljoin``. Plain relative links give the same result
    as simple concatenation, while ``../`` segments and already-absolute
    links are now resolved correctly.

    Args:
        book: Element for one book card; must provide ``find_all``.
        base_url: URL the link is relative to. Keep the trailing slash so it
            is treated as a directory. Defaults to the catalogue directory.

    Returns:
        str: Absolute URL of the book page.
    """
    relative_href = book.find_all("a")[-1]["href"]
    return urljoin(base_url, relative_href)

In [None]:
# Functions to get further info from an individual book's detail page

def get_genre(soup_book):
    """Return the genre of a book from its page's breadcrumb trail.

    Takes the second-to-last ``<li>`` of the first ``<ul class="breadcrumb">``
    and returns its text with surrounding whitespace removed.
    """
    breadcrumb = soup_book.find_all("ul", attrs={"class": "breadcrumb"})[0]
    crumbs = breadcrumb.find_all("li")
    genre_text = crumbs[-2].get_text()
    return genre_text.strip()

def get_upc(soup_book):
    """Return the text of the first ``<td>`` on a book page, which holds the UPC."""
    first_cell = soup_book.find_all("td")[0]
    return first_cell.get_text()

def get_availability(soup_book):
    """Return the availability text of a book page, stripped of surrounding whitespace.

    The text comes from the first ``<p>`` whose class is ``instock availability``.
    """
    stock_tags = soup_book.find_all("p", attrs={"class": "instock availability"})
    raw_text = stock_tags[0].get_text()
    return raw_text.strip()


In [None]:
def _fetch_soup(url, session=None, timeout=10):
    """Download ``url`` and return its content parsed with the built-in HTML parser."""
    http = session if session is not None else requests
    response = http.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than scraping an error page and returning nothing
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def get_page_books(page_number=1, min_rating=4, max_price=20, session=None, timeout=10):
    """Scrape one catalogue listing page and return the books that pass the filters.

    Every book card on the page is checked against ``max_price`` and
    ``min_rating``. Only for cards that pass is the book's own page
    downloaded to read its UPC, availability and genre.

    Args:
        page_number: Number used in ``catalogue/page-{page_number}.html``.
        min_rating: Lowest star rating to keep (inclusive).
        max_price: Highest price to keep (inclusive).
        session: Optional ``requests.Session`` reused for every request, so
            connections can be pooled across calls. When omitted, plain
            ``requests.get`` is used.
        timeout: Seconds to wait on each HTTP request.

    Returns:
        pandas.DataFrame: One row per kept book with columns ``price``,
        ``title``, ``link``, ``rating``, ``upc``, ``availability`` and
        ``genre``. An empty frame with no columns if nothing qualifies.

    Raises:
        requests.HTTPError: If the listing page or a book page answers with
            an error status.
    """
    listing_url = f"https://books.toscrape.com/catalogue/page-{page_number}.html"
    listing_soup = _fetch_soup(listing_url, session=session, timeout=timeout)

    # Select cards by their product marker rather than an exact string of
    # Bootstrap layout classes, which breaks if the class order or grid changes.
    cards = listing_soup.find_all("article", attrs={"class": "product_pod"})

    records = []
    for book in cards:
        price = get_book_price(book)
        title = get_book_title(book)
        link = get_book_href(book)
        rating = get_book_rating(book)

        # Filter before fetching the detail page so rejected books cost no request
        if not (price <= max_price and rating >= min_rating):
            continue

        soup_book = _fetch_soup(link, session=session, timeout=timeout)
        records.append({"price": price,
                        "title": title,
                        "link": link,
                        "rating": rating,
                        "upc": get_upc(soup_book),
                        "availability": get_availability(soup_book),
                        "genre": get_genre(soup_book)})

    # Build the frame once from the collected records; rows are numbered 0..n-1
    return pd.DataFrame(records)

# Try the scraper on the first page with loose filters (rating >= 1, price <= 70)
get_page_books(page_number=1, min_rating=1, max_price=70)

Unnamed: 0,price,title,link,rating,upc,availability,genre
0,51.77,A Light in the Attic,https://books.toscrape.com/catalogue/a-light-i...,3,a897fe39b1053632,In stock (22 available),Poetry
1,53.74,Tipping the Velvet,https://books.toscrape.com/catalogue/tipping-t...,1,90fa61229261140a,In stock (20 available),Historical Fiction
2,50.1,Soumission,https://books.toscrape.com/catalogue/soumissio...,1,6957f44c3847a760,In stock (20 available),Fiction
3,47.82,Sharp Objects,https://books.toscrape.com/catalogue/sharp-obj...,4,e00eb4fd7b871a48,In stock (20 available),Mystery
4,54.23,Sapiens: A Brief History of Humankind,https://books.toscrape.com/catalogue/sapiens-a...,5,4165285e1663650f,In stock (20 available),History
5,22.65,The Requiem Red,https://books.toscrape.com/catalogue/the-requi...,1,f77dbf2323deb740,In stock (19 available),Young Adult
6,33.34,The Dirty Little Secrets of Getting Your Dream...,https://books.toscrape.com/catalogue/the-dirty...,4,2597b5a345f45e1b,In stock (19 available),Business
7,17.93,The Coming Woman: A Novel Based on the Life of...,https://books.toscrape.com/catalogue/the-comin...,3,e72a5dfc7e9267b2,In stock (19 available),Default
8,22.6,The Boys in the Boat: Nine Americans and Their...,https://books.toscrape.com/catalogue/the-boys-...,4,e10e1e165dc8be4a,In stock (19 available),Default
9,52.15,The Black Maria,https://books.toscrape.com/catalogue/the-black...,1,1dfe412b8ac00530,In stock (19 available),Poetry


In [None]:
# Scrape catalogue pages 1 through 50, keeping books priced at most 20
# with a rating of at least 4; one DataFrame per page.
list_books = [
    get_page_books(page_number=page, max_price=20, min_rating=4)
    for page in range(1, 51)
]

In [None]:
# Stack the per-page frames into one table with a fresh 0..n-1 index
final_df = pd.concat(list_books, ignore_index=True)

In [11]:
# Display the combined table of books that passed the price and rating filters
final_df

Unnamed: 0,price,title,link,rating,upc,availability,genre
0,17.46,Set Me Free,https://books.toscrape.com/catalogue/set-me-fr...,5,ce6396b0f23f6ecc,In stock (19 available),Young Adult
1,17.66,The Four Agreements: A Practical Guide to Pers...,https://books.toscrape.com/catalogue/the-four-...,5,6258a1f6a6dcfe50,In stock (18 available),Spirituality
2,15.94,Sophie's World,https://books.toscrape.com/catalogue/sophies-w...,5,6be3beb0793a53e7,In stock (18 available),Philosophy
3,14.27,Untitled Collection: Sabbath Poems 2014,https://books.toscrape.com/catalogue/untitled-...,4,657fe5ead67a7767,In stock (16 available),Poetry
4,19.49,This One Summer,https://books.toscrape.com/catalogue/this-one-...,4,51653ef291ab7ddc,In stock (16 available),Sequential Art
...,...,...,...,...,...,...,...
70,19.69,The Zombie Room,https://books.toscrape.com/catalogue/the-zombi...,5,9c96cd1329fbd82d,In stock (1 available),Default
71,12.34,The Silent Wife,https://books.toscrape.com/catalogue/the-silen...,5,b78deb463531d078,In stock (1 available),Fiction
72,12.29,The Girl You Lost,https://books.toscrape.com/catalogue/the-girl-...,5,4280ac3eab57aa5d,In stock (1 available),Mystery
73,19.18,The Edge of Reason (Bridget Jones #2),https://books.toscrape.com/catalogue/the-edge-...,4,29fc016c459aeb14,In stock (1 available),Womens Fiction
