In [6]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [17]:
# Root URL of the catalogue section of the site; the listing pages and the
# individual book pages scraped below all live under this prefix.
BASE_URL = "http://books.toscrape.com/catalogue/"

def get_books_urls(page_url, timeout=10):
    """Return the absolute URLs of all books linked from one listing page.

    Args:
        page_url: Absolute URL of a catalogue listing page.
        timeout: Seconds to wait for the server before giving up
            (forwarded to ``requests.get``).

    Returns:
        list[str]: One absolute URL per book entry, in page order. Empty
        when the page contains no ``<ol>`` listing.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = requests.get(page_url, timeout=timeout)
    response.raise_for_status()
    # Parse the raw bytes so BeautifulSoup detects the document encoding
    # itself instead of trusting the charset guessed from the HTTP headers.
    soup = BeautifulSoup(response.content, 'html.parser')

    listing = soup.find('ol')
    if listing is None:
        return []

    books_urls = []
    for item in listing.find_all('li'):
        article = item.find('article', class_='product_pod')
        link = article.find('a', href=True) if article is not None else None
        if link is None:
            # Not a product entry (or no link in it): nothing to collect.
            continue
        # hrefs are relative to the page they appear on, so resolve them
        # against page_url rather than prepending a fixed prefix; this stays
        # correct for listing pages that are not directly under /catalogue/.
        books_urls.append(urljoin(page_url, link['href']))
    return books_urls

def get_book_details(book_url, timeout=10):
    """Fetch one book page and extract its name, price and star rating.

    Args:
        book_url: Absolute URL of the book's detail page.
        timeout: Seconds to wait for the server before giving up
            (forwarded to ``requests.get``).

    Returns:
        dict with the keys ``'name'`` (text of the product ``<h1>``),
        ``'price'`` (text of the ``price_color`` element, left unparsed) and
        ``'rating'`` (int 1-5, or ``None`` when the page has no
        ``star-rating`` element or none of its classes is a known rating
        word).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        AttributeError: If the page lacks the expected product markup.
    """
    response = requests.get(book_url, timeout=timeout)
    response.raise_for_status()
    # Hand BeautifulSoup the raw bytes so it picks the encoding from the
    # document itself. response.text would be decoded with the charset
    # requests guessed from the HTTP headers (ISO-8859-1 when a text/*
    # response declares none), which can garble non-ASCII characters such as
    # currency symbols.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Match on the single 'product_main' class: passing the full
    # "col-sm-6 product_main" string only matches that exact attribute value.
    product_main = soup.find('div', class_='product_main')
    name = product_main.find('h1').text
    price = product_main.find('p', class_='price_color').text

    rating = None
    rating_element = product_main.find('p', class_='star-rating')
    if rating_element is not None:
        rating_conversion = {'One': 1, 'Two': 2, 'Three': 3, 'Four': 4, 'Five': 5}
        # The rating is encoded as one of the element's CSS classes. Look for
        # the rating word wherever it appears instead of assuming it is the
        # second class, so a missing or reordered class cannot raise or
        # produce a wrong value.
        for css_class in rating_element.get('class') or []:
            if css_class in rating_conversion:
                rating = rating_conversion[css_class]
                break

    return {'name': name, 'price': price, 'rating': rating}

def scrape_books(num_pages):
    """Gather the details of every book on the first ``num_pages`` listing pages.

    Pages are visited in order, starting at page 1. For each page the book
    links are collected with ``get_books_urls`` and every linked book is then
    fetched with ``get_book_details``.

    Args:
        num_pages: Number of catalogue listing pages to walk through.

    Returns:
        list: One ``get_book_details`` result per book, in page order and
        then in on-page order.
    """
    listing_urls = (
        f"http://books.toscrape.com/catalogue/page-{page_number}.html"
        for page_number in range(1, num_pages + 1)
    )
    return [
        get_book_details(book_url)
        for listing_url in listing_urls
        for book_url in get_books_urls(listing_url)
    ]

In [18]:
# Scrape the first two listing pages, load the per-book records into a
# DataFrame (one row per book) and print it as plain text.
books_data = scrape_books(num_pages=2)
books_df = pd.DataFrame(data=books_data)
print(books_df)

['star-rating', 'Three']
['star-rating', 'One']
['star-rating', 'One']
['star-rating', 'Four']
['star-rating', 'Five']
['star-rating', 'One']
['star-rating', 'Four']
['star-rating', 'Three']
['star-rating', 'Four']
['star-rating', 'One']
['star-rating', 'Two']
['star-rating', 'Four']
['star-rating', 'Five']
['star-rating', 'Five']
['star-rating', 'Five']
['star-rating', 'Three']
['star-rating', 'One']
['star-rating', 'One']
['star-rating', 'Two']
['star-rating', 'Two']
['star-rating', 'One']
['star-rating', 'Two']
['star-rating', 'Three']
['star-rating', 'Five']
['star-rating', 'Five']
['star-rating', 'Three']
['star-rating', 'Three']
['star-rating', 'Three']
['star-rating', 'Five']
['star-rating', 'Four']
['star-rating', 'Five']
['star-rating', 'Three']
['star-rating', 'Five']
['star-rating', 'One']
['star-rating', 'Five']
['star-rating', 'Three']
['star-rating', 'Two']
['star-rating', 'One']
['star-rating', 'Four']
['star-rating', 'Two']
                                              

In [None]:
# NOTE(review): this looks like a leftover scratch cell. Evaluating the bare
# name `print` calls nothing; presumably it can be deleted - confirm.
print