In [None]:


from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# The catalogue encodes a book's rating as a word in the CSS class list of the
# rating paragraph, e.g. <p class="star-rating Three">.
_STAR_WORDS = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}

# Column order of the returned DataFrame. Passing it explicitly keeps the
# schema stable even when no book matches the filters.
_BOOK_COLUMNS = [
    'UPC',
    'Title',
    'Price (£)',
    'Rating',
    'Genre',
    'Availability',
    'Description',
]

# Seconds to wait for any single HTTP request before giving up.
_REQUEST_TIMEOUT = 10


def _parse_price(text):
    """Return the numeric price contained in text such as '£51.77'."""
    # Keep only digits and the decimal point so the result does not depend on
    # how many currency / mis-decoded characters precede the number.
    return float("".join(ch for ch in text if ch in "0123456789."))


def _parse_rating(book):
    """Return the 1-5 star rating of a product tile, or 0 if it has none."""
    rating_tag = book.find('p', class_='star-rating')
    if rating_tag is None:
        return 0
    for css_class in rating_tag.get('class', []):
        if css_class in _STAR_WORDS:
            return _STAR_WORDS[css_class]
    return 0


def _parse_detail(detail_soup):
    """Return (upc, genre, description) from a parsed product detail page.

    Any value that cannot be located on the page is returned as None.
    """
    upc = None
    upc_header = detail_soup.find('th', string='UPC')
    if upc_header is not None:
        upc_cell = upc_header.find_next_sibling('td')
        if upc_cell is not None:
            upc = upc_cell.text.strip()

    # Breadcrumb links are expected to read Home > Books > <category>; the
    # book title itself is the final, non-link crumb.
    genre = None
    breadcrumb = detail_soup.find('ul', class_='breadcrumb')
    crumb_links = breadcrumb.find_all('a') if breadcrumb is not None else []
    if len(crumb_links) >= 3:
        genre = crumb_links[-1].text.strip()

    description = None
    meta = detail_soup.find('meta', attrs={'name': 'description'})
    if meta is not None and meta.get('content'):
        description = meta['content'].strip()

    return upc, genre, description


def scrape_books(min_rating, max_price, max_pages=5):
    """Scrape books.toscrape.com and return the books matching the filters.

    Walks the paginated catalogue, and for every book whose star rating is at
    least ``min_rating`` and whose price is at most ``max_price`` fetches the
    product detail page to collect the UPC, genre and description.

    Args:
        min_rating: Minimum star rating (1-5) a book must have to be kept.
        max_price: Maximum price in pounds a book may have to be kept.
        max_pages: Number of catalogue pages to visit, starting at page 1.
            Defaults to 5.

    Returns:
        A pandas DataFrame with the columns 'UPC', 'Title', 'Price (£)',
        'Rating', 'Genre', 'Availability' and 'Description'; it has zero rows
        (but the same columns) when nothing matches.

    Raises:
        requests.RequestException: If a request fails at the network level
            or exceeds the timeout.
    """
    base_url = "http://books.toscrape.com/catalogue/page-{}.html"
    books_data = []

    # One session for all requests so the underlying connection is reused.
    with requests.Session() as session:
        for page in range(1, max_pages + 1):
            url = base_url.format(page)
            response = session.get(url, timeout=_REQUEST_TIMEOUT)
            if response.status_code == 404:
                # Ran past the last catalogue page; later pages are missing too.
                break
            if not response.ok:
                # Error page: it contains no product tiles, so skip it.
                continue
            soup = BeautifulSoup(response.content, 'html.parser')

            for book in soup.find_all('article', class_='product_pod'):
                title = book.h3.a['title']
                price = _parse_price(book.find('p', class_='price_color').text)
                rating = _parse_rating(book)

                # Filter first so detail pages are only fetched for matches.
                if rating < min_rating or price > max_price:
                    continue

                availability_tag = book.find('p', class_='availability')
                availability = (
                    availability_tag.text.strip()
                    if availability_tag is not None
                    else None
                )

                # The tile's href is relative to the catalogue page URL.
                detail_url = urljoin(url, book.h3.a['href'])
                detail_response = session.get(
                    detail_url, timeout=_REQUEST_TIMEOUT
                )
                detail_soup = BeautifulSoup(
                    detail_response.content, 'html.parser'
                )
                upc, genre, description = _parse_detail(detail_soup)

                books_data.append({
                    'UPC': upc,
                    'Title': title,
                    'Price (£)': price,
                    'Rating': rating,
                    'Genre': genre,
                    'Availability': availability,
                    'Description': description,
                })

    return pd.DataFrame(books_data, columns=_BOOK_COLUMNS)

# Demo run: call the scraper with min_rating=4 and max_price=20, then show
# the resulting table.
df_books = scrape_books(max_price=20, min_rating=4)
print(df_books)