In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin

# Target website (a public sandbox intended for scraping practice)
BASE_URL = "https://books.toscrape.com/"
URL = urljoin(BASE_URL, "catalogue/page-1.html")

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Column order of the exported CSV. Passing it to the DataFrame keeps the
# frame well-formed (and the Full_Link rewrite valid) even if nothing was
# scraped.
COLUMNS = ["Title", "Price", "Availability", "Rating", "Full_Link"]

products = []

# A single session is shared by all catalogue page requests.
with requests.Session() as session:
    # URL is advanced page by page and becomes None after the last page.
    while URL:
        response = session.get(URL, timeout=REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors instead of parsing an error page as an
        # empty catalogue and silently writing a truncated CSV.
        response.raise_for_status()
        response.encoding = "utf-8"  # decode as UTF-8 regardless of headers
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract product information
        for product in soup.select("article.product_pod"):
            anchor = product.h3.a
            title = anchor["title"]
            # The "Â" removal is kept as a guard against mis-decoded "£";
            # it is a no-op when the page decodes cleanly.
            price = (
                product.select_one(".price_color").text.replace("Â", "").strip()
            )
            availability = product.select_one(".instock.availability").text.strip()

            # Star rating is the class next to "star-rating" (e.g. "Three").
            # Look it up by name rather than by position in the class list.
            rating_tag = product.select_one("p.star-rating")
            if rating_tag is None:
                rating = None
            else:
                rating = next(
                    (c for c in rating_tag.get("class", []) if c != "star-rating"),
                    None,
                )

            # Product hrefs are relative to the page they appear on, so
            # resolve them against the current page URL.
            full_link = urljoin(URL, anchor["href"])

            products.append({
                "Title": title,
                "Price": price,
                "Availability": availability,
                "Rating": rating,
                "Full_Link": full_link,
            })

        # Follow the "next" pagination link, if any
        next_page = soup.select_one("li.next > a")
        URL = urljoin(URL, next_page["href"]) if next_page else None

# Save to CSV (UTF-8 with BOM so Excel detects the encoding)
df = pd.DataFrame(products, columns=COLUMNS)
# Make Excel-clickable hyperlinks using the HYPERLINK formula. Double quotes
# inside an Excel string literal must be doubled.
df["Full_Link"] = [
    '=HYPERLINK("' + link.replace('"', '""') + '", "Click")'
    for link in df["Full_Link"]
]
df.to_csv("products.csv", index=False, encoding="utf-8-sig")

print("✅ Scraping complete! Data saved to 'products.csv'")


✅ Scraping complete! Data saved to 'products.csv'
