In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import re

base_url = "https://books.toscrape.com/"

def get_soup(url, timeout=10):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``html.parser`` backend. The HTTP status code is not checked here,
        so an error page is parsed like any other page.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Decode the body as UTF-8 regardless of the encoding requests picked.
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "html.parser")

def clean_price(raw_price):
    """Reduce a scraped price string to its digits and decimal points.

    Every character that is neither a digit nor ``.`` (currency symbols,
    stray mis-decoded bytes, whitespace, thousands separators) is dropped.

    Args:
        raw_price: Price text as it appears on the page.

    Returns:
        The remaining characters as a string; no numeric conversion is done.
    """
    unwanted = re.compile(r"[^\d\.]")
    return unwanted.sub("", raw_price)

def scrape_page(url):
    """Extract one row per book from the listing page at *url*.

    Args:
        url: Address of a listing page containing
            ``<article class="product_pod">`` elements.

    Returns:
        A list of ``[title, price, rating, availability, link]`` lists, one
        per book found on the page. The list is empty when the page contains
        no ``product_pod`` articles.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    soup = get_soup(url)
    page_data = []

    for book in soup.find_all("article", class_="product_pod"):
        anchor = book.h3.a
        title = anchor["title"]

        raw_price = book.find("p", class_="price_color").text.strip()
        price = clean_price(raw_price)  # digits and decimal point only

        # Second class token of the first <p> in the article; presumably the
        # star-rating word (e.g. "Three") -- verify against the page markup.
        rating = book.p["class"][1]
        availability = book.find("p", class_="instock availability").text.strip()

        # Resolve the href against the page it came from, as a browser would.
        # Prefixing base_url is only correct when the href is relative to the
        # site root, which is not the case for pages in a sub-directory.
        link = urljoin(url, anchor["href"])

        page_data.append([title, price, rating, availability, link])

    return page_data

def scrape_all_books():
    """Scrape every numbered catalogue page and collect all book rows.

    Pages are requested as ``catalogue/page-1.html``, ``page-2.html``, ...
    Each page is downloaded exactly once, by ``scrape_page``; the walk stops
    at the first page that yields no books (for example the error page served
    past the end of the catalogue).

    Returns:
        A list with the rows returned by ``scrape_page`` for every page, in
        page order.
    """
    all_books = []
    page = 1

    while True:
        page_url = f"{base_url}catalogue/page-{page}.html"
        page_books = scrape_page(page_url)

        # An empty result marks the end of the catalogue. This replaces the
        # separate status-code probe, which downloaded every page a second
        # time.
        if not page_books:
            break

        # Printed only for pages that produced data, so the progress output
        # lists the same pages as before.
        print(f"Scraping page {page}...")

        all_books.extend(page_books)
        page += 1

    return all_books

def save_to_csv(data, filename="books_data.csv"):
    """Write the scraped rows to a UTF-8 CSV file with a header line.

    Args:
        data: Iterable of rows, each a sequence of
            ``[title, price, rating, availability, link]``.
        filename: Path of the CSV file to create or overwrite. Defaults to
            ``books_data.csv``.
    """
    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Title", "Price", "Rating", "Availability", "Link"])
        writer.writerows(data)

    # Report the file that was actually written (the old message named a
    # different file, "books.csv").
    print(f"Saved clean data to {filename}")

# Script entry point: scrape the whole catalogue, write it to CSV, then
# report how many book rows were collected.
if __name__ == "__main__":
    data = scrape_all_books()  # list of per-book rows from every page
    save_to_csv(data)
    print(f"Scraped {len(data)} books successfully!")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
