In [None]:
#!pip install requests beautifulsoup4
#!pip install lxml

In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import re
import time
import random


In [2]:
# ---- FUNCTION TO SCRAPE ONE PAGE ----
def scrape_page(page_num):
    """Fetch one catalogue page of books.toscrape.com and extract its listings.

    Args:
        page_num: Value substituted into the ``page-{page_num}.html`` URL.

    Returns:
        list[dict]: One dict per ``ol.row li`` element with the keys
        ``title`` (str), ``price`` (float), ``availability`` (stripped str)
        and ``rating`` (second CSS class of the ``.star-rating`` element, or
        ``"Not Rated"`` when that class or element is absent).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If a listing's price text contains no number.
    """
    url = f"https://books.toscrape.com/catalogue/page-{page_num}.html"
    r = requests.get(url, timeout=10)
    # Fail loudly on HTTP errors: an error page contains no listings, so
    # parsing it would silently return an empty list instead of signalling
    # that the page could not be fetched.
    r.raise_for_status()
    r.encoding = "utf-8"  # force UTF-8 when decoding r.text (fixes encoding issues)
    soup = BeautifulSoup(r.text, "lxml")
    books = []

    for book in soup.select("ol.row li"):
        title = book.h3.a["title"]
        price_text = book.select_one(".price_color").text
        # Pull the numeric part out of the price text (drops the currency symbol).
        price_match = re.search(r"\d+(?:\.\d+)?", price_text)
        if price_match is None:
            raise ValueError(f"Could not parse a price from {price_text!r}")
        price = float(price_match.group())
        availability = book.select_one(".availability").text.strip()
        # The rating is read from the element's class list; its second entry
        # is used as the rating label.
        rating_tag = book.select_one(".star-rating")
        rating_class = rating_tag.get("class", []) if rating_tag is not None else []
        rating = rating_class[1] if len(rating_class) > 1 else "Not Rated"

        books.append({
            "title": title,
            "price": price,
            "availability": availability,
            "rating": rating
        })

    return books



In [3]:
# ---- FUNCTION TO SCRAPE MULTIPLE PAGES ----
def scrape_books(start_page=1, end_page=3, delay_range=(1,2)):
    """Scrape a contiguous range of catalogue pages and merge the results.

    Args:
        start_page: First page number to scrape (inclusive).
        end_page: Last page number to scrape (inclusive). If it is smaller
            than ``start_page`` no page is requested.
        delay_range: ``(low, high)`` bounds, in seconds, passed to
            ``random.uniform`` to pick the pause between two requests.

    Returns:
        list: The concatenation of every ``scrape_page(page)`` result, in
        page order.
    """
    all_books = []
    for page in range(start_page, end_page + 1):
        all_books.extend(scrape_page(page))
        # The delay exists to space out requests, so it is only needed when
        # another request follows; sleeping after the last page just wastes time.
        if page < end_page:
            time.sleep(random.uniform(*delay_range))  # polite delay
    return all_books

In [4]:
# ---- FUNCTION TO SAVE CSV ----
def save_csv(path, data):
    """Write a sequence of row dicts to ``path`` as a UTF-8 CSV file.

    The header row is taken from the keys of the first row. When ``data`` is
    empty (or otherwise falsy) nothing is written and no file is created.

    Args:
        path: Destination file path; an existing file is overwritten.
        data: Indexable sequence of dicts, one per CSV row.
    """
    if data:
        # newline="" lets the csv module control line endings itself.
        with open(path, "w", newline="", encoding="utf-8") as out_file:
            sheet = csv.DictWriter(out_file, fieldnames=[*data[0].keys()])
            sheet.writeheader()
            for record in data:
                sheet.writerow(record)

In [7]:
# ---- MAIN FUNCTION ----
def main(csv_path=r"C:\Users\Menna Mohamed\Downloads\books\new_scraped.csv",
         start_page=1, end_page=3):
    """Scrape a range of catalogue pages and store the books in a CSV file.

    Calling ``main()`` with no arguments behaves as before; the arguments
    exist so the notebook can be run on another machine or page range
    without editing the function body.

    Args:
        csv_path: Destination CSV file. The default is a machine-specific
            absolute path; pass your own location when running elsewhere.
        start_page: First catalogue page to scrape (inclusive).
        end_page: Last catalogue page to scrape (inclusive).
    """
    print("Starting scraping...")
    all_books = scrape_books(start_page, end_page)
    if not all_books:
        # save_csv() writes nothing for empty input, so reporting a saved
        # file here would be misleading.
        print("Scraping complete, but no books were found; nothing was saved.")
        return
    save_csv(csv_path, all_books)
    print(f"Scraping complete! {len(all_books)} books saved to {csv_path}")

In [8]:
# ---- RUN MAIN ----
# Runs the scraper when this code is executed directly (in a notebook kernel or
# as a script, __name__ is "__main__"); importing it as a module skips the call.
if __name__ == "__main__":
    main()

Starting scraping...
Scraping complete! 60 books saved to 