In [4]:
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

# Scrape every catalogue page of books.toscrape.com into books.csv
# (title, price, availability, product URL), then reload the CSV with
# pandas and print a short summary.

# Base URLs
base_url = "https://books.toscrape.com/catalogue/page-{}.html"
first_page_url = "https://books.toscrape.com/catalogue/page-1.html"

# Seconds to wait on the server before giving up. requests applies no
# timeout by default, so a stalled connection would hang the script.
request_timeout = 10

# A single session reuses the underlying connection for all page requests.
with requests.Session() as session:
    # Send a GET request to fetch the webpage; fail loudly on HTTP errors
    # instead of silently parsing an error page.
    response = session.get(first_page_url, timeout=request_timeout)
    response.raise_for_status()

    # Display the HTML source code
    print("=== HTML Source Code of the Page ===\n")
    print(response.text[:1000])
    print("\n=== End of HTML Source Code ===\n")

    # Parse the raw bytes rather than response.text: the page declares
    # charset=UTF-8 in a <meta> tag, which BeautifulSoup honours, whereas
    # requests falls back to ISO-8859-1 when the Content-Type header carries
    # no charset and would turn "£" into "Â£".
    soup = BeautifulSoup(response.content, "html.parser")

    # Detect total pages automatically
    pager = soup.find("li", class_="current")  # "Page 1 of 50"
    if pager is None:
        # No pager element: treat the catalogue as a single page.
        total_pages = 1
    else:
        page_text = pager.text.strip()
        total_pages = int(page_text.split("of")[-1].strip())
    print(f"📄 Total pages found: {total_pages}")

    # Create CSV file
    with open("books.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Title", "Price", "Availability", "Product_URL"])

        # Loop through all detected pages
        for page in range(1, total_pages + 1):
            print(f"Scraping page {page}/{total_pages}...")

            # Page 1 is already downloaded and parsed above; only fetch the rest.
            if page > 1:
                url = base_url.format(page)
                response = session.get(url, timeout=request_timeout)
                response.raise_for_status()
                soup = BeautifulSoup(response.content, "html.parser")

            # Find all book containers
            books = soup.find_all("article", class_="product_pod")

            for book in books:
                link = book.h3.a
                title = link["title"]
                price = book.find("p", class_="price_color").text.strip()
                availability = book.find("p", class_="instock availability").text.strip()
                # hrefs on catalogue pages are relative to /catalogue/
                product_link = "https://books.toscrape.com/catalogue/" + link["href"]

                writer.writerow([title, price, availability, product_link])

print("Done! Data saved to 'books.csv'")
df = pd.read_csv("books.csv")
print(f"Total books stored in CSV file: {len(df)} books\n")
print("\nShow first 10 rows from books.csv:\n")


print(df.head(10).to_string(index=False))


=== HTML Source Code of the Page ===



<!DOCTYPE html>
<!--[if lt IE 7]>      <html lang="en-us" class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]>         <html lang="en-us" class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]>         <html lang="en-us" class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!--> <html lang="en-us" class="no-js"> <!--<![endif]-->
    <head>
        <title>
    All products | Books to Scrape - Sandbox
</title>

        <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
        <meta name="created" content="24th Jun 2016 09:30" />
        <meta name="description" content="" />
        <meta name="viewport" content="width=device-width" />
        <meta name="robots" content="NOARCHIVE,NOCACHE" />

        <!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
        <!--[if lt IE 9]>
        <script src="//html5shim.googlecode.com/svn/trunk/html5.js"></script>
        <![endif]-->

        
            <link rel="sh