In [35]:
import pandas as pd 
import requests 
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [36]:
# ============================================
# Step 1) Get page range from user
# ============================================
start_page = int(input("Start page: "))
end_page = int(input("End page: "))

if start_page > end_page:
    raise ValueError("Start page must be smaller than end page")

print("\nScraping pages...\n")

Start page: 1
End page: 3

Scraping pages...



In [37]:
# ============================================
# Step 2) Lists to store scraped data
# ============================================
images = []
titles = []
links = []
stars = []
prices = []
availability = []


# ============================================
# Step 3) Scrape pages
# ============================================
for p in range(start_page, end_page + 1):

    # NOTE: BooksToScrape uses catalogue pages, not real pagination
    url = f"https://books.toscrape.com/?p={p}"
    print("Page", p, "-->", url)

    response = requests.get(url)
    response.encoding = "utf-8"   # Fix encoding issues like Â£

    if response.status_code != 200:
        print("request failed:", response.status_code)
        

    soup = BeautifulSoup(response.text, "html.parser")

    books = soup.find_all("article", class_="product_pod")
    print("Number of books found:", len(books), "\n")

    
    # ========================================
    # Step 4) Extract data from each book
    # ========================================
    for book in books:

        # ----- Image -----
        image_tag = book.find("img", class_="thumbnail")
        image = image_tag.get("src") if image_tag else None
        image_normal = urljoin(url, image) if image else None

        # ----- Title & Link -----
        h3_tag = book.find("h3")
        a_tag = h3_tag.find("a") if h3_tag else None

        title = a_tag.get("title") if a_tag else None
        link = a_tag.get("href") if a_tag else None
        link_normal = urljoin(url, link) if link else None

        # ----- Star Rating -----
        # Example: <p class="star-rating Three"></p>
        star_tag = book.find("p", class_="star-rating")
        star = star_tag.get("class")[1] if star_tag and star_tag.get("class") else None

        # ----- Price -----
        price_tag = book.find("p", class_="price_color")
        price = price_tag.text.strip() if price_tag else None
        price_num = price.replace("£", "").replace("Â", "").strip() if price else None

        # ----- Availability -----
        available_tag = book.find("p", class_="instock availability")
        available = available_tag.text.strip() if available_tag else None

        # ----- Store -----
        images.append(image_normal)
        titles.append(title)
        links.append(link_normal)
        stars.append(star)
        prices.append(price_num)
        availability.append(available)


Page 1 --> https://books.toscrape.com/?p=1
Number of books found: 20 

Page 2 --> https://books.toscrape.com/?p=2
Number of books found: 20 

Page 3 --> https://books.toscrape.com/?p=3
Number of books found: 20 



In [39]:
# ============================================
# Step 5) Create DataFrame
# ============================================
df = pd.DataFrame({
    "Image": images,
    "Title": titles,
    "Link": links,
    "Star": stars,
    "Price": prices,
    "Availability": availability
})

print("\nDataFrame shape:", df.shape)
df.head(5)



DataFrame shape: (60, 6)


Unnamed: 0,Image,Title,Link,Star,Price,Availability
0,https://books.toscrape.com/media/cache/2c/da/2...,A Light in the Attic,https://books.toscrape.com/catalogue/a-light-i...,Three,51.77,In stock
1,https://books.toscrape.com/media/cache/26/0c/2...,Tipping the Velvet,https://books.toscrape.com/catalogue/tipping-t...,One,53.74,In stock
2,https://books.toscrape.com/media/cache/3e/ef/3...,Soumission,https://books.toscrape.com/catalogue/soumissio...,One,50.1,In stock
3,https://books.toscrape.com/media/cache/32/51/3...,Sharp Objects,https://books.toscrape.com/catalogue/sharp-obj...,Four,47.82,In stock
4,https://books.toscrape.com/media/cache/be/a5/b...,Sapiens: A Brief History of Humankind,https://books.toscrape.com/catalogue/sapiens-a...,Five,54.23,In stock


In [40]:
# ============================================
# Step 6) Save to CSV
# ============================================
df.to_csv("books_multi_page.csv", index=False)
print("\nSaved: books_multi_page.csv")



Saved: books_multi_page.csv
