In [1]:
import selenium
print(selenium.__version__)

4.34.2


In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

options = Options()
options.add_argument("--headless")
service = Service("chromedriver.exe")

driver = webdriver.Chrome(service=service, options=options)

driver.get("https://books.toscrape.com")
print("✅ Site loaded.")

✅ Site loaded.


In [5]:
from selenium.webdriver.common.by import By
import pandas as pd

# Find all book containers
books = driver.find_elements(By.CLASS_NAME, "product_pod")

data = []

for book in books:
    title = book.find_element(By.TAG_NAME, "h3").find_element(By.TAG_NAME, "a").get_attribute("title")
    price = book.find_element(By.CLASS_NAME, "price_color").text
    availability = book.find_element(By.CLASS_NAME, "availability").text.strip()

    data.append({
        "title": title,
        "price": price,
        "availability": availability
    })

# Create DataFrame
df = pd.DataFrame(data)
df.head()

Unnamed: 0,title,price,availability
0,A Light in the Attic,£51.77,In stock
1,Tipping the Velvet,£53.74,In stock
2,Soumission,£50.10,In stock
3,Sharp Objects,£47.82,In stock
4,Sapiens: A Brief History of Humankind,£54.23,In stock


In [6]:
# Save the data to CSV
df.to_csv("books_data.csv", index=False)

print("✅ Data saved to books_data.csv")

✅ Data saved to books_data.csv


In [7]:
# Clean up price column (remove £ and convert to float)
df['price'] = df['price'].str.replace('£', '').astype(float)

# Basic insights
print("Average book price:", round(df['price'].mean(), 2))
print("Max price:", df['price'].max())
print("Min price:", df['price'].min())

Average book price: 38.05
Max price: 57.25
Min price: 13.99


In [8]:
from selenium.common.exceptions import NoSuchElementException

all_data = []

while True:
    books = driver.find_elements(By.CLASS_NAME, "product_pod")
    
    for book in books:
        title = book.find_element(By.TAG_NAME, "h3").text
        price = book.find_element(By.CLASS_NAME, "price_color").text
        availability = book.find_element(By.CLASS_NAME, "availability").text.strip()

        all_data.append({
            "title": title,
            "price": price,
            "availability": availability
        })
    
    # Try to go to next page
    try:
        next_button = driver.find_element(By.CLASS_NAME, "next")
        next_page_url = next_button.find_element(By.TAG_NAME, "a").get_attribute("href")
        driver.get(next_page_url)
    except NoSuchElementException:
        break  # No next page, we're done

# Convert to DataFrame
df_all = pd.DataFrame(all_data)
df_all['price'] = df_all['price'].str.replace('£', '').astype(float)
df_all.to_csv("books_all_pages.csv", index=False)
print("✅ All pages scraped and saved!")
df_all.head()

✅ All pages scraped and saved!


Unnamed: 0,title,price,availability
0,A Light in the ...,51.77,In stock
1,Tipping the Velvet,53.74,In stock
2,Soumission,50.1,In stock
3,Sharp Objects,47.82,In stock
4,Sapiens: A Brief History ...,54.23,In stock
