In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from pymongo import MongoClient
import time

client = MongoClient("mongodb://localhost:27017")
db = client["test0912"]
collection = db["books_crawl"]

service = Service(ChromeDriverManager().install())

options = Options()
options.add_argument("--window-size=1920x1080")
options.add_argument("--start-maximized")
options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
options.add_argument("--lang=ko_KR")

driver = webdriver.Chrome(service=service, options=options)

base_url = "https://books.toscrape.com/"
driver.get(base_url)

wait = WebDriverWait(driver, 10)

rating_map = {
    "One": 1,
    "Two": 2,
    "Three": 3,
    "Four": 4,
    "Five": 5
}

categories = driver.find_elements(By.CSS_SELECTOR, "div.side_categories ul li ul li a")
print(f"총 {len(categories)}개 카테고리")

for cat_idx in range(len(categories)):
    categories = driver.find_elements(By.CSS_SELECTOR, "div.side_categories ul li ul li a")
    cat = categories[cat_idx]
    cat_name = cat.text.strip()
    print(f"\n\n===== {cat_idx+1}. 카테고리 클릭: {cat_name} =====")

    cat.click()
    time.sleep(2)

    page_num = 1
    while True:
        print(f"\n--- {cat_name} {page_num} 페이지 ---")

        titles = driver.find_elements(By.CSS_SELECTOR, "article.product_pod > h3")
        price_gbps = driver.find_elements(By.CSS_SELECTOR, "p.price_color")
        stocks = driver.find_elements(By.CSS_SELECTOR, "p.instock.availability")
        ratings = driver.find_elements(By.CSS_SELECTOR, "p.star-rating")
        product_links = driver.find_elements(By.CSS_SELECTOR, "article.product_pod h3 a")

        page_data = []

        for i in range(len(titles)):
            rating_text = [c for c in ratings[i].get_attribute("class").split() if c != "star-rating"][0] if i < len(ratings) else None
            rating_number = rating_map.get(rating_text, 0) if rating_text else None

            record = {
                "category": cat_name,
                "title": titles[i].text if i < len(titles) else None,
                "price": price_gbps[i].text.replace("£", "") if i < len(price_gbps) else None,
                "stock": stocks[i].text.strip() if i < len(stocks) else None,
                "rating": rating_number,
                "url": product_links[i].get_attribute("href") if i < len(product_links) else None,
                "page": page_num
            }
            print(record)
            page_data.append(record)

        # MongoDB 저장
        if page_data:
            collection.insert_many(page_data)
            print(f"{len(page_data)}개 데이터 MongoDB 저장 완료!")

        try:
            next_btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "li.next > a")))
            next_btn.click()
            time.sleep(2)
            page_num += 1
        except:
            print(f"{cat_name} 카테고리 마지막 페이지 도달")
            break

    driver.get(base_url)
    time.sleep(1)

driver.quit()
client.close()

총 50개 카테고리 발견 ✅


===== 1. 카테고리 클릭: Travel =====

--- Travel 1 페이지 ---
{'category': 'Travel', 'title': "It's Only the Himalayas", 'price': '45.17', 'stock': 'In stock', 'rating': 2, 'url': 'https://books.toscrape.com/catalogue/its-only-the-himalayas_981/index.html', 'page': 1}
{'category': 'Travel', 'title': 'Full Moon over Noah’s ...', 'price': '49.43', 'stock': 'In stock', 'rating': 4, 'url': 'https://books.toscrape.com/catalogue/full-moon-over-noahs-ark-an-odyssey-to-mount-ararat-and-beyond_811/index.html', 'page': 1}
{'category': 'Travel', 'title': 'See America: A Celebration ...', 'price': '48.87', 'stock': 'In stock', 'rating': 3, 'url': 'https://books.toscrape.com/catalogue/see-america-a-celebration-of-our-national-parks-treasured-sites_732/index.html', 'page': 1}
{'category': 'Travel', 'title': 'Vagabonding: An Uncommon Guide ...', 'price': '36.94', 'stock': 'In stock', 'rating': 2, 'url': 'https://books.toscrape.com/catalogue/vagabonding-an-uncommon-guide-to-the-art-of-long-te