In [2]:
pip install requests beautifulsoup4 

Note: you may need to restart the kernel to use updated packages.


In [7]:
# Books to Scrape (https://books.toscrape.com) 크롤링

import requests
from bs4 import BeautifulSoup
import re

BASE_URL = "https://books.toscrape.com/"
CATEGORY_URL = BASE_URL + "catalogue/category/books/travel_2/index.html" 


res = requests.get(CATEGORY_URL)
soup = BeautifulSoup(res.text, "html.parser")


rating_map = {"One":1, "Two":2, "Three":3, "Four":4, "Five":5}

books = []
items = soup.select("article.product_pod")

for item in items:
    title = item.h3.a["title"]
    

    product_url = BASE_URL + "catalogue/" + item.h3.a["href"]
    
    price_text = item.select_one("p.price_color").get_text(strip=True)
    price = float(re.sub(r"[^\d.]", "", price_text))
    
    stock_text = item.select_one("p.instock.availability").get_text(" ", strip=True)
    stock = int(re.search(r"(\d+)", stock_text).group(1)) if re.search(r"(\d+)", stock_text) else 0
    
    rating_class = [c for c in item.p.get("class", []) if c in rating_map]
    rating = rating_map[rating_class[0]] if rating_class else None

    books.append({
        "title": title,
        "price": price,
        "stock": stock,
        "rating": rating,
        "category": "Travel",
        "product_url": product_url
    })

print(f"✅총 {len(books)}권 수집 완료\n")
for b in books:
    print(f"제목: {b['title']}")
    print(f"가격: £{b['price']}")
    print(f"재고: {b['stock']}")
    print(f"평점: {b['rating']}")
    print(f"카테고리: {b['category']}")
    print(f"URL: {b['product_url']}")
    print("-" * 50)


✅총 11권 수집 완료

제목: It's Only the Himalayas
가격: £45.17
재고: 0
평점: 2
카테고리: Travel
URL: https://books.toscrape.com/catalogue/../../../its-only-the-himalayas_981/index.html
--------------------------------------------------
제목: Full Moon over Noahâs Ark: An Odyssey to Mount Ararat and Beyond
가격: £49.43
재고: 0
평점: 4
카테고리: Travel
URL: https://books.toscrape.com/catalogue/../../../full-moon-over-noahs-ark-an-odyssey-to-mount-ararat-and-beyond_811/index.html
--------------------------------------------------
제목: See America: A Celebration of Our National Parks & Treasured Sites
가격: £48.87
재고: 0
평점: 3
카테고리: Travel
URL: https://books.toscrape.com/catalogue/../../../see-america-a-celebration-of-our-national-parks-treasured-sites_732/index.html
--------------------------------------------------
제목: Vagabonding: An Uncommon Guide to the Art of Long-Term World Travel
가격: £36.94
재고: 0
평점: 2
카테고리: Travel
URL: https://books.toscrape.com/catalogue/../../../vagabonding-an-uncommon-guide-to-the-art-of-long

In [20]:
# mongodb에 데이터 보내기

from pymongo import MongoClient


client = MongoClient("mongodb://localhost:27017") 
db = client["books_db"] 
collection = db["books"]


collection.delete_many({})


collection.insert_many(books)

print("✅ MongoDB에 데이터 저장 완료")


✅ MongoDB에 데이터 저장 완료


In [10]:
# mysql에 데이터 보내기

import pymysql

db = pymysql.connect(
    host="localhost",
    user="root",
    password="forena1303!!",
    db = "books_db",
    charset="utf8"
)


cursor = db.cursor()

cursor.execute("CREATE DATABASE IF NOT EXISTS books_db;")
cursor.execute("USE books_db;")

cursor.execute("""
CREATE TABLE IF NOT EXISTS books (
    id INT AUTO_INCREMENT PRIMARY KEY,
    title VARCHAR(300),
    price VARCHAR(50),
    stock INT,
    rating INT,
    category VARCHAR(100),
    product_url TEXT
)
""")


sql = "INSERT INTO books (title, price, stock, rating, category, product_url) VALUES (%s, %s, %s, %s, %s, %s)"
for b in books:
    cursor.execute(sql, (b["title"], b["price"], b["stock"], b["rating"], b["category"], b["product_url"]))

db.commit()
print("✅ MySQL에 데이터 저장 완료")

cursor.close()
db.close()

✅ MySQL에 데이터 저장 완료
