In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import random

In [None]:
base_url = "http://books.toscrape.com/catalogue/page-{}.html"
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection can hang the cell indefinitely

all_books = []

# A single session reuses the HTTP connection across all listing pages.
with requests.Session() as session:
    # Walk every listing page (1–50).
    for page in range(1, 51):
        url = base_url.format(page)
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT)
            page_ok = response.status_code == 200
        except requests.RequestException:
            # Network-level failure: handle it like a bad status so the books
            # collected so far are kept instead of aborting the whole crawl.
            page_ok = False
        if not page_ok:
            print(f"Gagal akses halaman {page}")
            continue

        # NOTE(review): response.text uses the encoding requests guesses, which
        # is why Price comes out as "Â£.." rather than "£..". It is kept as-is
        # because the price-cleaning cell further down was written against this
        # string form; switch to response.content only together with that cell.
        soup = BeautifulSoup(response.text, "html.parser")
        books = soup.find_all("article", class_="product_pod")

        for book in books:
            title = book.h3.a["title"]
            price = book.find("p", class_="price_color").text.strip()
            availability = book.find("p", class_="instock availability").text.strip()
            # The rating word (e.g. "Three") is the second CSS class on the
            # article's first <p> element.
            rating = book.p["class"][1]
            link = book.h3.a["href"].replace("../../../", "")

            all_books.append({
                "Title": title,
                "Price": price,
                "Availability": availability,
                "Rating": rating,
                "Link": "http://books.toscrape.com/catalogue/" + link
            })

# Build the DataFrame once from the collected records.
df = pd.DataFrame(all_books)

In [None]:
# Report how many books were scraped, then preview the first five rows.
row_count = len(df)
print("Total buku:", row_count)
df.head(5)

Total buku: 1000


Unnamed: 0,Title,Price,Availability,Rating,Link
0,A Light in the Attic,Â£51.77,In stock,Three,http://books.toscrape.com/catalogue/a-light-in...
1,Tipping the Velvet,Â£53.74,In stock,One,http://books.toscrape.com/catalogue/tipping-th...
2,Soumission,Â£50.10,In stock,One,http://books.toscrape.com/catalogue/soumission...
3,Sharp Objects,Â£47.82,In stock,Four,http://books.toscrape.com/catalogue/sharp-obje...
4,Sapiens: A Brief History of Humankind,Â£54.23,In stock,Five,http://books.toscrape.com/catalogue/sapiens-a-...


In [None]:
# Summarise column dtypes and non-null counts of the scraped frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Title         1000 non-null   object
 1   Price         1000 non-null   object
 2   Availability  1000 non-null   object
 3   Rating        1000 non-null   object
 4   Link          1000 non-null   object
dtypes: object(5)
memory usage: 39.2+ KB


In [None]:
# =========================
# STEP 2: Collect every book that has both a description and a category
# =========================
# Each row's detail page is fetched (one request per book in `df`), so this
# cell is slow. The random pick itself happens in the next step.
DETAIL_TIMEOUT = 10  # seconds; without a timeout a stalled connection can hang the cell indefinitely

valid_books = []

# One session reuses the underlying connection across the per-book requests.
with requests.Session() as session:
    for _, row in df.iterrows():
        try:
            response_book = session.get(row["Link"], timeout=DETAIL_TIMEOUT)
        except requests.RequestException:
            continue  # network failure: skip this book, same as a non-200 page
        if response_book.status_code != 200:
            continue

        # Parse the raw bytes so BeautifulSoup detects the page encoding itself;
        # decoding via response.text produced mojibake in the description text.
        soup_book = BeautifulSoup(response_book.content, "html.parser")

        # Description: the first <p> following the "product_description" div.
        description_anchor = soup_book.find("div", id="product_description")
        description_tag = description_anchor.find_next("p") if description_anchor else None
        if description_tag is None:
            continue  # skip books without a description
        description = description_tag.text.strip()

        # Category: the third link in the breadcrumb trail.
        breadcrumb = soup_book.find("ul", class_="breadcrumb")
        breadcrumb_links = breadcrumb.find_all("a") if breadcrumb else []
        if len(breadcrumb_links) < 3:
            continue  # skip books without a category
        category = breadcrumb_links[2].text.strip()

        valid_books.append({
            "Title": row["Title"],
            "Price": row["Price"],
            "Availability": row["Availability"],
            "Rating": row["Rating"],
            "Category": category,
            "Description": description
        })

In [None]:
# =========================
# STEP 3: Pick one book at random from the valid candidates and print it
# =========================
if not valid_books:
    print("Tidak ada buku yang memiliki kategori & deskripsi.")
else:
    random_book = random.choice(valid_books)
    # (label, key) pairs in the order they are printed.
    report_fields = (
        ("Judul Buku:", "Title"),
        ("Harga:", "Price"),
        ("Ketersediaan:", "Availability"),
        ("Rating:", "Rating"),
        ("Kategori:", "Category"),
        ("Deskripsi:", "Description"),
    )
    for label, key in report_fields:
        print(label, random_book[key])

Judul Buku: Three Wishes (River of Time: California #1)
Harga: 44.18
Ketersediaan: In stock
Rating: 2
Kategori: Science Fiction
Deskripsi: BE CAREFUL WHAT YOU WISH FOR. BECAUSE YOU JUST MIGHT GET IT. Zara Ruiz is a normal California girl on the verge of graduating high school and thinking about her dawning future. But when dusk brings a shattering lossâand she discovers a priceless object in the shallows of a tide poolâshe is thrust a hundred-and-eighty years into the past. There she meets handsome ranchero J BE CAREFUL WHAT YOU WISH FOR. BECAUSE YOU JUST MIGHT GET IT. Zara Ruiz is a normal California girl on the verge of graduating high school and thinking about her dawning future. But when dusk brings a shattering lossâand she discovers a priceless object in the shallows of a tide poolâshe is thrust a hundred-and-eighty years into the past. There she meets handsome ranchero Javier de la Venturaâwho has big dreams and bigger secretsâalong with his enchanting family, in a l

**Opsional**
---
Membersihkan simbol mata uang pada kolom `Price` dan mengubah `Rating` dari kata (One–Five) menjadi angka pada DataFrame tersebut.

In [None]:
# ========================
# 1. Strip the currency symbol from Price
# ========================
# Guarded by a dtype check so re-running this cell is a no-op: the .str
# accessor raises on a column that has already been converted to float.
# Every character except digits and the decimal point is dropped, so the
# cleanup works whether the symbol was decoded as "Â£" or "£".
if not pd.api.types.is_numeric_dtype(df["Price"]):
    df["Price"] = (
        df["Price"]
        .str.replace(r"[^0-9.]", "", regex=True)
        .astype(float)
    )

# ========================
# 2. Convert Rating words to numbers
# ========================
rating_map = {
    "One": 1,
    "Two": 2,
    "Three": 3,
    "Four": 4,
    "Five": 5
}
# Same guard: mapping an already-numeric column through rating_map would turn
# every value into NaN on a second run.
if not pd.api.types.is_numeric_dtype(df["Rating"]):
    df["Rating"] = df["Rating"].map(rating_map)

In [None]:

# ========================
# Cleaned result: preview the first rows of df after the cleaning cell
# ========================
df.head()

Unnamed: 0,Title,Price,Availability,Rating,Link
0,A Light in the Attic,51.77,In stock,3,http://books.toscrape.com/catalogue/a-light-in...
1,Tipping the Velvet,53.74,In stock,1,http://books.toscrape.com/catalogue/tipping-th...
2,Soumission,50.1,In stock,1,http://books.toscrape.com/catalogue/soumission...
3,Sharp Objects,47.82,In stock,4,http://books.toscrape.com/catalogue/sharp-obje...
4,Sapiens: A Brief History of Humankind,54.23,In stock,5,http://books.toscrape.com/catalogue/sapiens-a-...


In [None]:
# Re-inspect dtypes and non-null counts after the cleaning cell.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Title         1000 non-null   object 
 1   Price         1000 non-null   float64
 2   Availability  1000 non-null   object 
 3   Rating        1000 non-null   int64  
 4   Link          1000 non-null   object 
dtypes: float64(1), int64(1), object(3)
memory usage: 39.2+ KB
