# books-to-scrape-scraping

## Import Libraries

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import time
import pandas as pd

## Functions for Scraping

### Scraping Function

In [None]:
def scrape_books_from_page(url, timeout=10):
    """Download one listing page and extract the books shown on it.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without it a stalled connection blocks forever.

    Returns:
        A list with one dict per book, keyed by 'Title', 'Price',
        'Availability', 'Rating' and 'Image URL'. An empty list is
        returned (and the error printed) when the request fails.
    """
    # Imported locally so this cell does not depend on edits to the import cell.
    from urllib.parse import urljoin

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return []

    # Parse the raw bytes so the parser picks the encoding from the document
    # itself. `response.text` is decoded as ISO-8859-1 when the Content-Type
    # header carries no charset, which garbles UTF-8 characters such as '£'.
    soup = BeautifulSoup(response.content, 'html.parser')

    book_data = []

    # Every book on a listing page lives in its own <article class="product_pod">.
    for book in soup.find_all('article', class_='product_pod'):
        title = book.h3.a['title']

        price = book.find('p', class_='price_color').text

        # Match on the shared 'availability' class so that entries without the
        # 'instock' modifier are still found instead of raising AttributeError.
        availability_tag = book.find('p', class_='availability')
        availability = (
            availability_tag.text.strip() if availability_tag else "Unknown"
        )

        # The rating is encoded as the second CSS class of the star-rating
        # tag, e.g. class="star-rating Three". Look the tag up by class rather
        # than assuming it is the first <p> in the article.
        rating_tag = book.find('p', class_='star-rating')
        rating_classes = rating_tag.get('class', []) if rating_tag else []
        rating = next(
            (name for name in rating_classes if name != 'star-rating'),
            "No rating",
        )

        # Image sources are relative to the page that was actually served, so
        # resolve them against the final (post-redirect) URL.
        image_url = urljoin(response.url, book.find('img')['src'])

        book_data.append({
            'Title': title,
            'Price': price,
            'Availability': availability,
            'Rating': rating,
            'Image URL': image_url
        })

    return book_data


### Function to Scrape Multiple Pages

In [None]:
def scrape_multiple_pages(base_url, total_pages, delay=1):
    """Scrape the first ``total_pages`` listing pages of a site.

    Args:
        base_url: Root address of the site. Page 1 is fetched from it
            directly; later pages from ``catalogue/page-N.html`` below it.
            A trailing slash is optional.
        total_pages: Number of pages to fetch, counting from page 1.
        delay: Seconds to pause between consecutive requests.

    Returns:
        A single flat list holding the book dicts of every page, in page
        order. Pages for which nothing was scraped contribute no entries.
    """
    # Normalise the root so the page path below is always joined with exactly
    # one slash, whether or not the caller supplied a trailing one.
    root = base_url if base_url.endswith('/') else base_url + '/'
    all_books = []

    for page in range(1, total_pages + 1):
        # The first page is the site root; the rest follow a numbered pattern.
        url = root if page == 1 else f"{root}catalogue/page-{page}.html"
        print(f"Scraping page {page}: {url}")
        all_books.extend(scrape_books_from_page(url))

        # Throttle between requests only; no need to wait after the last one.
        if page < total_pages:
            time.sleep(delay)

    return all_books


### Function to Save Data to CSV

In [None]:
def save_to_csv(data, filename):
    """Write a list of row dicts to ``filename`` as UTF-8 CSV.

    The column order is taken from the keys of the first row. When ``data``
    is empty nothing is written and a notice is printed instead.
    """
    if data:
        columns = data[0].keys()
        with open(filename, 'w', newline='', encoding='utf-8') as handle:
            out = csv.DictWriter(handle, fieldnames=columns)
            out.writeheader()
            for row in data:
                out.writerow(row)
        print(f"Data saved to {filename}")
    else:
        print("No data to save.")


## Using the Functions

### Using the Scraping Functions

In [None]:
# Set the site URL and the number of pages to scrape
base_url = 'https://books.toscrape.com/'
total_pages = 5  # Change to the number of pages you want

# Run the scrape and save the collected data to a CSV file
books_data = scrape_multiple_pages(base_url, total_pages)
save_to_csv(books_data, 'books_data.csv')


### Show Data in the Notebook

In [None]:
# Read the CSV file back and display the first few rows
df = pd.read_csv('books_data.csv')
df.head()