# Web Scraping car ads from OLX
**by Muhammad Erico Ricardo**

In [None]:
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Start a Chrome browser session driven by Selenium
driver = webdriver.Chrome()

# One accumulator list per output column; every scraped ad appends exactly one
# entry to each of them, so index i across all lists describes the same ad.
title = []
Fuel = []
Price = []
Engine = []
Location = []
Transmission = []
Range = []
Description = []

# Listing page to scrape (all used cars). To search by keyword instead, read one
# with input() and open
#   f"https://www.olx.co.id/jakarta-dki_g2000007/mobil-bekas_c198/q-{keyword}"
url = "https://www.olx.co.id/mobil-bekas_c198"
driver.get(url)

# Fixed pause to let the first page finish loading
time.sleep(3)

# Maximum number of ads to collect
max_ads = 345

# Scroll and click the "load more" button until at least max_ads ads are listed,
# the button disappears, or clicking it stops producing new ads.
prev_count = 0
# Number of consecutive clicks that did not increase the ad count. Bounding it
# prevents an endless loop when the button stays visible but loads nothing.
stalled_clicks = 0
max_stalled_clicks = 5
while True:
    # Scroll to the bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)

    # Count the ad cards currently present in the page
    ads = driver.find_elements(By.CSS_SELECTOR, "li[data-aut-id='itemBox']")
    print(f"📊 Jumlah iklan sementara: {len(ads)}")

    if len(ads) >= max_ads:
        print(f"✅ Sudah cukup {max_ads} iklan, berhenti load lebih banyak")
        break

    # The count did not grow since the previous pass: try the button
    if len(ads) == prev_count:
        if stalled_clicks >= max_stalled_clicks:
            print(f"⚠️ Jumlah iklan tidak bertambah setelah {max_stalled_clicks} kali klik, berhenti.")
            break
        try:
            load_more_btn = WebDriverWait(driver, 5).until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, "button[data-aut-id='btnLoadMore']"))
            )
            # Bring the button into view
            driver.execute_script("arguments[0].scrollIntoView(true);", load_more_btn)
            time.sleep(1)
            # Click through JavaScript rather than a native click
            driver.execute_script("arguments[0].click();", load_more_btn)
            print("👉 Klik tombol 'Muat lebih banyak'")
            stalled_clicks += 1
            time.sleep(3)
        except TimeoutException:
            # The wait expired without the button becoming visible
            print("⚠️ Tombol 'Muat lebih banyak' tidak ditemukan (mungkin iklan sudah habis).")
            break
        except WebDriverException as exc:
            # Any other browser-side failure: stop loading but report the real
            # cause instead of claiming the button was missing. Non-Selenium
            # errors (e.g. KeyboardInterrupt) are deliberately not caught.
            print(f"⚠️ Gagal memuat lebih banyak iklan: {exc}")
            break
    else:
        # New ads appeared, so the previous click (if any) worked
        stalled_clicks = 0

    prev_count = len(ads)

# Collect the detail-page URL of every loaded ad
ads = driver.find_elements(By.CSS_SELECTOR, "li[data-aut-id='itemBox'] a")
hrefs = (ad.get_attribute("href") for ad in ads)
# The selector matches every anchor inside a card, so a card with more than one
# anchor would repeat its URL. dict.fromkeys drops repeats while keeping the
# order in which the ads appear on the page; empty/missing hrefs are skipped.
links = list(dict.fromkeys(href for href in hrefs if href))

# Keep at most max_ads links
links = links[:max_ads]
print(f"Ditemukan {len(links)} iklan...")

# All column lists in output order, so one row can be pushed to them in a loop
ad_columns = (title, Fuel, Price, Engine, Location, Transmission, Range, Description)


def _text_or_none(node):
    """Return the text of a parsed element, or None when the lookup found nothing."""
    return node.get_text() if node else None


# Visit every ad's detail page and record exactly one row per link
for idx, link in enumerate(links):
    try:
        driver.get(link)

        # Scroll down a little, pause, then wait up to 10 s for the <h1> heading
        driver.execute_script("window.scrollTo(0, 500);")
        time.sleep(1)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "h1"))
        )

        # Parse the rendered detail page
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Location and engine capacity are read from label/value boxes; when
        # several boxes carry the same label, the last one wins
        location_val, engine_val = None, None
        for box in soup.find_all("div", class_="_3dS7E"):
            label = box.find("div", class_="_CCSn")
            value = box.find("div", class_="_3VRXh")
            if not (label and value):
                continue
            label_text = label.get_text()
            if "Lokasi" in label_text:
                location_val = value.get_text()
            elif "Kapasitas mesin" in label_text:
                engine_val = value.get_text()

        # Description: all text fragments of the block joined by single spaces
        desc_block = soup.find("div", {"data-aut-id": "descriptionDetails"})
        desc_text = " ".join(desc_block.stripped_strings) if desc_block else None

        # Values in the same order as ad_columns; missing elements become None
        row = (
            _text_or_none(soup.find("h1")),
            _text_or_none(soup.find("h2", {"data-aut-id": "itemAttribute_fuel"})),
            _text_or_none(soup.find("div", {"data-aut-id": "itemPrice"})),
            engine_val,
            location_val,
            _text_or_none(soup.find("h2", {"data-aut-id": "itemAttribute_transmission"})),
            _text_or_none(soup.find("div", {"data-aut-id": "itemAttribute_mileage"})),
            desc_text,
        )
        for column, cell in zip(ad_columns, row):
            column.append(cell)

        print(f"✅ Berhasil scrape iklan {idx+1}/{len(links)}")

    except Exception as e:
        print(f"❌ Error di iklan {idx+1}: {e}")
        # Still add an all-None row so every column keeps one entry per link
        for column in ad_columns:
            column.append(None)

# Close the browser session
driver.quit()

# Pair each CSV header with its accumulator list, build the table, and save it.
# The "utf-8-sig" codec writes a UTF-8 byte-order mark at the start of the file.
column_names = (
    "Title",
    "Fuel",
    "Price",
    "Engine",
    "Location",
    "Transmission",
    "Range",
    "Description",
)
column_values = (title, Fuel, Price, Engine, Location, Transmission, Range, Description)
data = pd.DataFrame(dict(zip(column_names, column_values)))
data.to_csv("olx_scrape.csv", index=False, encoding="utf-8-sig")
print("📂 Data berhasil disimpan ke olx_scrape.csv")


📊 Jumlah iklan sementara: 20
📊 Jumlah iklan sementara: 20
⚠️ Tombol 'Muat lebih banyak' tidak ditemukan (mungkin iklan sudah habis).
Ditemukan 20 iklan...
✅ Berhasil scrape iklan 1/20
✅ Berhasil scrape iklan 2/20
✅ Berhasil scrape iklan 3/20
✅ Berhasil scrape iklan 4/20
✅ Berhasil scrape iklan 5/20
✅ Berhasil scrape iklan 6/20
✅ Berhasil scrape iklan 7/20
✅ Berhasil scrape iklan 8/20
✅ Berhasil scrape iklan 9/20
✅ Berhasil scrape iklan 10/20
✅ Berhasil scrape iklan 11/20
✅ Berhasil scrape iklan 12/20
✅ Berhasil scrape iklan 13/20
✅ Berhasil scrape iklan 14/20
✅ Berhasil scrape iklan 15/20
✅ Berhasil scrape iklan 16/20
✅ Berhasil scrape iklan 17/20
✅ Berhasil scrape iklan 18/20
✅ Berhasil scrape iklan 19/20
✅ Berhasil scrape iklan 20/20
📂 Data berhasil disimpan ke olx_scrape.csv
