# Tokopedia Showcase Verification

In [None]:
from pathlib import Path
from typing import Optional

from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    ElementClickInterceptedException,
)
import time
import csv
from datetime import datetime

# Tokopedia product-listing URLs (".../<shop>/product") of the roastery
# stores to scrape. main() falls back to this list when it is called
# without an explicit store_urls argument.
STORE_URLS: list[str] = [
    "https://www.tokopedia.com/instinctroastery/product",
    "https://www.tokopedia.com/fugolcoffee/product",
    "https://www.tokopedia.com/jack-runners-roastery/product",
    "https://www.tokopedia.com/maleena-coffee-roasters/product",
    "https://www.tokopedia.com/hayaticoffee/product",
    "https://www.tokopedia.com/tatidocoffeeroasters/product",
    "https://www.tokopedia.com/herd-coffee-roaster/product",
    "https://www.tokopedia.com/hanaroastery/product",
    "https://www.tokopedia.com/arws/product",
    "https://www.tokopedia.com/thfcoffee/product",
    "https://www.tokopedia.com/nyawang-langit-roastery/product",
    "https://www.tokopedia.com/goodthingscoffee/product",
    "https://www.tokopedia.com/thehiddenswargiroastery/product",
    "https://www.tokopedia.com/portacaba/product",
    "https://www.tokopedia.com/collinsroasters/product",
    "https://www.tokopedia.com/sakha-coffee-bali/product",
    "https://www.tokopedia.com/saysomecoffee/product",
    "https://www.tokopedia.com/skyninecoffee/product",
    "https://www.tokopedia.com/kurokoffee/product",
    "https://www.tokopedia.com/arutalacoffee/product"
]

# CSV file (relative to the working directory) that save_rows() writes the
# scraped product rows to.
OUTPUT_PATH: Path = Path("tokopedia_products.csv")

def build_driver() -> webdriver.Chrome:
    """Create a Chrome WebDriver running in normal (non-headless) mode.

    The browser is started with a fixed list of command-line switches and a
    fixed desktop Chrome user-agent string; no headless switch is passed.

    Returns:
        A new ``webdriver.Chrome`` instance. The caller is responsible for
        calling ``quit()`` on it.
    """
    # NOTE(review): "--disable-images" may not be a switch Chrome recognizes;
    # confirm whether images are actually being skipped.
    chrome_switches = (
        "--start-maximized",
        "--window-size=1920,1080",
        "--disable-blink-features=AutomationControlled",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-extensions",
        "--disable-plugins",
        "--disable-images",
        "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    )

    chrome_options = ChromeOptions()
    for switch in chrome_switches:
        chrome_options.add_argument(switch)

    return webdriver.Chrome(options=chrome_options)


def save_rows(rows: list[dict], append: bool) -> None:
    """Write scraped product rows to the CSV file at ``OUTPUT_PATH``.

    The file is appended to only when ``append`` is true AND the file already
    exists; otherwise it is created (or truncated). A header row is written
    whenever the data would otherwise land in a header-less file: on
    create/truncate, and when appending to an existing but empty file.

    Args:
        rows: Dicts keyed by ``source``, ``name``, ``price`` and
            ``description``. ``csv.DictWriter`` raises ``ValueError`` for
            rows carrying any other key.
        append: Request appending to an existing file instead of
            overwriting it.
    """
    fieldnames = ["source", "name", "price", "description"]

    appending = append and OUTPUT_PATH.exists()
    # An existing zero-byte file has no header yet, so appending to it must
    # still emit one; stat() is only reached when the file exists.
    needs_header = not appending or OUTPUT_PATH.stat().st_size == 0

    with open(
        OUTPUT_PATH, "a" if appending else "w", newline="", encoding="utf-8"
    ) as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerows(rows)


def _scrape_product_detail(driver: "webdriver.Chrome", card_element, shop_name: str) -> dict:
    """Open one product card in a new tab and return its scraped row.

    The tab opened for the product is always closed and focus is always
    returned to the listing window, whether scraping succeeds or fails, so
    the caller can keep paginating in the correct window.

    Args:
        driver: WebDriver currently focused on the store listing window.
        card_element: Product card element; its first ``<a>`` supplies the
            product URL.
        shop_name: Value stored in the row's ``source`` field.

    Returns:
        A dict with ``source``, ``name``, ``price`` and ``description``
        (description whitespace collapsed to a single line).

    Raises:
        RuntimeError: If no new tab appeared after ``window.open``.
        Exception: Any Selenium error raised while loading or reading the
            product page (for example a missing price/description element).
    """
    main_window = driver.current_window_handle
    try:
        driver.execute_script("arguments[0].scrollIntoView(true);", card_element)
        time.sleep(0.5)
        driver.execute_script(
            "window.open(arguments[0].querySelector('a').href, '_blank');",
            card_element,
        )
        time.sleep(1)

        # Pick the product tab by excluding the listing handle instead of
        # relying on the ordering of window_handles.
        detail_tabs = [h for h in driver.window_handles if h != main_window]
        if not detail_tabs:
            raise RuntimeError("product tab did not open")
        driver.switch_to.window(detail_tabs[-1])

        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "[data-testid='lblPDPDetailProductName']"))
        )

        # Expand the description when a "see more" button shows up within
        # two seconds; its absence is not an error.
        try:
            see_more_btn = WebDriverWait(driver, 2).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "[data-testid='btnPDPSeeMore']"))
            )
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", see_more_btn)
            driver.execute_script("window.scrollBy(0, -150);")
            try:
                see_more_btn.click()
            except ElementClickInterceptedException:
                # Something overlaps the button; click it through JavaScript.
                driver.execute_script("arguments[0].click();", see_more_btn)
        except TimeoutException:
            pass

        product_name = driver.find_element(By.CSS_SELECTOR, "[data-testid='lblPDPDetailProductName']").text
        product_price = driver.find_element(By.CSS_SELECTOR, "[data-testid='lblPDPDetailProductPrice']").text
        product_desc = driver.find_element(By.CSS_SELECTOR, "[data-testid='lblPDPDescriptionProduk']").text

        return {
            "source": shop_name,
            "name": product_name,
            "price": product_price,
            "description": " ".join(product_desc.split()),
        }
    finally:
        # Close every tab except the listing one and refocus the listing,
        # even when scraping raised.
        for handle in driver.window_handles:
            if handle != main_window:
                driver.switch_to.window(handle)
                driver.close()
        driver.switch_to.window(main_window)


def scrape_store(store_url: str, limit: Optional[int] = None) -> list[dict]:
    """Scrape one store and return the list of its products.

    Loads ``store_url`` in a fresh Chrome instance, walks the product cards
    of every listing page, opens each product in a new tab and records its
    name, price and description. The browser is always quit before
    returning.

    Args:
        store_url: URL of the store's product-listing page.
        limit: Maximum number of product rows to collect. A falsy value
            (``None`` or ``0``) means no limit.

    Returns:
        One dict per product with keys ``source``, ``name``, ``price`` and
        ``description``. An empty list is returned when the showcase title
        is not "Semua Produk" (Indonesian for "All Products"). If an
        unexpected error interrupts the scrape, the rows collected so far
        are returned instead of being discarded.
    """
    driver = build_driver()
    wait = WebDriverWait(driver, 30)
    products_data: list[dict] = []

    def limit_reached() -> bool:
        return bool(limit) and len(products_data) >= limit

    try:
        driver.get(store_url)

        title_element = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "[data-testid='showCaseTitle']"))
        )
        if title_element.text != "Semua Produk":
            return []

        # The shop name is best-effort: fall back to a placeholder.
        try:
            shop_name = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "[data-testid='shopNameHeader']"))
            ).text
        except Exception:
            shop_name = "Unknown"

        while True:
            card_idx = 1

            while not limit_reached():
                card_xpath = (
                    "//h4[@data-testid='showCaseTitle']/following-sibling::div[1]/div/div["
                    f"{card_idx}]"
                )
                try:
                    card_element = WebDriverWait(driver, 3).until(
                        EC.presence_of_element_located((By.XPATH, card_xpath))
                    )
                    row = _scrape_product_detail(driver, card_element, shop_name)
                except Exception:
                    # A timeout on the card lookup means the page has no more
                    # cards. Any other failure abandons the rest of this page
                    # (best-effort); the helper has already restored the
                    # listing window, so pagination below stays valid.
                    break

                products_data.append(row)
                card_idx += 1
                time.sleep(1)

            if limit_reached():
                break

            # NOTE(review): pagination ends when the next-page button cannot
            # be found or clicked; this assumes the button is absent on the
            # last page - confirm against the live page.
            try:
                next_button = driver.find_element(By.CSS_SELECTOR, "[data-testid='btnShopProductPageNext']")
                time.sleep(1)
                next_button.click()
                time.sleep(2)
            except Exception:
                break

        return products_data

    except Exception:
        # Keep whatever was collected before the failure.
        return products_data
    finally:
        time.sleep(2)
        driver.quit()


def main(limit: Optional[int] = None, store_urls: Optional[list[str]] = None) -> None:
    """Scrape the target stores one after another into a single CSV.

    Args:
        limit: Forwarded to ``scrape_store`` as the per-store product limit.
        store_urls: Stores to scrape; when empty or ``None`` the module-level
            ``STORE_URLS`` list is used.

    Raises:
        ValueError: If there is no store URL to scrape at all.
    """
    if store_urls:
        targets = store_urls
    else:
        targets = STORE_URLS

    if len(targets) == 0:
        raise ValueError("STORE_URLS kosong. Tambahkan URL toko terlebih dahulu.")

    for url in targets:
        scraped = scrape_store(url, limit=limit)
        if scraped:
            # Append once the CSV exists; stores with no rows are skipped.
            save_rows(scraped, append=OUTPUT_PATH.exists())

# Run the scrape with the default arguments (no limit, all STORE_URLS) when
# this module is executed directly rather than imported.
if __name__ == "__main__":
    main()
