In [1]:
# pip install selenium beautifulsoup4

import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

def scrape_ccc(asin, timeout=20, settle_seconds=5):
    """Scrape a camelcamelcamel product page using a visible Chrome window.

    Args:
        asin: Product identifier; interpolated into
            ``https://camelcamelcamel.com/product/{asin}``.
        timeout: Maximum seconds to wait for each of the two key page
            elements (product title link, chart placeholder) to appear.
        settle_seconds: Extra fixed pause after the elements are present,
            before the page source is captured.

    Returns:
        dict with keys:
            "titulo":    title text, or "N/D" if the title link is missing.
            "asin":      ASIN text found on the page, or None if not found.
            "historial": {row label: {"lowest", "highest", "current",
                         "average"}} taken from ``table.camelegend`` rows.
            "buybox":    {price label: {"price", "as_of"}}; "as_of" is None
                         when the cell carries no date element.

    Raises:
        selenium.common.exceptions.TimeoutException: if either key element
            does not appear within ``timeout`` seconds. The browser is still
            closed in that case.
    """
    url = f"https://camelcamelcamel.com/product/{asin}"
    html = _fetch_rendered_html(url, timeout, settle_seconds)
    return _parse_ccc_html(html)


def _fetch_rendered_html(url, timeout, settle_seconds):
    # Load `url` in non-headless Chrome and return the rendered page source.
    opts = Options()
    # Deliberately NOT headless (no "--headless" argument).
    # NOTE(review): presumably headless sessions get blocked by the site's
    # Cloudflare check -- confirm before changing.
    opts.add_argument("--disable-gpu")
    opts.add_argument("--no-sandbox")
    # Optional: disable extensions to avoid interference
    opts.add_argument("--disable-extensions")

    driver = webdriver.Chrome(options=opts)
    try:
        driver.get(url)

        # Wait until both the title link and the chart container are in the DOM.
        wait = WebDriverWait(driver, timeout)
        wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#product-header h2 a"))
        )
        wait.until(EC.presence_of_element_located((By.ID, "chartplaceholder")))

        # Give Cloudflare and the page's JS scripts a little extra time.
        time.sleep(settle_seconds)

        return driver.page_source
    finally:
        # Always close the browser, even when navigation or a wait fails;
        # otherwise every failed run leaves a Chrome window/process behind.
        driver.quit()


def _parse_ccc_html(html):
    # Extract title, ASIN, price-history table and buy-box prices from the HTML.
    soup = BeautifulSoup(html, "html.parser")

    # --- Title
    titulo_el = soup.select_one("#product-header h2 a")
    titulo = titulo_el.get_text(strip=True) if titulo_el else "N/D"

    # --- ASIN: the field whose <strong> label reads "ASIN", value after ":"
    asin_scraped = None
    for span in soup.select("span.product-title-field"):
        strong = span.find("strong")
        if strong and strong.get_text(strip=True) == "ASIN":
            # partition() tolerates a missing colon (ASIN stays None) where
            # split(":", 1)[1] would raise IndexError.
            _, sep, value = span.get_text(strip=True).partition(":")
            if sep:
                asin_scraped = value.strip()
            break

    # --- Price history: rows with at least five cells
    # (label, lowest, highest, current, average)
    historial = {}
    for row in soup.select("table.camelegend tbody tr"):
        cols = row.find_all("td")
        if len(cols) >= 5:
            tipo, lowest, highest, current, avg = (
                col.get_text(strip=True) for col in cols[:5]
            )
            historial[tipo] = {
                "lowest":  lowest,
                "highest": highest,
                "current": current,
                "average": avg
            }

    # --- Current buy-box prices, keyed by the price-type label
    buybox = {}
    for cell in soup.select("#buy-box .top .grid-x .cell"):
        price_el = cell.select_one(".bgp")
        label_el = cell.select_one(".price-type-label")
        date_el  = cell.select_one(".pricing-date")
        if price_el and label_el:
            key = label_el.get_text(strip=True)
            buybox[key] = {
                "price": price_el.get_text(strip=True),
                "as_of": date_el.get_text(strip=True) if date_el else None
            }

    return {
        "titulo": titulo,
        "asin": asin_scraped,
        "historial": historial,
        "buybox": buybox
    }

if __name__ == "__main__":
    # Scrape one sample product; `datos` stays at module level so that
    # later cells can reuse the result.
    datos = scrape_ccc("B09V3K2RS4")

    print("Title:   ", datos["titulo"])
    print("ASIN:     ", datos["asin"])

    # Both dict-valued sections share the same "heading, then one indented
    # line per entry" layout, so drive them from a single table.
    for heading, section in (
        ("\nPrice history:", "historial"),
        ("\nCurrent Buy-Box:", "buybox"),
    ):
        print(heading)
        for label, stats in datos[section].items():
            print(f"  {label}: {stats}")


Title:    Apple iPad Air (5th Generation): with M1 chip,  10.9-inch Liquid Retina Display,  256GB,  Wi-Fi 6,  12MP front/ 12MP Back Camera,  Touch ID,  All-Day Battery Life – Blue
ASIN:      B09V3K2RS4

Price history:

Current Buy-Box:
  Amazon Price: {'price': 'Out of Stock', 'as_of': None}
  3rd Party New Price: {'price': 'Out of Stock', 'as_of': None}
  3rd Party Used Price: {'price': '$557.33', 'as_of': 'as of May 21, 2025 10:13 PM'}


In [2]:
import pandas as pd

# Final column order for the buy-box table
cols = ["Title", "ASIN", "Type", "price", "as_of"]

# Build the table in one chain: one row per buy-box entry, the dict key
# becomes the "Type" column, and the product title / ASIN are repeated on
# every row.
buybox_df = (
    pd.DataFrame.from_dict(datos["buybox"], orient="index")
    .rename_axis("Type")
    .reset_index()
    .assign(Title=datos["titulo"], ASIN=datos["asin"])
)
buybox_df = buybox_df[cols]

buybox_df

Unnamed: 0,Title,ASIN,Type,price,as_of
0,"Apple iPad Air (5th Generation): with M1 chip,...",B09V3K2RS4,Amazon Price,Out of Stock,
1,"Apple iPad Air (5th Generation): with M1 chip,...",B09V3K2RS4,3rd Party New Price,Out of Stock,
2,"Apple iPad Air (5th Generation): with M1 chip,...",B09V3K2RS4,3rd Party Used Price,$557.33,"as of May 21, 2025 10:13 PM"
