In [None]:
import requests
from bs4 import BeautifulSoup

# Función genérica para obtener el texto de un elemento dado su selector CSS
def extraer_texto_soup(soup, selector):
    elemento = soup.select_one(selector)
    return elemento.get_text(strip=True) if elemento else None

# 1) RIPLEY: precio base = "Precio normal" -> class="product-price-container product-normal-price"
def obtener_precio_ripley(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # El selector CSS para la clase product-normal-price
    selector_precio = ".product-price-container.product-normal-price"
    return extraer_texto_soup(soup, selector_precio)

# 2) PROMART: precio base = "Precio lista" -> class="listprice js-price-listprice"
def obtener_precio_promart(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # El selector CSS para la clase listprice js-price-listprice
    selector_precio = "div.bestPrice span.js-pp"
    # En la página de listado puede haber varios; tomamos el primero
    return extraer_texto_soup(soup, selector_precio)

# 3) SODIMAC: precio base = "Precio lista" (no tiene clase definida explícita en tu tabla),
#    pero en tu tabla dices "Precio lista" → simplemente buscamos "Precio lista" como texto
def obtener_precio_sodimac(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # En muchos casos Sodimac muestra el precio con tarjeta y el precio sin tarjeta.
    # Asumiremos que el "precio lista" aparece dentro de un <span> con clase que contiene 'listPrice'.
    selector_precio = "span[class*='listPriceValue']"
    return extraer_texto_soup(soup, selector_precio)

# 4) HIRAOKA: precio base = "Precio regular" -> <span class="old-price"><span class="price">
def obtener_precio_hiraoka(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Usamos el selector para span.old-price > span.price
    selector_precio = "span.old-price > span.price"
    return extraer_texto_soup(soup, selector_precio)

# 5) METRO: precio base = "Precio regular" → <span class="vtex-product-price-1-x-listPriceValue ... strike">
def obtener_precio_metro(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Selector para span que contenga 'listPriceValue' y strike
    selector_precio = "span.vtex-product-price-1-x-listPriceValue.strike"
    return extraer_texto_soup(soup, selector_precio)

# 6) FALABELLA/TOTTUS: precio base = "Precio regular" -> <span class="copy17 primary senary ... bold ...">
def obtener_precio_falabella(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Selector para span.copy17.primary.senary.bold
    selector_precio = "span.copy17.primary.senary.bold"
    return extraer_texto_soup(soup, selector_precio)

if __name__ == "__main__":
    # Diccionario con cada empresa y su función para extraer precio
    urls_y_funciones = {
        "Ripley": {
            "url": "https://simple.ripley.com.pe/tecnologia/computacion/laptops?source=search&term=laptop%20asus&facet=Marca%3AASUS",
            "func": obtener_precio_ripley
        },
        "Promart": {
            "url": "https://www.promart.pe/laptops?ft=laptop",
            "func": obtener_precio_promart
        },
        "Sodimac": {
            "url": "https://www.sodimac.com.pe/sodimac-pe/articulo/143501073/Lenovo-Ideapad-Slim-3-15Amn8-Ryzen-5-7520U-8Gb-512Gb-SSD-W11-(82Xq00Ltlm)/143501074?exp=so_com",
            "func": obtener_precio_sodimac
        },
        "Hiraoka": {
            "url": "https://hiraoka.com.pe/gpsearch/?q=acer&_gsSearchId=682fb74972ca452b56f600c6-1747957793301",
            "func": obtener_precio_hiraoka
        },
        "Metro": {
            "url": "https://www.metro.pe/laptop%20hp?_q=laptop%20hp&map=ft",
            "func": obtener_precio_metro
        },
        "Falabella/Tottus": {
            "url": "https://www.falabella.com.pe/falabella-pe/search?Ntt=laptop+asus",
            "func": obtener_precio_falabella
        }
    }

    for empresa, info in urls_y_funciones.items():
        url = info["url"]
        func = info["func"]
        try:
            precio = func(url)
        except Exception as e:
            precio = f"Error al extraer ({e})"
        print(f"{empresa}: {precio}")


Ripley: Error al extraer (403 Client Error: Forbidden for url: https://simple.ripley.com.pe/tecnologia/computacion/laptops?source=search&term=laptop%20asus&facet=Marca%3AASUS)
Promart: 
Sodimac: None
Hiraoka: None
Metro: None
Falabella/Tottus: Error al extraer (403 Client Error: Forbidden for url: https://www.falabella.com.pe/falabella-pe/search?Ntt=laptop+asus)


In [5]:
import requests
from bs4 import BeautifulSoup
import re

def get_price(url, price_classes):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Buscar el precio usando todas las clases posibles
        for class_name in price_classes:
            elements = soup.find_all(class_=class_name.strip())
            if not elements:
                continue
                
            for element in elements:
                price_text = element.get_text(strip=True)
                match = re.search(r'[\d.,]+', price_text)
                if match:
                    # Limpiar y convertir el precio
                    price_str = match.group().replace('.', '').replace(',', '.')
                    return float(price_str)
        
        return None
    
    except Exception as e:
        print(f"Error al obtener precio: {str(e)}")
        return None

# Diccionario con URLs y sus respectivas clases para precio base
stores = {
    'Ripley': {
        'url': 'https://simple.ripley.com.pe/tecnologia/computacion/laptops?source=search&term=laptop%20asus&facet=Marca%3AASUS',
        'price_classes': [
            'product-price-container product-normal-price',
            'product-price-container product-internet-price'
        ]
    },
    'Promart': {
        'url': 'https://www.promart.pe/laptops?ft=laptop',
        'price_classes': [
            'listprice js-price-listprice',
            'price js-price-price lp tam'
        ]
    },
    'Sodimac': {
        'url': 'https://www.sodimac.com.pe/sodimac-pe/articulo/143501073/Lenovo-Ideapad-Slim-3-15Amn8-Ryzen-5-7520U-8Gb-512Gb-SSD-W11-(82Xq00Ltlm)/143501074?exp=so_com',
        'price_classes': [
            'copy12 primary high jsx-2835692965 normal line-height-29',
            'Precio lista'  # Usar texto si la clase no funciona
        ]
    },
    'Hiraoka': {
        'url': 'https://hiraoka.com.pe/gpsearch/?q=acer&_gsSearchId=682fb74972ca452b56f600c6-1747957793301',
        'price_classes': [
            'old-price',
            'special-price'
        ]
    },
    'Metro': {
        'url': 'https://www.metro.pe/laptop%20hp?_q=laptop%20hp&map=ft',
        'price_classes': [
            'vtex-product-price-1-x-listPriceValue',
            'vtex-product-price-1-x-sellingPriceValue'
        ]
    },
    'Falabella/Tottus': {
        'url': 'https://www.falabella.com.pe/falabella-pe/search?Ntt=laptop+asus',
        'price_classes': [
            'copy17 primary senary jsx-2835692965 bold line-height-29',
            'copy12 primary high jsx-2835692965 normal line-height-29'
        ]
    }
}

# Obtener precios para todas las tiendas
for store, data in stores.items():
    price = get_price(data['url'], data['price_classes'])
    if price:
        print(f"{store}: S/ {price:.2f}")
    else:
        print(f"{store}: No se encontró precio")

Error al obtener precio: 403 Client Error: Forbidden for url: https://simple.ripley.com.pe/tecnologia/computacion/laptops?source=search&term=laptop%20asus&facet=Marca%3AASUS
Ripley: No se encontró precio
Promart: No se encontró precio
Sodimac: No se encontró precio
Hiraoka: S/ 4.00
Metro: No se encontró precio
Error al obtener precio: 403 Client Error: Forbidden for url: https://www.falabella.com.pe/falabella-pe/search?Ntt=laptop+asus
Falabella/Tottus: No se encontró precio


In [52]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import time

def obtener_precios_promart(url):
    """
    Abre la página con Selenium, espera a que se cargue el JavaScript,
    y devuelve una lista de strings con todos los precios 'bestPrice'
    que encuentre en el listado de productos.
    """
    # 1) Configuramos Selenium para usar Chrome (descarga automática del driver)
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    # (Opcional) Ejecutar en modo headless (sin ventana), si no quieres ver el navegador.
    # options.add_argument("--headless")
    driver = webdriver.Chrome(service=service, options=options)

    try:
        # 2) Vamos a la URL de listados
        driver.get(url)

        # 3) Esperamos un par de segundos para que el JS cargue los precios.
        #    Dependiendo de tu conexión y del sitio, quizá necesites aumentar este tiempo
        time.sleep(5)  # espera 5 segundos. Ajusta si ves que tarda más.

        # 4) Buscamos todos los <div class="bestPrice js-bestPrice ...">
        #    y extraemos el texto del <span class="js-pp"> que está adentro.
        precios = []
        # a) localizamos todos los nodos <div> con clase "bestPrice js-bestPrice"
        contenedores_best = driver.find_elements(By.CSS_SELECTOR, "div.bestPrice.js-bestPrice")
        for cont in contenedores_best:
            # b) dentro de cada contenedor, buscamos el <span class="js-pp">
            span_pp = cont.find_element(By.CSS_SELECTOR, "span.js-pp")
            texto = span_pp.text.strip()  # por ejemplo "S/ 1,700"
            if texto:
                precios.append(texto)

        return precios

    finally:
        # 5) Cerramos el navegador (libera memoria)
        driver.quit()


if __name__ == "__main__":
    url_promart = "https://www.promart.pe/laptops?ft=laptop"
    lista_de_precios = obtener_precios_promart(url_promart)

    # Imprimimos todos los precios encontrados
    if lista_de_precios:
        print("Precios encontrados (bestPrice) en Promart:")
        for idx, precio in enumerate(lista_de_precios, start=1):
            print(f"{idx}. {precio}")
    else:
        print("No se encontró ningún precio bestPrice.")


NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=137.0.7151.104)
Stacktrace:
	GetHandleVerifier [0x0xc73783+63299]
	GetHandleVerifier [0x0xc737c4+63364]
	(No symbol) [0x0xaa1113]
	(No symbol) [0x0xa7ff69]
	(No symbol) [0x0xb14fce]
	(No symbol) [0x0xb2f359]
	(No symbol) [0x0xb0e376]
	(No symbol) [0x0xadd6e0]
	(No symbol) [0x0xade544]
	GetHandleVerifier [0x0xece073+2531379]
	GetHandleVerifier [0x0xec9372+2511666]
	GetHandleVerifier [0x0xc99efa+220858]
	GetHandleVerifier [0x0xc8a548+156936]
	GetHandleVerifier [0x0xc90c7d+183357]
	GetHandleVerifier [0x0xc7b6e8+95912]
	GetHandleVerifier [0x0xc7b890+96336]
	GetHandleVerifier [0x0xc6666a+9770]
	BaseThreadInitThunk [0x0x75c85d49+25]
	RtlInitializeExceptionChain [0x0x772bd09b+107]
	RtlGetAppContainerNamedObjectPath [0x0x772bd021+561]


In [55]:
import json
from urllib.parse import urlparse
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Configuración de Chrome en modo headless
def init_driver():
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument(
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
    )
    service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=service, options=options)
def scrape_ripley(url):
    driver = init_driver()
    driver.get(url)
    wait = WebDriverWait(driver, 25)
    
    # Esperar contenedores de productos (mejor selector)
    containers = wait.until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, 'div.catalog-product-item__container')
        )
    )

    results = []
    for c in containers:
        try:
            # Título
            title_el = c.find_element(By.CSS_SELECTOR, '.catalog-product-details__name')
            title = title_el.text.strip()
            
            # Precio
            try:
                price_el = c.find_element(By.CSS_SELECTOR, 'li.catalog-prices__offer-price')
                price_text = price_el.text.strip()
            except:
                price_el = c.find_element(By.CSS_SELECTOR, 'li.catalog-prices__list-price')
                price_text = price_el.text.strip()
                
            price = float(
                price_text
                .replace('S/ ', '')
                .replace('$ ', '')
                .replace('.', '')
                .replace(',', '.')
            )
            
            # URL del producto
            link_el = c.find_element(By.CSS_SELECTOR, 'a.catalog-product-item')
            product_url = link_el.get_attribute('href')
            
            # IMAGEN PRINCIPAL (SOLUCIÓN CLAVE)
            # Opción 1: Usar el contenedor específico de imágenes
            img_container = c.find_element(By.CSS_SELECTOR, '.catalog-product__image-preview')
            img_el = img_container.find_element(By.CSS_SELECTOR, '.images-preview-item.is-active img')
            image_url = img_el.get_attribute('src')
            
            # Opción 2: Alternativa más robusta
            # image_url = driver.execute_script('''
            #    return arguments[0].querySelector('.images-preview-item.is-active img').src;
            # ''', c)
            
            # Dominio
            domain = urlparse(product_url).netloc
            
            results.append({
                'title': title,
                'price': price,
                'image_url': image_url,
                'product_url': product_url,
                'domain': domain
            })
            
        except Exception as e:
            print(f"Error procesando producto: {str(e)}")
            continue
            
    driver.quit()
    return results
if __name__ == '__main__':
    url = 'https://simple.ripley.com.pe/tecnologia/computacion/laptops?source=search&term=laptop%20asus'
    items = scrape_ripley(url)
    count = len(items)
    output = {
        'success': count > 0,
        'count': count,
        'results': items
    }
    print(json.dumps(output, ensure_ascii=False, indent=2))
    print(f"Se recuperaron {count} artículos.")


{
  "success": true,
  "count": 46,
  "results": [
    {
      "title": "LAPTOP HP 15-FD0029LA INTEL CORE I5-1334U 12GB RAM 512GBSSD 15.6\"",
      "price": 2.099,
      "image_url": "https://rimage.ripley.com.pe/home.ripley/Attachment/WOP/1/2004351827222/full_image-2004351827222.webp",
      "product_url": "https://simple.ripley.com.pe/laptop-hp-15-fd0029la-intel-core-i5-1334u-12gb-ram-512gbssd-156-2004351827222p?ismb=true&mai=v2_UjVS-PdIUtUfZ06RnG-PtoYSnXFa_jTLWYZui-hY1Ywy57sRKT4GlAFcnPzi0rx6waudsjAKTIg0jB2DvymapebTwBu2jLe_qt5EfIvrtDbqFnDouOpjwtYjF9XY5429RPhVfuWLmGHA5cvcC7ovDoE4OhlcCTi3pJI3JjLEJSU%3D_ark19774fde28b586e507",
      "domain": "simple.ripley.com.pe"
    },
    {
      "title": "LAPTOP HP 15-FB2002LA AMD RYZEN 5 8645HS 8GB RAM 512GB SSD 15.6\" RTX 3050",
      "price": 3.099,
      "image_url": "https://rimage.ripley.com.pe/home.ripley/Attachment/WOP/1/2004335737608/full_image-2004335737608.WEBP",
      "product_url": "https://simple.ripley.com.pe/laptop-hp-15-fb2002la-am

In [39]:
import json
from urllib.parse import urlparse
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Inicializar Chrome en modo headless
def init_driver():
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument(
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
    )
    service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=service, options=options)

# Función para extraer productos de Plaza Vea
def scrape_plazavea(url):
    driver = init_driver()
    driver.get(url)
    wait = WebDriverWait(driver, 20)
    # Esperar contenedores de productos
    containers = wait.until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, 'div.HA.Showcase.Showcase--non-food.ga-product-item')
        )
    )

    results = []
    for c in containers:
        try:
            # Título desde data attribute
            title = c.get_attribute('data-ga-name')

            # Precio de oferta
            price_elem = c.find_element(By.CSS_SELECTOR, '.Showcase__salePrice')
            price_text = price_elem.get_attribute('data-price')
            price = float(price_text.replace(',', ''))

            # Imagen del producto
            img_el = c.find_element(By.CSS_SELECTOR, 'img.showcase__image')  # ✅ Selector exacto
            image_url = img_el.get_attribute('src')

            #img_el = c.find_element(By.TAG_NAME, 'img')
            #image_url = img_el.get_attribute('src')
            #img_el = c.find_element(By.XPATH, './/img[@class="showcase__image" and @alt="product image"]')
            #image_url = img_el.get_attribute('src')

            #Obtener el contenedor de la imagen
            #product_container = c.find_element(By.CSS_SELECTOR, 'figure.Showcase__photo')

            # Imagen
            #img_el = product_container.find_element(By.TAG_NAME, 'img')
            #image_url = img_el.get_attribute('src')

            # URL del producto
            link_el = c.find_element(By.CSS_SELECTOR, 'a.Showcase__link')
            product_url = link_el.get_attribute('href')

            # Dominio
            domain = urlparse(product_url).netloc

            results.append({
                'title': title,
                'price': price,
                'image_url': image_url,
                'product_url': product_url,
                'domain': domain
            })
        except Exception:
            continue

    driver.quit()
    return results

if __name__ == '__main__':
    url = 'https://www.plazavea.com.pe/tecnologia/computo/laptops'
    items = scrape_plazavea(url)
    count = len(items)
    output = {
        'success': count > 0,
        'count': count,
        'results': items
    }
    print(json.dumps(output, ensure_ascii=False, indent=2))
    print(f"Se recuperaron {count} artículos.")


{
  "success": true,
  "count": 21,
  "results": [
    {
      "title": "Laptop LENOVO IdeaPad Slim3 15ABR8 15.6\" AMD Ryzen 7 5825U 16GB 1TB SSD",
      "price": 2299.0,
      "image_url": "",
      "product_url": "https://www.plazavea.com.pe/laptop-lenovo-ideapad-slim3-15abr8-15-6--amd-ryzen-7-5825u-16gb-1tb-ssd/p",
      "domain": "www.plazavea.com.pe"
    },
    {
      "title": "Laptop LENOVO Ideapad SLIM 3 14IRH8 14\" Intel Core i7-13620H 16GB 512GB SSD",
      "price": 2629.0,
      "image_url": "",
      "product_url": "https://www.plazavea.com.pe/laptop-lenovo-ideapad-slim-3-14irh8-14--intel-core-i713620h-16gb-512gb-ssd/p",
      "domain": "www.plazavea.com.pe"
    },
    {
      "title": "Laptop HP 15-FC0031LA 15.6\" AMD Ryzen 7 7730U 16GB 1TB SSD",
      "price": 2419.0,
      "image_url": "",
      "product_url": "https://www.plazavea.com.pe/laptop-hp-15fc0031la-15-6--amd-ryzen-7-7730u-16gb-1tb-ssd/p",
      "domain": "www.plazavea.com.pe"
    },
    {
      "title": "Lapto