In [20]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager

from bs4 import BeautifulSoup
import pandas as pd
import re

In [28]:
PRICE_RE      = re.compile(r'\$\s*\d{1,3}(?:\.\d{3})+(?:,\d+)?')
DISCOUNT_RE   = re.compile(r'(\d{1,2})\s*%\s*OFF', re.I)
SENTENCE_SPLIT_RE = re.compile(r'\.(?=\s*[A-ZÁÉÍÓÚÑ])') 

In [37]:
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
driver.get('https://www.mercadolibre.com.co/')

# Espera hasta que el input esté presente
wait = WebDriverWait(driver, 10)
search_box = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="cb1-edit"]')))

# Escribe en el campo de búsqueda
search_box.send_keys('computador')

# Espera hasta que el botón esté presente y haz clic
search_button = wait.until(EC.element_to_be_clickable((By.XPATH, '/html/body/header/div/div[2]/form/button')))
search_button.click()

wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'li.ui-search-layout__item')))

# Parsear con BeautifulSoup
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')

# Selecciona todos los items
items = soup.select('li.ui-search-layout__item')

contador = 0
productos = []

for item in items:
    link = item.select_one('a.poly-component__title')
    driver.switch_to.new_window()
    driver.get(link["href"])
    
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div.ui-pdp-container.ui-pdp-container--pdp')))
    
    # Parsear con BeautifulSoup
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    
    price = soup.select_one('div.ui-pdp-price__second-line')    
    cuotas = soup.select_one('div.ui-pdp-price__subtitles')
    informacion = soup.select_one('ul.ui-vpp-highlighted-specs__features-list')
    
    m_price = PRICE_RE.search(price.text) if price else None
    m_discount = DISCOUNT_RE.search(price.text) if price else None
    m_informacion = SENTENCE_SPLIT_RE.split(informacion.text) if informacion else None
    
    producto = {
        "precio": m_price.group(0) if m_price else 'No price found',
        "descuento": m_discount.group(0) if m_discount else 'No discount found',
        "cuotas": cuotas.text if cuotas else 'No cuotas found',
        "informacion": m_informacion if m_informacion else 'No informacion found'
    }

    productos.append(producto)

    contador += 1
    
    if contador >= 20:
        break

driver.quit()

In [16]:
from pymongo import MongoClient
from datetime import datetime
import pprint

In [38]:
client = MongoClient("localhost:27017")

db = client["Meli"]
collection = db["coleccion"]

result = collection.insert_many(productos)
print("IDs insertados:", result.inserted_ids)

IDs insertados: [ObjectId('689b59738f71c1dba9f896f9'), ObjectId('689b59738f71c1dba9f896fa'), ObjectId('689b59738f71c1dba9f896fb'), ObjectId('689b59738f71c1dba9f896fc'), ObjectId('689b59738f71c1dba9f896fd'), ObjectId('689b59738f71c1dba9f896fe'), ObjectId('689b59738f71c1dba9f896ff'), ObjectId('689b59738f71c1dba9f89700'), ObjectId('689b59738f71c1dba9f89701'), ObjectId('689b59738f71c1dba9f89702'), ObjectId('689b59738f71c1dba9f89703'), ObjectId('689b59738f71c1dba9f89704'), ObjectId('689b59738f71c1dba9f89705'), ObjectId('689b59738f71c1dba9f89706'), ObjectId('689b59738f71c1dba9f89707'), ObjectId('689b59738f71c1dba9f89708'), ObjectId('689b59738f71c1dba9f89709'), ObjectId('689b59738f71c1dba9f8970a'), ObjectId('689b59738f71c1dba9f8970b'), ObjectId('689b59738f71c1dba9f8970c')]


In [39]:
for doc in collection.find():
    pprint.pprint(doc)

{'_id': ObjectId('689b59738f71c1dba9f896f9'),
 'cuotas': '12 cuotas de $175.695 con 0% interés',
 'descuento': '13% OFF',
 'informacion': ['Capacidad total del módulo de memoria RAM: 16 GB',
                 'Modelo del procesador: 13420H',
                 'Marca del procesador: Intel',
                 'Sistema operativo: Windows 11 Pro.'],
 'precio': '$2.108.344'}
{'_id': ObjectId('689b59738f71c1dba9f896fa'),
 'cuotas': '3 cuotas de $523.300 con 0% interés',
 'descuento': '30% OFF',
 'informacion': ['Con pantalla táctil: No',
                 'Conexión wifi y bluetooth',
                 'Incluye lector de tarjeta de memoria',
                 'Posee pad numérico.'],
 'precio': '$1.569.899'}
{'_id': ObjectId('689b59738f71c1dba9f896fb'),
 'cuotas': '12 cuotas de $175.695 con 0% interés',
 'descuento': '13% OFF',
 'informacion': ['Capacidad total del módulo de memoria RAM: 16 GB',
                 'Modelo del procesador: 13420H',
                 'Marca del procesador: Intel',
       

In [42]:
for doc in collection.find({"descuento": {"$gt": "40"}}):
    pprint.pprint(doc)

{'_id': ObjectId('689b59738f71c1dba9f896fd'),
 'cuotas': '3 cuotas de $727.815 con 0% interés',
 'descuento': '45% OFF',
 'informacion': ['Capacidad de disco SSD: 512 GB',
                 'Capacidad del disco duro: 0 MB',
                 'Capacidad total del módulo de memoria RAM: 8 GB',
                 'Pantalla LED Full HD',
                 ' Sistema operativo Windows 11 Home',
                 'Con cámara web FullHD',
                 'Tiene micrófono incorporado',
                 'Se conecta por Bluetooth a otros dispositivos y al Wi-Fi',
                 'Tiene puerto Ethernet.'],
 'precio': '$2.183.445'}
{'_id': ObjectId('689b59738f71c1dba9f896fe'),
 'cuotas': '3 cuotas de $1.199.967 con 0% interés',
 'descuento': 'No discount found',
 'informacion': ['Memoria RAM: 16 GBModelo del procesador: 5600Línea del '
                 'procesador: Ryzen 5Marca del procesador: AMDSistema '
                 'operativo: Windows 11 Home Home.'],
 'precio': '$3.599.900'}
{'_id': ObjectId('

In [47]:
# Sumar el total de precios
total_precio = 0
for doc in collection.find():
    precio = doc.get("precio", "0").replace(".", "").replace("$", "").strip()
    total_precio += int(precio)

print("El precio total es: $", total_precio)

El precio total es: $ 42629307
