In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import json
import time
import os

def setup_driver():
    """Create and return a Chrome WebDriver configured for scraping.

    The chromedriver binary is resolved through ``ChromeDriverManager`` and
    the browser is started with a fixed set of command-line flags.

    Returns:
        The ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    # Headless mode is intentionally left off; add "--headless" to this tuple
    # if the browser window does not need to be visible.
    chrome_flags = (
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    )

    opts = Options()
    for flag in chrome_flags:
        opts.add_argument(flag)

    driver_path = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(driver_path), options=opts)

def aceptar_cookies(driver):
    """Click the cookie-consent "agree" button if it becomes clickable.

    Waits up to 5 seconds for ``button#didomi-notice-agree-button``, clicks
    it and then pauses for 1 second. This is best-effort: if the button never
    appears or the click fails, the error is ignored and the function returns
    normally.

    Args:
        driver: Selenium WebDriver already positioned on the target page.
    """
    # NOTE(review): when the banner is absent this blocks for the full
    # 5-second timeout on every call — worth confirming whether that cost is
    # acceptable for large runs.
    try:
        boton = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "button#didomi-notice-agree-button"))
        )
        boton.click()
        # Presumably gives the banner time to disappear before scraping.
        time.sleep(1)
    except Exception:
        # Deliberately ignored (no banner / click failed). Catching Exception
        # rather than using a bare ``except`` lets KeyboardInterrupt and
        # SystemExit propagate so a long run can still be stopped.
        pass

def extraer_detalles(driver, url):
    """Load a listing page and extract its detail fields.

    Every field is extracted independently and on a best-effort basis: a
    field whose element cannot be found or read keeps its default empty
    string. If loading the page itself fails, the error is printed and the
    fields collected so far (possibly all empty) are returned.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the listing detail page.

    Returns:
        dict with the string keys ``descripcion_ampliada``,
        ``certificado_energetico``, ``tipo_vivienda``,
        ``superficie_construida``, ``superficie_util`` and
        ``imagen_destacada``.
    """
    detalles = {
        "descripcion_ampliada": "",
        "certificado_energetico": "",
        "tipo_vivienda": "",
        "superficie_construida": "",
        "superficie_util": "",
        "imagen_destacada": ""
    }

    # (label fragment, output key) pairs, checked in this order; the first
    # fragment contained in a feature label decides where its value goes.
    campos_por_etiqueta = (
        ("Tipo de casa", "tipo_vivienda"),
        ("Superficie construida", "superficie_construida"),
        ("Superficie útil", "superficie_util"),
    )

    # All handlers below catch Exception instead of using a bare ``except``
    # so that KeyboardInterrupt / SystemExit are never swallowed.
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "body")))
        aceptar_cookies(driver)

        # Description
        try:
            detalles["descripcion_ampliada"] = driver.find_element(
                By.CSS_SELECTOR, "div.description__content"
            ).text.strip()
        except Exception:
            pass

        # Features (label/value pairs)
        for feature in driver.find_elements(By.CSS_SELECTOR, "div.features__feature"):
            try:
                label = feature.find_element(By.CSS_SELECTOR, "span.features__label").text.strip()
                value = feature.find_element(By.CSS_SELECTOR, "span.features__value").text.strip()
            except Exception:
                # Feature without a readable label/value pair: skip it.
                continue
            for fragmento, clave in campos_por_etiqueta:
                if fragmento in label:
                    detalles[clave] = value
                    break

        # Energy certificate
        try:
            cert = driver.find_element(
                By.XPATH,
                "//h2[contains(text(), 'Certificado energético')]/following-sibling::p",
            )
            detalles["certificado_energetico"] = cert.text.strip()
        except Exception:
            pass

        # Featured image
        try:
            img = driver.find_element(By.CSS_SELECTOR, "div.masonry__item picture img")
            # get_attribute returns None when the attribute is missing; keep
            # the field a string like every other entry.
            detalles["imagen_destacada"] = img.get_attribute("src") or ""
        except Exception:
            pass

    except Exception as e:
        print(f"❌ Error en {url}: {e}")
    return detalles

def _guardar_resultados(resultados, output_file):
    """Write ``resultados`` to ``output_file`` as JSON, replacing it atomically."""
    directorio = os.path.dirname(output_file)
    if directorio:
        os.makedirs(directorio, exist_ok=True)
    # Write to a sibling temp file first so an interruption mid-write never
    # leaves a truncated output file behind.
    tmp_file = f"{output_file}.tmp"
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(resultados, f, ensure_ascii=False, indent=2)
    os.replace(tmp_file, output_file)


def scrape_10000(
    input_file="data/venta_madrid_completo2.json",
    output_file="data/venta_madrid_detalle_10000.json",
    limit=10000,
    checkpoint_every=100,
):
    """Enrich listings from ``input_file`` with page details and save them.

    Reads a JSON list of listing dicts, visits the ``link`` of each of the
    first ``limit`` entries, merges the extracted details into the entry and
    writes the accumulated list to ``output_file``. Progress is checkpointed
    periodically, and whatever has been collected is also saved if the run is
    interrupted or fails.

    Args:
        input_file: Path to the JSON file containing the list of listings.
        output_file: Path of the JSON file to write; its directory is created
            if missing.
        limit: Maximum number of listings to process (``None`` for all).
        checkpoint_every: Save progress after this many listings; a value
            <= 0 disables intermediate saves.

    Raises:
        OSError / json.JSONDecodeError: if ``input_file`` cannot be read or
            parsed, or ``output_file`` cannot be written.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        inmuebles = json.load(f)

    pendientes = inmuebles[:limit]
    resultados = []

    # Do not launch a browser when there is nothing to scrape.
    driver = setup_driver() if pendientes else None
    try:
        for i, item in enumerate(pendientes, start=1):
            link = item.get("link")
            if link:
                print(f"🔎 ({i}) {link}")
                item.update(extraer_detalles(driver, link))
            else:
                # Keep the record so the output stays aligned with the input.
                print(f"⚠️ ({i}) registro sin 'link'; se guarda sin detalles")
            resultados.append(item)

            # Periodic checkpoint
            if checkpoint_every > 0 and i % checkpoint_every == 0:
                _guardar_resultados(resultados, output_file)
    finally:
        # Always release the browser and persist what was collected, even on
        # Ctrl+C or an unexpected error.
        try:
            if driver is not None:
                driver.quit()
        finally:
            _guardar_resultados(resultados, output_file)

    print(f"\n💾 Guardado: {output_file} ({len(resultados)} registros)")

# Script entry point: run the scrape with the default input and output paths.
if __name__ == "__main__":
    scrape_10000()


🔎 (1) https://www.pisos.com/comprar/piso-hoyo_de_manzanares_centro_urbano-46711883004_994129/
🔎 (2) https://www.pisos.com/comprar/piso-getafe_juan_de_la_cierva-51688983208_101000/
🔎 (3) https://www.pisos.com/comprar/casa_adosada-camarma_de_esteruelas_centro_urbano-51764596522_101800/
🔎 (4) https://www.pisos.com/comprar/piso-zona_sureste-5129093556_109700/
🔎 (5) https://www.pisos.com/comprar/piso-pinar_punta_galea28290-53410897274_100500/
🔎 (6) https://www.pisos.com/comprar/piso-fuente_del_berro28028-631974332_440000/
🔎 (7) https://www.pisos.com/comprar/casa_adosada-camarma_de_esteruelas_centro_urbano-40846660855_101800/
🔎 (8) https://www.pisos.com/comprar/casa_adosada-estacion_zona_norte28224-5170794907_109700/
🔎 (9) https://www.pisos.com/comprar/piso-guindalera28028-52544082985_440000/
🔎 (10) https://www.pisos.com/comprar/piso-casco_historico_de_vallecas28031-50073520919_100200/
🔎 (11) https://www.pisos.com/comprar/piso-coslada_casco_antiguo28821-52549274330_100200/
🔎 (12) https://www