In [3]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    WebDriverException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# ⚙️ Setup navegador
def setup_driver():
    """Create and return a Chrome WebDriver configured for the scraper.

    The browser is started with the ``--disable-gpu``, ``--no-sandbox`` and
    ``--disable-dev-shm-usage`` flags. Headless mode is not enabled. The
    driver executable is obtained through ``ChromeDriverManager().install()``.

    Returns:
        The ``webdriver.Chrome`` instance that was started.
    """
    chrome_opts = Options()
    # Headless mode is left off; add "--headless" to the tuple below to enable it.
    for flag in ("--disable-gpu", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_opts.add_argument(flag)

    chromedriver_path = ChromeDriverManager().install()
    chrome_service = Service(chromedriver_path)
    return webdriver.Chrome(service=chrome_service, options=chrome_opts)

# 🍪 Aceptar cookies
def aceptar_cookies(driver):
    """Click the cookie-consent "agree" button if it shows up.

    Waits up to 10 seconds for ``button#didomi-notice-agree-button`` to become
    clickable, clicks it, and then pauses 2 seconds. The operation is
    best-effort: when the button never becomes clickable or the click fails,
    a warning is printed and the function returns normally.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
    """
    selector = (By.CSS_SELECTOR, "button#didomi-notice-agree-button")
    try:
        boton = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(selector))
        boton.click()
    except WebDriverException:
        # WebDriverException is the base of Selenium's errors (wait timeout,
        # intercepted click, stale element). Catching it instead of using a
        # bare ``except`` lets KeyboardInterrupt and programming errors surface.
        print("⚠️ Cookies ya aceptadas o no detectadas.")
    else:
        print("🍪 Cookies aceptadas.")
        # Short pause after the click before the caller continues.
        time.sleep(2)

# 🧩 Extraer detalles
def _texto_o_none(driver, by, selector):
    """Return the stripped text of the first element matching *selector*, or None.

    Only "element is missing" and "element went stale" are treated as an
    absent value; any other error propagates to the caller.
    """
    try:
        return driver.find_element(by, selector).text.strip()
    except (NoSuchElementException, StaleElementReferenceException):
        return None


def extraer_detalles(driver):
    """Extract selected detail fields from the listing page currently loaded.

    Args:
        driver: Selenium WebDriver positioned on a listing detail page.

    Returns:
        A dict with the keys ``"conservacion"``, ``"precio_m2"`` and
        ``"certificado_energetico"`` (in that order). Each value is the
        element's stripped text, or ``None`` when the element is not found.
        The ``"Clasificación: "`` prefix is removed from the energy
        certificate text.
    """
    detalles = {
        # Text of the <span> that follows the span containing 'Conservación'.
        "conservacion": _texto_o_none(
            driver,
            By.XPATH,
            "//span[contains(., 'Conservación')]/following-sibling::span",
        ),
        # NOTE(review): relies on the value being the 5th summary item;
        # confirm this position is stable across listings.
        "precio_m2": _texto_o_none(
            driver,
            By.CSS_SELECTOR,
            "li.features-summary__item:nth-child(5)",
        ),
    }

    certificado = _texto_o_none(
        driver, By.CSS_SELECTOR, "p.details__block.energy-certificate"
    )
    if certificado is not None:
        certificado = certificado.replace("Clasificación: ", "")
    detalles["certificado_energetico"] = certificado

    return detalles

# 📥 Cargar URLs de alquiler
# Load the rental listing URLs (unique, non-null values of the "link" column).
df = pd.read_csv("C:/Users/pablo/proyecto/comparador/alquiler_con_tipos_detectados.csv")
urls = df["link"].dropna().unique().tolist()

# Start scraping.
driver = setup_driver()
resultados = []

print(f"🔍 Scrapeando detalles de {len(urls)} inmuebles de alquiler...\n")

try:
    for i, url in enumerate(urls, 1):
        print(f"{i}/{len(urls)} → {url}")
        driver.get(url)
        time.sleep(2)
        # The cookie banner is only handled on the first page visited.
        if i == 1:
            aceptar_cookies(driver)

        detalles = extraer_detalles(driver)
        detalles["link"] = url
        resultados.append(detalles)
        time.sleep(1)
finally:
    # Runs on success, on error and on Ctrl-C: a failure midway through a long
    # run must neither leave a Chrome process behind nor discard the rows
    # that were already scraped.
    try:
        driver.quit()
    finally:
        # Save results.
        df_resultado = pd.DataFrame(resultados)
        # Skip the write only when a failure left nothing to save, so an early
        # crash does not overwrite a previous output with an empty file.
        if resultados or not urls:
            df_resultado.to_csv(
                "C:/Users/pablo/proyecto/comparador/alquiler_detalles_scrapeados.csv",
                index=False,
            )

# Reached only when every URL was processed without an exception.
print("\n✅ Detalles guardados en alquiler_detalles_scrapeados.csv")


🔍 Scrapeando detalles de 1477 inmuebles de alquiler...

1/1477 → https://www.pisos.com/alquilar/piso-la_finca-49178402159_280500/
🍪 Cookies aceptadas.
2/1477 → https://www.pisos.com/alquilar/piso-galapagar_la_navata28420-43338541420_100500/
3/1477 → https://www.pisos.com/alquilar/casa_unifamiliar-las_matas_los_penascales-29189878080_280500/
4/1477 → https://www.pisos.com/alquilar/piso-rios_rosas28003-52592214693_104700/
5/1477 → https://www.pisos.com/alquilar/piso-zona_avenida_de_europa28224-45068829832_101800/
6/1477 → https://www.pisos.com/alquilar/piso-justicia_chueca28004-52539451934_108900/
7/1477 → https://www.pisos.com/alquilar/atico-galapagar_la_navata28260-20849829404_101800/
8/1477 → https://www.pisos.com/alquilar/estudio-parque_empresarial28290-45008053320_101800/
9/1477 → https://www.pisos.com/alquilar/piso-somosaguas_humera_los_angeles-52524748784_280500/
10/1477 → https://www.pisos.com/alquilar/piso-retiro_estrella28007-53370434507_100500/
11/1477 → https://www.pisos.com/