# **BOOKING SCRAPER**

In [9]:
# ------------------- IMPORTS -------------------
import time
import csv
import logging
import pandas as pd
import random # Importamos random para tiempos de espera humanos
from datetime import datetime, timedelta

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# Send only ERROR-level (and above) records to a log file in the working directory.
logging.basicConfig(
    level=logging.ERROR,
    filename="scraper_errors.log",
)

# ------------------- CONFIG -------------------
class Config:
    """Static settings read by the scraper."""

    # Site root that search URLs are built on.
    BASE_URL = "https://www.booking.com"

    # When True the browser is started with the headless flag.
    HEADLESS = False

    # Timeout handed to the scraper's WebDriverWait (raised general timeout).
    TIMEOUT = 15

    # Base pause used while scrolling.
    SCROLL_PAUSE = 3

# ------------------- DATE RANGE -------------------
def generate_dates_range(checkin, delta_weeks=0):
    """Build 7-night (check-in, check-out) pairs around a reference date.

    Args:
        checkin: Reference check-in date as a ``YYYY-MM-DD`` string.
        delta_weeks: Number of weeks to extend the window before and after
            ``checkin``. With 0 only the reference date itself is used.

    Returns:
        A list of ``(checkin, checkout)`` string tuples, one per day in the
        window, each check-out being seven days after its check-in. The list
        is empty when the window end falls before its start.

    Raises:
        ValueError: If ``checkin`` is not in ``YYYY-MM-DD`` format.
    """
    iso = "%Y-%m-%d"
    stay = timedelta(days=7)
    reference = datetime.strptime(checkin, iso)
    half_window = timedelta(weeks=delta_weeks)

    first_day = reference - half_window
    last_day = reference + half_window
    # Whole days between the two ends; negative spans yield an empty range.
    whole_days = (last_day - first_day).days

    arrivals = [first_day + timedelta(days=offset) for offset in range(whole_days + 1)]
    return [
        (arrival.strftime(iso), (arrival + stay).strftime(iso))
        for arrival in arrivals
    ]

# ------------------- SCRAPER CLASS -------------------
class BookingScraper:
    """Selenium-driven helper for loading Booking.com search results.

    An instance stores the search parameters for one city and owns a Chrome
    WebDriver, which is started in ``__init__`` and must be released with
    ``close()``.
    """

    def __init__(self, config, city, checkin, adults=2, children=0, rooms=1, currency="EUR", language="English (UK)"):
        """Store the search parameters and start the browser.

        Args:
            config: Object exposing ``BASE_URL``, ``TIMEOUT`` and ``HEADLESS``
                (and optionally ``SCROLL_PAUSE``).
            city: Destination text placed in the search URL.
            checkin: Reference check-in date (``YYYY-MM-DD`` string).
            adults: Desired value for the first occupancy counter.
            children: Desired value for the second occupancy counter.
            rooms: Desired value for the third occupancy counter.
            currency: Text of the currency option to select in the picker.
            language: Text of the language option to select in the picker.
        """
        self.config = config
        self.city = city
        self.checkin = checkin
        self.adults = adults
        self.children = children
        self.rooms = rooms
        self.currency = currency
        self.language = language

        self.driver = self._init_driver()
        self.wait = WebDriverWait(self.driver, self.config.TIMEOUT)

    def _init_driver(self):
        """Create and return a Chrome WebDriver configured for scraping."""
        options = Options()
        if self.config.HEADLESS:
            options.add_argument("--headless=new")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--incognito")
        # A maximized window helps keep page elements visible.
        options.add_argument("--start-maximized")
        service = Service(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=options)

    def build_booking_url(self, checkin_date, checkout_date):
        """Return the search-results URL for this city and the given dates.

        The query string is percent-encoded, so city names containing spaces
        or non-ASCII characters produce a valid URL.
        """
        from urllib.parse import urlencode

        query = urlencode({
            "ss": self.city,
            "checkin": checkin_date,
            "checkout": checkout_date,
        })
        return f"{self.config.BASE_URL}/searchresults.html?{query}"

    # ------------------- SETTERS -------------------
    def set_currency(self):
        """Best-effort selection of ``self.currency`` in the currency picker.

        Failures are logged and otherwise ignored so scraping can continue
        with whatever currency the page already shows.
        """
        try:
            currency_button = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.ca2ca5203b")))
            # JavaScript click avoids "element not interactable" errors.
            self.driver.execute_script("arguments[0].click();", currency_button)
            time.sleep(1)
            currency_option = self.wait.until(EC.element_to_be_clickable((By.XPATH, f"//div[contains(@class,'CurrencyPicker_currency') and text()='{self.currency}']")))
            self.driver.execute_script("arguments[0].click();", currency_option)
            time.sleep(2)
        except Exception as e:
            logging.error(f"Error setting currency: {e}")

    def set_language(self):
        """Best-effort selection of ``self.language`` in the language picker.

        Failures are logged and otherwise ignored.
        """
        try:
            lang_button = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.c3bdfd4ac2.b96c49a28c.c256f1a28a.a18a170653.c82b1db4f9.ba98511f54")))
            self.driver.execute_script("arguments[0].click();", lang_button)
            time.sleep(1)
            lang_option = self.wait.until(EC.element_to_be_clickable((By.XPATH, f"//span[contains(@class,'Picker_selection-text') and text()='{self.language}']")))
            self.driver.execute_script("arguments[0].click();", lang_option)
            time.sleep(2)
        except Exception as e:
            logging.error(f"Error setting language: {e}")

    @staticmethod
    def _adjust_counter(row, desired_value):
        """Click a row's plus/minus buttons until its counter reaches ``desired_value``."""
        counter = row.find_element(By.CSS_SELECTOR, "span.e32aa465fd")
        # The row is expected to hold exactly two buttons: minus, then plus.
        minus_button, plus_button = row.find_elements(By.TAG_NAME, "button")
        current_value = int(counter.text)
        while current_value < desired_value:
            plus_button.click()
            current_value += 1
        while current_value > desired_value:
            minus_button.click()
            current_value -= 1

    def set_guests(self):
        """Best-effort update of the occupancy counters.

        The first three counter rows are set to ``adults``, ``children`` and
        ``rooms`` respectively. Failures are logged and otherwise ignored.
        """
        try:
            occupancy_button = self.wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-testid='occupancy-config']")))
            occupancy_button.click()
            time.sleep(1)
            rows = self.wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.e301a14002")))
            self._adjust_counter(rows[0], self.adults)
            self._adjust_counter(rows[1], self.children)
            self._adjust_counter(rows[2], self.rooms)
            # Click on the page body to dismiss the occupancy popup.
            self.driver.execute_script("document.body.click()")
            time.sleep(1)
        except Exception as e:
            logging.error(f"Error setting guests: {e}")

    # ------------------- LOAD ALL RESULTS -------------------
    def _click_load_more(self):
        """Click the 'Load more results' button if it is present and visible.

        Returns:
            True when the button was clicked, False when it is missing,
            hidden, or the click failed.
        """
        try:
            load_more_button = self.driver.find_element(By.XPATH, "//button[.//span[contains(text(), 'Load more results')]]")
            if not load_more_button.is_displayed():
                return False
            print("⬇️ Found 'Load more' button. Clicking...")
            self.driver.execute_script("arguments[0].click();", load_more_button)
            return True
        except Exception:
            return False

    def load_all_results(self):
        """Scroll and click 'Load more results' until the page stops growing.

        The loop ends once no button can be clicked and the document height
        is unchanged on two consecutive checks.
        """
        print("🔄 Loading all results (Deep Scroll mode)...")
        time.sleep(2)

        # Fall back to the previous hard-coded pause if the config lacks it.
        scroll_pause = getattr(self.config, "SCROLL_PAUSE", 3)
        last_height = self.driver.execute_script("return document.body.scrollHeight")
        consecutive_errors = 0

        while True:
            # 1. Scroll to the bottom and give the page time to react.
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(scroll_pause)

            # 2. Try the "Load more" button.
            if self._click_load_more():
                # Long randomized wait so the new results have time to load.
                wait_time = random.uniform(5, 8)
                print(f"⏳ Waiting {wait_time:.1f}s for new results to load...")
                time.sleep(wait_time)
                consecutive_errors = 0
                continue

            # No button: either we reached the end or the page is loading
            # slowly. Nudge the scroll up and back down to trigger lazy loading.
            print("⚠️ No button found via scroll. Checking if truly finished...")

            self.driver.execute_script("window.scrollBy(0, -300);")
            time.sleep(1)
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

            new_height = self.driver.execute_script("return document.body.scrollHeight")

            # Report how many property cards are currently loaded.
            cards = self.driver.find_elements(By.CSS_SELECTOR, "div[data-testid='property-card']")
            print(f"   -> Current loaded count: {len(cards)}")

            if new_height == last_height:
                consecutive_errors += 1
                # Two unchanged checks in a row are treated as the end.
                if consecutive_errors >= 2:
                    print("✅ Reached absolute bottom.")
                    break
            else:
                # The page grew, so keep trying.
                last_height = new_height
                consecutive_errors = 0

    def close(self):
        """Quit the browser and release the WebDriver."""
        self.driver.quit()

# ------------------- SCRAPING FUNCTION -------------------
def _parse_price(price_text):
    """Convert a displayed price such as '€ 1,234' to an int, or None.

    Currency symbols, letters and whitespace are ignored, so the result does
    not depend on the selected currency. Thousands separators ('.' or ',')
    are removed and a trailing one- or two-digit decimal part is dropped.
    """
    import re

    cleaned = re.sub(r"[^\d.,]", "", price_text or "")
    cleaned = re.sub(r"[.,]\d{1,2}$", "", cleaned)  # drop decimal tail
    cleaned = re.sub(r"[.,]", "", cleaned)  # drop grouping separators
    return int(cleaned) if cleaned else None


def _collect_hotel_links(driver):
    """Return the unique property links of the loaded result cards, in page order."""
    seen = set()
    links = []
    for card in driver.find_elements(By.CSS_SELECTOR, "div[data-testid='property-card']"):
        try:
            href = card.find_element(By.TAG_NAME, "a").get_attribute("href")
        except Exception:
            # Card without a usable link: skip it.
            continue
        if href and href not in seen:
            seen.add(href)
            links.append(href)
    return links


def _read_hotel_name(driver):
    """Return the property name from the open hotel page, or 'N/A'."""
    try:
        return WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "h2.pp-header__title"))).text.strip()
    except Exception:
        pass
    try:
        # Fallback locator for the name.
        return driver.find_element(By.ID, "hp_hotel_name").text.strip()
    except Exception:
        return "N/A"


def _read_hotel_description(driver):
    """Return the property description from the open hotel page, or 'N/A'."""
    try:
        return driver.find_element(By.CSS_SELECTOR, "p[data-testid='property-description']").text
    except Exception:
        return "N/A"


def _read_hotel_price(driver):
    """Return the first parseable price on the open hotel page, or None."""
    locators = (
        (By.CSS_SELECTOR, "div.bui-price-display__value span.prco-valign-middle-helper"),
        # Secondary attempt with a looser selector.
        (By.XPATH, "//span[contains(@class, 'prco-valign-middle-helper')]"),
    )
    for by, selector in locators:
        try:
            price = _parse_price(driver.find_element(by, selector).text)
        except Exception:
            continue
        if price is not None:
            return price
    return None


def scrape_city(scraper):
    """Scrape every hotel listed for the scraper's city and date range(s).

    For each (check-in, check-out) pair the search page is opened, currency,
    language and guests are applied, all results are loaded, and each hotel
    page is visited to read its name, description and price.

    Args:
        scraper: A ``BookingScraper`` whose driver is already running.

    Returns:
        A list of dicts with keys ``city``, ``hotel``, ``text``, ``price``,
        ``link`` and ``date``. ``price`` is an int or None; ``hotel`` and
        ``text`` fall back to ``"N/A"`` when not found.
    """
    all_hotels = []
    date_ranges = generate_dates_range(scraper.checkin)

    for checkin_date, checkout_date in date_ranges:
        print(f"🔹 Scraping {scraper.city} from {checkin_date} to {checkout_date}")
        url = scraper.build_booking_url(checkin_date, checkout_date)
        scraper.driver.get(url)
        time.sleep(5)  # Long initial wait for the search page.

        scraper.set_currency()
        scraper.set_language()
        scraper.set_guests()

        # Exhaustive load of the result list.
        scraper.load_all_results()

        print("🔍 Collecting hotel links...")
        hotel_links = _collect_hotel_links(scraper.driver)
        total = len(hotel_links)

        print(f"🎉 FOUND {total} HOTELS TOTAL. Starting extraction...")

        # Visit the hotels one by one.
        for position, link in enumerate(hotel_links, start=1):
            try:
                scraper.driver.get(link)
                # Variable delay so requests look less automated.
                time.sleep(random.uniform(1.5, 3))

                # Scroll a little so dynamically loaded prices appear.
                scraper.driver.execute_script("window.scrollBy(0, 300);")

                name = _read_hotel_name(scraper.driver)
                description = _read_hotel_description(scraper.driver)
                price = _read_hotel_price(scraper.driver)

                all_hotels.append({
                    "city": scraper.city, "hotel": name, "text": description,
                    "price": price, "link": link, "date": f"{checkin_date} to {checkout_date}"
                })
                print(f"[{position}/{total}] OK: {name} - {price} {scraper.currency}")

            except Exception as e:
                print(f"❌ Error scraping URL {link}: {e}")
                continue

    return all_hotels

# ------------------- RUN -------------------
if __name__ == "__main__":
    # Search parameters for this run.
    config = Config()
    city = "Alicante"
    checkin = "2026-03-16"

    scraper = BookingScraper(config, city, checkin, adults=2, children=0, rooms=1)

    # Always release the browser, even if scraping fails or is interrupted.
    try:
        hotels_data = scrape_city(scraper)
    finally:
        scraper.close()

    if hotels_data:
        df = pd.DataFrame(hotels_data)
        # Flatten descriptions to one line and avoid embedded double quotes
        # so every record stays on a single, fully quoted CSV row.
        df['text'] = df['text'].astype(str).str.replace("\n", " ").str.replace('"', "'")
        filename = f"booking_{city}_FULL_V3.csv"
        df.to_csv(filename, index=False, sep=",", quoting=csv.QUOTE_ALL, encoding="utf-8")
        print(f"✅ CSV saved: {filename}")

🔹 Scraping Alicante from 2026-03-16 to 2026-03-23
🔄 Loading all results (Deep Scroll mode)...
⚠️ No button found via scroll. Checking if truly finished...
   -> Current loaded count: 75
⬇️ Found 'Load more' button. Clicking...
⏳ Waiting 5.5s for new results to load...
⬇️ Found 'Load more' button. Clicking...
⏳ Waiting 6.7s for new results to load...
⬇️ Found 'Load more' button. Clicking...
⏳ Waiting 5.8s for new results to load...
⬇️ Found 'Load more' button. Clicking...
⏳ Waiting 7.5s for new results to load...
⬇️ Found 'Load more' button. Clicking...
⏳ Waiting 7.9s for new results to load...
⬇️ Found 'Load more' button. Clicking...
⏳ Waiting 6.2s for new results to load...
⬇️ Found 'Load more' button. Clicking...
⏳ Waiting 6.5s for new results to load...
⬇️ Found 'Load more' button. Clicking...
⏳ Waiting 5.7s for new results to load...
⬇️ Found 'Load more' button. Clicking...
⏳ Waiting 7.5s for new results to load...
‚¨á