# UCU - Proyecto Webscraping
Proyecto para la Licenciatura en Datos y Negocios de la Universidad Católica del Uruguay

# Scraping de propiedades — Inmobiliaria Uruguay
Este notebook descarga listados **públicos** desde *inmobiliariauruguay.com* usando `requests`
y analiza el HTML con `BeautifulSoup` para extraer:

- Ciudad (agrupada por **Departamento** en el sitio, p. ej. *Montevideo*, *Maldonado*, *Canelones*)
- Precio (USD)
- Tamaño (m², si está disponible)
- Dormitorios (si está disponible)
- Link al aviso

Requisitos:
- `requests`, `beautifulsoup4` (instalar con `pip install requests beautifulsoup4`)

## 1) Imports y funciones auxiliares

In [5]:

import re
import json
import time
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup

# Browser-like request headers applied to every request made through `session`.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/123.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "es-ES,es;q=0.9",
}

# Root URL of the site being scraped.
BASE = "https://inmobiliariauruguay.com"

# A single shared session, so HEADERS are set once and reused by all requests.
session = requests.Session()
session.headers.update(HEADERS)

def get_soup(url):
    """Fetch *url* through the shared session and return the parsed HTML.

    The request uses a 30-second timeout, and ``raise_for_status()`` is
    called on the response before parsing, so HTTP error responses raise
    instead of being parsed.
    """
    response = session.get(url, timeout=30)
    response.raise_for_status()
    html = response.text
    return BeautifulSoup(html, "html.parser")

def to_int(text):
    """Return the first number found in *text* as an ``int``, or ``None``.

    A number is a digit followed by any run of digits and dots; the dots
    are dropped before conversion, so ``"120.000"`` becomes ``120000``.
    Non-breaking spaces are replaced by regular spaces before searching.
    ``None`` is returned when *text* is ``None`` or contains no digit.
    """
    if text is None:
        return None
    match = re.search(r"[0-9][0-9\.]*", text.replace("\xa0", " "))
    if match is None:
        return None
    digits = match.group(0).replace(".", "")
    try:
        return int(digits)
    except ValueError:
        return None

def clean_m2(x):
    """Strip the ``m2`` / ``m²`` markers from *x* and parse it with ``to_int``.

    Returns ``None`` when *x* is ``None``; otherwise returns whatever
    ``to_int`` yields for the remaining text.
    """
    if x is None:
        return None
    for unit in ("m2", "m²"):
        x = x.replace(unit, "")
    return to_int(x)

def parse_title_city(title):
    """Guess a city name from a listing title by taking its last segment.

    The title is split on commas, en dashes and hyphens. The final segment
    is returned with surrounding whitespace and any trailing periods
    removed.

    Returns ``None`` when *title* is empty/``None`` or when the final
    segment is empty after cleaning (e.g. the title ends with a separator).
    """
    if not title:
        return None
    # re.split always yields at least one element, so [-1] is safe.
    last = re.split(r"[,–-]", title)[-1].strip()
    # Remove trailing periods and whitespace together, so "Montevideo ." does
    # not keep a dangling space and "Canelones.." does not keep a period.
    last = re.sub(r"[\s.]+$", "", last)
    return last or None

## 2) Listado y paginación

In [6]:

def _find_price_text(anchor, max_levels=5):
    """Return the first text containing "USD" near *anchor*, or ``None``.

    Walks up to *max_levels* ancestors of *anchor* and, at each level,
    searches that ancestor's subtree for a text node containing "USD".
    """
    node = anchor
    for _ in range(max_levels):
        node = node.find_parent()
        if node is None:
            return None
        hit = node.find(string=lambda s: isinstance(s, str) and "USD" in s)
        if hit:
            return str(hit).strip()
    return None


def _find_next_page_url(soup, current_url, next_page):
    """Return the URL of the anchor whose text is *next_page*, or ``None``.

    When several anchors match, the last one in document order wins.
    """
    label = str(next_page)
    found = None
    for a in soup.select("a"):
        href = a.get("href")
        # Anchors without href are skipped: urljoin(base, None) returns the
        # base unchanged, which would silently redirect the crawl.
        if href and a.get_text(strip=True) == label:
            found = urljoin(current_url, href)
    return found


def extract_cards_from_listing(list_url, max_pages=5):
    """Collect listing cards from a results page and its following pages.

    Every ``<a>`` inside an ``<h2>`` or ``<h3>`` that has an ``href`` is
    treated as a card. Pagination follows the anchor whose text equals the
    next page number and stops after *max_pages* pages, when no such anchor
    exists, or when it points back to the current URL.

    Links are resolved against the URL of the page they were found on, so
    relative hrefs (e.g. ``?page=2``) resolve correctly.

    Args:
        list_url: URL of the first listing page.
        max_pages: Maximum number of listing pages to fetch.

    Returns:
        A list of dicts with keys ``"title"``, ``"price_text"`` (``None``
        when no "USD" text is found nearby) and ``"detail_url"``.
        Duplicates are not removed.
    """
    out = []
    page = 1
    url = list_url
    while page <= max_pages and url:
        soup = get_soup(url)
        for card in soup.select("h2 a, h3 a"):
            href = card.get("href")
            if not href:
                continue
            out.append({
                "title": card.get_text(strip=True),
                "price_text": _find_price_text(card),
                "detail_url": urljoin(url, href),
            })
        next_link = _find_next_page_url(soup, url, page + 1)
        if not next_link or next_link == url:
            break
        url = next_link
        page += 1
    return out

## 3) Parser de la página de detalle

In [8]:
def parse_detail(detail_url):
    """Download one property page and extract its main attributes.

    Every ``<li>`` whose text contains a colon is read as a
    ``label: value`` pair (label lower-cased). Price, size, bedrooms and
    city are looked up in those pairs, with two fallbacks: the city falls
    back to the last segment of the page title, and the price falls back to
    the first "USD <number>" text found anywhere on the page.

    Args:
        detail_url: URL of the property detail page.

    Returns:
        A dict with keys ``"ciudad"``, ``"precio"``, ``"tamano"``,
        ``"habitaciones"`` (each ``None`` when not found) and ``"link"``
        (the given *detail_url*).
    """
    soup = get_soup(detail_url)
    title_el = soup.find(["h1", "h2"])
    title = title_el.get_text(strip=True) if title_el else None

    detalles = {}
    for li in soup.select("li"):
        txt = li.get_text(" ", strip=True)
        if ":" in txt:
            k, v = (t.strip() for t in txt.split(":", 1))
            detalles[k.lower()] = v

    price = to_int(detalles["precio"]) if "precio" in detalles else None

    # Labels are tried in priority order; the first one that parses wins.
    # NOTE(review): "dificada" looks like a truncated label and can only match
    # a label that is exactly that text - confirm against the site.
    size_m2 = None
    for key in ("tamaño de inmueble", "tamaño del inmueble", "superficie",
                "area de terreno", "área de terreno", "total edificado",
                "dificada"):
        if key in detalles:
            size_m2 = clean_m2(detalles[key])
            if size_m2 is not None:
                break

    bedrooms = None
    for key in ("dormitorios", "dormitorio", "habitaciones"):
        if key in detalles:
            bedrooms = to_int(detalles[key])
            if bedrooms is not None:
                break

    city = None
    if "departamento" in detalles:
        city = detalles["departamento"].split(",")[0].strip()
    if not city and title:
        city = parse_title_city(title)

    if price is None:
        usd_pattern = re.compile(r"USD\s*([0-9\.,]+)")
        usd_text = soup.find(string=usd_pattern)
        if usd_text:
            # Parse only the amount that follows "USD". Parsing the whole
            # text node would pick up any earlier number in the same text
            # (e.g. "2 dormitorios USD 120.000" would yield 2).
            match = usd_pattern.search(str(usd_text))
            if match:
                price = to_int(match.group(1))

    return {
        "ciudad": city,
        "precio": price,
        "tamano": size_m2,
        "habitaciones": bedrooms,
        "link": detail_url,
    }

## 4) Ejecución principal y guardado en `propiedades.json`

In [None]:
def scrape_por_ciudades(objetivos, min_props=10, max_pages=5, delay=1.0):
    """Scrape property details for each named listing URL.

    For every ``name -> listing URL`` pair, the listing cards are collected
    and their detail pages parsed until *min_props* properties have been
    gathered or the cards run out. Duplicate detail URLs are visited once.
    A detail page that raises is reported with ``print`` and skipped.

    Args:
        objetivos: Mapping of group name to listing URL.
        min_props: Number of properties to collect per group; collection
            stops once this many are gathered, so it acts as an upper bound.
        max_pages: Maximum listing pages to walk per group.
        delay: Seconds to wait after each detail-page request.

    Returns:
        ``{"ciudades": [{"nombre": name, "propiedades": [...]}, ...]}``
        where each property has keys ``"precio"``, ``"tamano"``,
        ``"habitaciones"`` and ``"link"``. Properties are grouped under the
        caller-supplied name; the city parsed from the page is not included.
    """
    data = {"ciudades": []}
    for nombre, list_url in objetivos.items():
        print(f"Recolectando en {nombre} ...")
        cards = extract_cards_from_listing(list_url, max_pages=max_pages)
        print(f"  {len(cards)} tarjetas encontradas")
        props = []
        seen = set()
        for card in cards:
            if len(props) >= min_props:
                break
            detail = card["detail_url"]
            if detail in seen:
                continue
            seen.add(detail)
            try:
                info = parse_detail(detail)
            except Exception as e:
                # Best-effort scrape: report the failure and move on.
                print(f"    Error {detail}: {e}")
            else:
                props.append({
                    "precio": info.get("precio"),
                    "tamano": info.get("tamano"),
                    "habitaciones": info.get("habitaciones"),
                    "link": info.get("link"),
                })
                print(f"    OK: {detail}")
            finally:
                # Pause after every request, including failed ones, so that
                # a run of errors does not hit the server back to back.
                time.sleep(delay)
        data["ciudades"].append({
            "nombre": nombre,
            "propiedades": props,
        })
    return data

import json

# Listing URLs to crawl, keyed by the group name used in the output file.
OBJETIVOS = {
    "Montevideo": "https://inmobiliariauruguay.com/search-results/?type%5B%5D=urbana&states%5B%5D=montevideo",
    "Maldonado": "https://inmobiliariauruguay.com/state/maldonado/",
    "Canelones": "https://inmobiliariauruguay.com/state/canelones/",
}

# Run the scrape: 10 properties per group, 5 listing pages at most,
# half a second between detail requests.
data = scrape_por_ciudades(OBJETIVOS, delay=0.5, max_pages=5, min_props=10)

# Write the result as indented UTF-8 JSON, keeping accented characters as-is.
with open("propiedades.json", mode="w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

print("Archivo generado: propiedades.json")

Recolectando en Montevideo ...
  22 tarjetas encontradas
    OK: https://inmobiliariauruguay.com/property/apartamento-de-2-dormitorios-en-buceo-montevideo/
    OK: https://inmobiliariauruguay.com/property/casa-2-dormitorios-fondo-entrada-auto-con-porton-en-la-union-montevideo/
    OK: https://inmobiliariauruguay.com/property/2-casa-en-venta-en-punta-de-manga-montevideo/
    OK: https://inmobiliariauruguay.com/property/2-casas-en-un-mismo-padron-en-puntas-de-manga-montevideo/
    OK: https://inmobiliariauruguay.com/property/galpon-a-la-venta-en-montevideo-2/
    OK: https://inmobiliariauruguay.com/property/galpon-a-la-venta-en-montevideo/
    OK: https://inmobiliariauruguay.com/property/apartamento-en-montevideo-9/
    OK: https://inmobiliariauruguay.com/property/apartamento-en-montevideo-8/
    OK: https://inmobiliariauruguay.com/property/hermoso-apartamento-en-montevideo/
    OK: https://inmobiliariauruguay.com/property/apartamento-en-montevideo-6/
Recolectando en Maldonado ...
