In [3]:
import csv
import re
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Site root; request URLs are built by joining a path onto it with urljoin.
BASE_URL = "https://www.coordenadas.com.es"
# Path of the listing page that scrape_all requests first.
START_PATH = "/espana/pueblos-de-madrid/28/1"

# HTTP headers that get_soup sends with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; simple-scraper/1.0)"
}

def get_soup(url: str) -> BeautifulSoup:
    """Download *url* and return its body parsed with ``html.parser``.

    The request carries the module-level ``HEADERS`` and a 30 second
    timeout. ``raise_for_status()`` is called before parsing, so an HTTP
    error response raises instead of being handed to the parser.
    """
    response = requests.get(url, timeout=30, headers=HEADERS)
    response.raise_for_status()
    html = response.text
    return BeautifulSoup(html, "html.parser")

def extract_total_pages(soup: BeautifulSoup) -> int:
    """Return the highest page number linked from the pagination list.

    Looks for ``ul.pagination`` in *soup* and scans the ``href`` of each
    link inside it for the listing path followed by a page number.
    Returns 1 when there is no pagination element or no link matches.
    """
    pager = soup.select_one("ul.pagination")
    if not pager:
        return 1

    page_link = re.compile(r"/espana/pueblos-de-madrid/28/(\d+)")
    hits = (page_link.search(link["href"]) for link in pager.select("a[href]"))
    # Seed the candidates with 1 so the result never drops below one page.
    return max([1, *(int(hit.group(1)) for hit in hits if hit)])

def parse_table_rows(soup: BeautifulSoup):
    """Collect ``{"city", "lat", "lon"}`` dicts from the page's ``table.table``.

    For each ``tbody tr`` the city name is the text of the link in the
    first cell, with a leading "Coordenadas " prefix removed, and the
    coordinates are read from the third cell as ``"<lat>,<lon>"``.

    A row is skipped when it has fewer than three cells, when its first
    cell has no link, or when the third cell is not two comma-separated
    floats. Returns an empty list if the table is not found.
    """
    listing = soup.select_one("table.table")
    if not listing:
        return []

    parsed = []
    for row in listing.select("tbody tr"):
        cells = row.select("td")
        if len(cells) < 3:
            continue

        link = cells[0].select_one("a")
        if not link:
            continue

        name = re.sub(r"^Coordenadas\s+", "", link.get_text(strip=True))

        # Split on the first comma only; a missing comma means no usable pair.
        lat_text, comma, lon_text = cells[2].get_text(strip=True).partition(",")
        if not comma:
            continue

        try:
            latitude = float(lat_text)
            longitude = float(lon_text)
        except ValueError:
            continue

        parsed.append({"city": name, "lat": latitude, "lon": longitude})

    return parsed

def scrape_all():
    """Scrape every listing page and return the combined rows.

    Fetches the page at ``START_PATH``, reads the total page count from
    its pagination, and then requests the remaining pages in order.

    Returns:
        A list with the dicts produced by ``parse_table_rows`` for each
        page, in page order.
    """
    first_soup = get_soup(urljoin(BASE_URL, START_PATH))
    total_pages = extract_total_pages(first_soup)

    # START_PATH is page 1 of the listing, so parse the soup already in
    # hand instead of downloading the same page a second time.
    all_rows = list(parse_table_rows(first_soup))

    for page in range(2, total_pages + 1):
        # Pause between consecutive requests only; nothing follows the
        # last page, so there is no trailing delay.
        time.sleep(0.8)
        url = urljoin(BASE_URL, f"/espana/pueblos-de-madrid/28/{page}")
        all_rows.extend(parse_table_rows(get_soup(url)))

    return all_rows

def save_csv(rows, filename="pueblos_madrid_coords.csv"):
    """Write *rows* to *filename* as UTF-8 CSV with a header line.

    Each row is a dict with the keys ``city``, ``lat`` and ``lon``; the
    columns are written in that order. An existing file is overwritten.
    """
    columns = ["city", "lat", "lon"]
    with open(filename, "w", newline="", encoding="utf-8") as out:
        sink = csv.DictWriter(out, fieldnames=columns)
        sink.writeheader()
        for record in rows:
            sink.writerow(record)

# Entry point: scrape every page, report the row count, then write the CSV
# to save_csv's default filename.
if __name__ == "__main__":
    rows = scrape_all()
    print(f"Total filas: {len(rows)}")
    save_csv(rows)
    print("CSV generado correctamente")


Total filas: 179
CSV generado correctamente
