In [2]:
# Import libraries
import os
import time
import requests
import pandas as pd
from dotenv import load_dotenv

In [3]:
# Configuration
load_dotenv()
API_KEY = os.environ.get("API_KEY")
INPUT_CSV = os.path.join("Datasets", "municipios_madrid_menores_50000.csv")
OUTPUT_CSV = os.path.join("Datasets", "google_places_municipios.csv")

# Output category -> list of Places "type" values queried for that category.
# Transport is the only category that combines several types.
PLACE_TYPES = dict(
    gym=["gym"],
    restaurant=["restaurant"],
    pharmacy=["pharmacy"],
    school=["school"],
    transport=["transit_station", "bus_station", "train_station", "subway_station"],
)

# === Search parameters ===
# Search radius in metres around each municipality's coordinates
# (7 km here; a value between 5,000 and 10,000 m is considered reasonable).
RADIUS = 7_000
# A single page of results (each page has up to 20 results).
# NOTE(review): no code in the visible cells reads MAX_PAGES - confirm it is still needed.
MAX_PAGES = 1
# Seconds to wait between consecutive requests.
SLEEP_BETWEEN_REQS = 1.5

In [4]:
# === FUNCIONES ===
def fetch_places(lat, lon, place_type):
    """Run a single Nearby Search request for one place type.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        place_type: Value sent as the ``type`` request parameter.

    Returns:
        The list stored under the response's ``results`` key. An empty list
        is returned (after printing a diagnostic) when the HTTP status is not
        200 or when the body's ``status`` field reports an API-level error.
    """
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    params = {
        "key": API_KEY,
        "location": f"{lat},{lon}",
        "radius": RADIUS,
        "type": place_type
    }
    r = requests.get(url, params=params, timeout=30)
    if r.status_code != 200:
        print(f"Error HTTP {r.status_code}: {r.text[:200]}")
        return []
    data = r.json()
    # Request-level failures (bad key, quota exhausted, invalid parameters)
    # are reported in the JSON body's "status" field rather than via the HTTP
    # status code, so they must be checked separately or they would look like
    # "no places found".
    status = data.get("status")
    if status not in (None, "OK", "ZERO_RESULTS"):
        print(f"Error API {status}: {data.get('error_message', '')}")
        return []
    return data.get("results", [])

def summarize_places(results):
    """Aggregate rating statistics over a list of place dicts.

    Only places that carry a ``rating`` contribute to the rating figures and
    to the review total; ``count`` covers every entry in ``results``.

    Returns:
        A dict with ``count``, ``total_reviews``, ``weighted_avg`` (ratings
        weighted by ``user_ratings_total``; ``None`` when there are no
        reviews), ``rating_min`` and ``rating_max`` (``None`` when no place
        has a rating).
    """
    scores = []
    review_total = 0
    score_times_reviews = 0

    for place in results:
        score = place.get("rating")
        if score is None:
            # Unrated places are counted but add nothing to the statistics.
            continue
        # A missing or null review count is treated as zero.
        reviews = place.get("user_ratings_total", 0) or 0
        scores.append(score)
        score_times_reviews += score * reviews
        review_total += reviews

    if review_total > 0:
        weighted_avg = score_times_reviews / review_total
    else:
        weighted_avg = None

    lowest, highest = (min(scores), max(scores)) if scores else (None, None)

    return dict(
        count=len(results),
        total_reviews=review_total,
        weighted_avg=weighted_avg,
        rating_min=lowest,
        rating_max=highest,
    )

In [5]:
# === MAIN ===
def _dedupe_places(places):
    """Drop repeated places, keeping the first occurrence of each ``place_id``.

    Entries without a ``place_id`` cannot be matched against others, so all
    of them are kept.
    """
    seen = set()
    unique = []
    for place in places:
        pid = place.get("place_id")
        if pid is not None:
            if pid in seen:
                continue
            seen.add(pid)
        unique.append(place)
    return unique


def main():
    """Collect per-municipality place statistics and write them to OUTPUT_CSV.

    Reads INPUT_CSV, and for every row queries ``fetch_places`` once per
    subtype of each category in PLACE_TYPES, summarises the combined results
    with ``summarize_places`` and appends one record. The accumulated records
    are rewritten to OUTPUT_CSV after every municipality so that progress
    survives an interruption.

    Raises:
        SystemExit: if API_KEY is unset, empty, or still the placeholder.
    """
    # A missing environment variable leaves API_KEY as None (or ""), which
    # must be rejected just like the placeholder value.
    if not API_KEY or API_KEY == "TU_API_KEY_AQUI":
        raise SystemExit("⚠️  Pon tu API key en la variable API_KEY antes de ejecutar.")

    df = pd.read_csv(INPUT_CSV)
    required_cols = {"cod_municipio", "municipio", "latitud", "longitud", "poblacion"}
    if not required_cols.issubset(df.columns):
        print(" Faltan columnas esperadas:", required_cols - set(df.columns))
        return

    print(f"Procesando {len(df)} municipios (modo gratuito, 1 página por tipo).")
    results = []

    # Count rows by position: the DataFrame index label is not guaranteed to
    # be a 0-based running number.
    for pos, (_, row) in enumerate(df.iterrows(), start=1):
        cod = row["cod_municipio"]
        name = row["municipio"]
        lat, lon = row["latitud"], row["longitud"]

        print(f"[{pos}/{len(df)}] {name} ({cod})")

        rec = {
            "cod_municipio": cod,
            "municipio": name,
            "latitud": lat,
            "longitud": lon,
            "poblacion": row["poblacion"]
        }

        for cat, subtypes in PLACE_TYPES.items():
            all_places = []
            for subtype in subtypes:
                try:
                    all_places.extend(fetch_places(lat, lon, subtype))
                except Exception as e:
                    # Best effort: a failed subtype is logged and skipped.
                    print(f"Error en {name} ({subtype}):", e)
                finally:
                    # Pause after every attempt, including failed ones, so
                    # errors do not cause back-to-back requests.
                    time.sleep(SLEEP_BETWEEN_REQS)
            # The same place can be returned for several subtypes of one
            # category; count it only once.
            summary = summarize_places(_dedupe_places(all_places))
            rec[f"n_{cat}"] = summary["count"]
            rec[f"{cat}_total_reviews"] = summary["total_reviews"]
            rec[f"{cat}_weighted_avg_rating"] = (
                summary["weighted_avg"] if summary["weighted_avg"] is not None else ""
            )
            rec[f"{cat}_rating_min"] = summary["rating_min"]
            rec[f"{cat}_rating_max"] = summary["rating_max"]

        results.append(rec)
        # Save partial progress after each municipality.
        pd.DataFrame(results).to_csv(OUTPUT_CSV, index=False)

    print("Datos guardados en:", OUTPUT_CSV)

if __name__ == "__main__":
    main()

Procesando 155 municipios (modo gratuito, 1 página por tipo).
[1/155] Acebeda (La) (14)
[2/155] Ajalvir (29)
[3/155] Alameda del Valle (35)
[4/155] Álamo (El) (40)
[5/155] Aldea del Fresno (88)
[6/155] Algete (91)
[7/155] Alpedrete (105)
[8/155] Ambite (112)
[9/155] Anchuelo (127)
[10/155] Arroyomolinos (151)
[11/155] Atazar (El) (164)
[12/155] Batres (170)
[13/155] Becerril de la Sierra (186)
[14/155] Belmonte de Tajo (199)
[15/155] Berrueco (El) (210)
[16/155] Berzosa del Lozoya (203)
[17/155] Boalo (El) (231)
[18/155] Braojos (246)
[19/155] Brea de Tajo (259)
[20/155] Brunete (262)
[21/155] Buitrago del Lozoya (278)
[22/155] Bustarviejo (284)
[23/155] Cabanillas de la Sierra (297)
[24/155] Cabrera (La) (301)
[25/155] Cadalso de los Vidrios (318)
[26/155] Camarma de Esteruelas (323)
[27/155] Campo Real (339)
[28/155] Canencia (344)
[29/155] Carabaña (357)
[30/155] Casarrubuelos (360)
[31/155] Cenicientos (376)
[32/155] Cercedilla (382)
[33/155] Cervera de Buitrago (395)
[34/155] Chap