In [12]:
from __future__ import annotations
import os
from datetime import date, timedelta
from typing import List, Dict, Any
import httpx
import requests
import pandas as pd
import json
import unicodedata

from dotenv import load_dotenv

# Load variables from a local .env file (when one exists) into the process environment.
# The RapidAPI key used further down is read from the environment via os.getenv.
load_dotenv()

True

In [13]:
# country_labels.py
# English / French display labels for every code this notebook can resolve.
# Most keys are two-letter country codes; "AS", "LATAM" and "XE" are regional or
# catch-all buckets rather than countries (see their labels).
# _normalize() uses this table both to validate a code and to match a country name.
COUNTRY_LABELS = {
    "AE": {"en": "United Arab Emirates", "fr": "Émirats arabes unis"},
    "AR": {"en": "Argentina",            "fr": "Argentine"},
    "AS": {"en": "Asia (regional)",      "fr": "Asie (régional)"},
    "AT": {"en": "Austria",              "fr": "Autriche"},
    "AU": {"en": "Australia",            "fr": "Australie"},
    "BE": {"en": "Belgium",              "fr": "Belgique"},
    "BR": {"en": "Brazil",               "fr": "Brésil"},
    "CA": {"en": "Canada",               "fr": "Canada"},
    "CH": {"en": "Switzerland",          "fr": "Suisse"},
    "CL": {"en": "Chile",                "fr": "Chili"},
    "CN": {"en": "China",                "fr": "Chine"},
    "CO": {"en": "Colombia",             "fr": "Colombie"},
    "DE": {"en": "Germany",              "fr": "Allemagne"},
    "DK": {"en": "Denmark",              "fr": "Danemark"},
    "ES": {"en": "Spain",                "fr": "Espagne"},
    "FI": {"en": "Finland",              "fr": "Finlande"},
    "FR": {"en": "France",               "fr": "France"},
    "GB": {"en": "United Kingdom",       "fr": "Royaume-Uni"},
    "GR": {"en": "Greece",               "fr": "Grèce"},
    "HK": {"en": "Hong Kong",            "fr": "Hong Kong"},
    "HU": {"en": "Hungary",              "fr": "Hongrie"},
    "ID": {"en": "Indonesia",            "fr": "Indonésie"},
    "IE": {"en": "Ireland",              "fr": "Irlande"},
    "IN": {"en": "India",                "fr": "Inde"},
    "IS": {"en": "Iceland",              "fr": "Islande"},
    "IT": {"en": "Italy",                "fr": "Italie"},
    "JP": {"en": "Japan",                "fr": "Japon"},
    "KR": {"en": "South Korea",          "fr": "Corée du Sud"},
    "LATAM": {"en": "Latin America (regional)", "fr": "Amérique latine (régional)"},
    "MX": {"en": "Mexico",               "fr": "Mexique"},
    "MY": {"en": "Malaysia",             "fr": "Malaisie"},
    "NL": {"en": "Netherlands",          "fr": "Pays-Bas"},
    "NO": {"en": "Norway",               "fr": "Norvège"},
    "NZ": {"en": "New Zealand",          "fr": "Nouvelle-Zélande"},
    "PE": {"en": "Peru",                 "fr": "Pérou"},
    "PH": {"en": "Philippines",          "fr": "Philippines"},
    "PT": {"en": "Portugal",             "fr": "Portugal"},
    "SE": {"en": "Sweden",               "fr": "Suède"},
    "SG": {"en": "Singapore",            "fr": "Singapour"},
    "TH": {"en": "Thailand",             "fr": "Thaïlande"},
    "TR": {"en": "Türkiye",              "fr": "Turquie"},
    "TW": {"en": "Taiwan",               "fr": "Taïwan"},
    "US": {"en": "United States",        "fr": "États-Unis"},
    "VN": {"en": "Vietnam",              "fr": "Viêt Nam"},
    "XE": {"en": "Global / Other",       "fr": "Global / Autres"},
    "ZA": {"en": "South Africa",         "fr": "Afrique du Sud"},
}

# Handy aliases: spellings that are neither a code nor an exact label in
# COUNTRY_LABELS, mapped to the code they stand for.
# _normalize() looks these up by exact match AFTER lower-casing the input and
# stripping its accents, so every key must already be lower-case and accent-free.
ALIASES = {
    "usa": "US",
    "etats unis": "US",
    "royaume uni": "GB",
    "uae": "AE",
    "emirats": "AE",
}

In [14]:
def _country_key(text: str) -> str:
    """Fold `text` into a comparison key: trimmed, lower-case, accents removed,
    hyphens and runs of whitespace collapsed to single spaces."""
    decomposed = unicodedata.normalize("NFKD", text.strip().lower())
    without_marks = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return " ".join(without_marks.replace("-", " ").split())


def _normalize(country_or_name: str) -> str:
    """
    Resolve a country given as a code, a French name or an English name.

    Resolution order:
      1. the trimmed, upper-cased input is a key of COUNTRY_LABELS -> returned as is;
      2. the folded input matches a key of ALIASES;
      3. the folded input matches a French or English label of COUNTRY_LABELS.
    Folding ignores case, accents, and the difference between hyphens and spaces,
    so "Etats-Unis", "états unis" and "ETATS  UNIS" all resolve to "US".

    Returns:
        A key of COUNTRY_LABELS (e.g. "FR", "US").

    Raises:
        ValueError: the input is empty / whitespace-only, or matches nothing.
    """
    # Whitespace-only input is treated like an empty one so the caller gets the
    # explicit "empty parameter" message rather than the generic one.
    if not country_or_name or not country_or_name.strip():
        raise ValueError("Le paramètre est vide : impossible de résoudre le pays.")

    candidate = country_or_name.strip()

    # Simple case: the input already is a known code.
    as_code = candidate.upper()
    if as_code in COUNTRY_LABELS:
        return as_code

    key = _country_key(candidate)

    # Known aliases take precedence over labels.
    for alias, alias_code in ALIASES.items():
        if key == _country_key(alias):
            return alias_code

    # French / English names.
    for label_code, labels in COUNTRY_LABELS.items():
        if any(key == _country_key(name) for name in labels.values()):
            return label_code

    # Nothing matched.
    raise ValueError(
        f"Impossible de résoudre '{country_or_name}' en code pays. "
        f"Essayez un code ISO valide (ex: 'FR', 'US') ou un nom reconnu."
    )


In [15]:
def normalize_locales(locales: list[str], country_code: str) -> list[str]:
    """
    Expand bare language codes into full locales and return them sorted, without duplicates.

    Args:
        locales: locale strings; entries containing "_" (e.g. "fr_FR") are kept as is,
            bare ones (e.g. "fr") are looked up in a small per-country table and kept
            unchanged when the table has no entry. Falsy entries (None, "") are dropped.
        country_code: key into the per-country table ("US", "FR", "GB"); any other
            value means bare codes are returned unchanged.

    Returns:
        Sorted list of unique, non-empty locale strings.
    """
    # Minimal mapping for "bare" locales, per country.
    fallback = {
        "US": {"en": "en_US", "es": "es_US"},
        "FR": {"fr": "fr_FR"},
        "GB": {"en": "en_GB"},
    }
    per_country = fallback.get(country_code, {})

    normalized = set()
    for loc in locales or ():
        if not loc:
            # Skip None / "" up front: the `"_" in loc` test below would raise on None.
            continue
        normalized.add(loc if "_" in loc else per_country.get(loc, loc))
    return sorted(normalized)


In [16]:
# RapidAPI key, taken from the environment (None when the variable is not set).
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY")

# Headers passed to the requests made against the Hotels.com provider host.
HEADERS = {
    "X-RapidAPI-Key": RAPIDAPI_KEY,
    "X-RapidAPI-Host": "hotels-com-provider.p.rapidapi.com",
}

def get_locales(country: str) -> list[str]:
    """
    Return the locales listed for a country by the /v2/domains endpoint.

    `country` (name or code) is resolved to a code with _normalize. The JSON response
    is flattened with pandas.json_normalize, and every column named
    "<code>.supported_locales.<...>" that ends in ".key" or ".language_code" contributes
    its first-row value. ".language_code" values have "-" replaced by "_".
    Only non-empty string values are kept.

    Returns:
        Sorted list of unique locale strings (empty when nothing matches).

    Raises:
        ValueError: the country cannot be resolved (from _normalize).
        requests.HTTPError: the endpoint answered with an error status.
    """
    code = _normalize(country)  # guarantees a known code

    # --- API call ---
    url = "https://hotels-com-provider.p.rapidapi.com/v2/domains"
    r = requests.get(url, headers=HEADERS, timeout=20)
    r.raise_for_status()
    data = r.json()

    # Flatten the nested response into dotted column names.
    # NOTE(review): assumes the payload is one object keyed by domain code — confirm.
    df_domains = pd.json_normalize(data)

    # --- extract the locales ---
    prefix = f"{code}.supported_locales."
    locales = set()
    for col in df_domains.columns:
        if not col.startswith(prefix):
            continue
        is_language_code = col.endswith(".language_code")
        if not (is_language_code or col.endswith(".key")):
            continue
        val = df_domains.at[0, col]
        # Same guard for both column kinds: a None/NaN value would otherwise end up
        # in the result and make the final sort fail on mixed types.
        if not (val and isinstance(val, str)):
            continue
        locales.add(val.replace("-", "_") if is_language_code else val)
    return sorted(locales)


In [17]:
def get_first_locale(country: str) -> str | None:
    """
    Return the first locale available for a country.

    `country` may be a code ("FR", "US", ...) or a name ("France", "United States", ...).
    The lookup is delegated to get_locales, which queries /v2/domains; the first element
    of its result is returned (e.g. "fr_FR"), or None when the list is empty.
    """
    available = get_locales(country)
    if not available:
        return None
    return available[0]


In [18]:
# Short-named duplicates of RAPIDAPI_KEY / HEADERS (defined in an earlier cell):
# the same environment variable and the same two request headers.
KEY = os.environ.get("RAPIDAPI_KEY")
H = {
    "X-RapidAPI-Key": KEY,
    "X-RapidAPI-Host": "hotels-com-provider.p.rapidapi.com",
}

def get_city_gaia_id(city: str, country: str = "France", locale: str = "fr_FR", domain: str = "FR") -> str:
    """
    Look up the gaiaId of a city through /v2/regions.

    NOTE(review): a later cell defines another `get_city_gaia_id` that returns a tuple;
    once that cell has run, it replaces this version.

    Args:
        city: city name sent as the search query (e.g. "Nice", "La Mecque").
        country: country text that must appear (case-insensitively, as plain text)
            in the region's full name (e.g. "France", "Arabie Saoudite").
        locale: locale code sent to the API (e.g. "fr_FR", "ar_SA").
        domain: Hotels.com site code sent to the API (e.g. "FR", "SA", "US").

    Returns:
        The gaiaId (as a string) of the first CITY entry whose full name starts with
        `city` and contains `country`.

    Raises:
        requests.HTTPError: the endpoint answered with an error status.
        ValueError: no matching city, including when the response is empty or lacks
            the expected fields.
    """
    r = requests.get(
        "https://hotels-com-provider.p.rapidapi.com/v2/regions",
        headers=H,
        params={"query": city, "domain": domain, "locale": locale},
        timeout=20
    )
    r.raise_for_status()
    payload = r.json()
    items = payload["data"] if isinstance(payload, dict) and "data" in payload else payload
    df = pd.json_normalize(items)

    not_found_msg = f"Aucun gaiaId trouvé pour {city}, {country}"

    # An empty or unexpected payload has none of these columns; report it as
    # "not found" instead of letting a KeyError escape.
    expected = {"type", "regionNames.fullName", "gaiaId"}
    if df.empty or not expected.issubset(df.columns):
        raise ValueError(not_found_msg)

    # Keep CITY entries only.
    cities = df[df["type"].eq("CITY")]
    # Force text so the .str accessor works even when every name is missing.
    full_names = cities["regionNames.fullName"].fillna("").astype(str)

    # Country filter: literal substring (regex=False) so that names containing
    # regex metacharacters, such as "Asia (regional)", are matched as written.
    in_country = full_names.str.contains(country, case=False, na=False, regex=False)
    # Prefer the exact city: the full name must start with `city` (case-sensitive).
    starts_with_city = full_names.str.startswith(city, na=False)

    sel = cities[in_country & starts_with_city]
    if sel.empty:
        raise ValueError(not_found_msg)

    return str(sel.iloc[0]["gaiaId"])

# Usage example: performs a live /v2/regions call when this cell is executed.
if __name__ == "__main__":
    print(
        "Nice:",
        get_city_gaia_id(city="Nice", country="France", locale="fr_FR", domain="FR"),
    )


Nice: 2526


In [19]:
import requests, pandas as pd, unicodedata

def _norm_txt(s: str) -> str:
    s = unicodedata.normalize("NFKD", str(s).strip().lower())
    return "".join(ch for ch in s if not unicodedata.combining(ch))

def get_city_gaia_id(
    city: str,
    country: str = "France",
    locale: str | None = None,      # None -> picked automatically via get_locales
    domain: str | None = None       # None -> the country code resolved by _normalize
) -> tuple[str, str, str]:
    """
    Look up the gaiaId of a city through /v2/regions, ranking the candidates.

    This definition replaces the earlier single-string `get_city_gaia_id` of this
    notebook: it returns a tuple and resolves locale/domain on its own.

    Relies on:
      - _normalize(country) -> country code ("FR", "US", ...)
      - get_locales(code)   -> available locales (via /v2/domains)
      - COUNTRY_LABELS      -> French/English country names used for ranking

    Args:
        city: city name sent as the search query; must not be empty.
        country: country code or name.
        locale: locale sent to the API; when falsy, the first entry returned by
            get_locales is used, or "en_US" if there is none.
        domain: site code sent to the API; when falsy, the resolved country code.

    Returns:
        (gaiaId, fullName, effective_locale) of the best-ranked CITY entry.

    Raises:
        ValueError: empty city, unresolvable country, empty/unexpected response,
            or no candidate mentioning the city or the country.
        requests.HTTPError: the endpoint answered with status >= 400.
    """
    if not city or not str(city).strip():
        raise ValueError("Le paramètre 'city' est vide : impossible de chercher une ville.")

    code = _normalize(country)  # e.g. "FR"
    # Automatic locale when none is given.
    if not locale:
        locs = get_locales(code)
        locale = locs[0] if locs else "en_US"
    # Automatic domain when none is given (the API expects the country code here).
    if not domain:
        domain = code

    # --- /v2/regions call ---
    r = requests.get(
        "https://hotels-com-provider.p.rapidapi.com/v2/regions",
        headers=H,
        params={"query": city, "domain": domain, "locale": locale},
        timeout=20
    )
    if r.status_code >= 400:
        # Attach the response so callers can inspect it from the exception.
        raise requests.HTTPError(
            f"/v2/regions HTTP {r.status_code} — params(city={city}, country={country}, code={code}, "
            f"domain={domain}, locale={locale}) — body: {r.text[:300]}",
            response=r,
        )
    payload = r.json()
    items = payload["data"] if isinstance(payload, dict) and "data" in payload else payload
    if not isinstance(items, list) or not items:
        raise ValueError(f"Aucune donnée renvoyée pour '{city}' ({country}).")

    df = pd.json_normalize(items)
    if "type" not in df.columns:
        raise ValueError("Réponse inattendue: champ 'type' manquant.")
    df = df[df["type"].eq("CITY")].copy()
    if df.empty:
        raise ValueError(f"Aucune ville trouvée pour '{city}' ({country}).")

    if "regionNames.fullName" not in df.columns:
        df["regionNames.fullName"] = ""
    if "gaiaId" not in df.columns:
        raise ValueError("Réponse inattendue: champ 'gaiaId' manquant.")

    # --- scoring on normalized city and country ---
    city_n = _norm_txt(city)

    # Country names to look for in the full name: the French/English labels of the
    # resolved code, plus the caller's own spelling. A bare code is NOT used as a
    # needle: a two-letter substring such as "us" or "fr" matches unrelated places.
    country_names = {_norm_txt(label) for label in COUNTRY_LABELS.get(code, {}).values()}
    country_n = _norm_txt(country)
    if country_n != code.lower():
        country_names.add(country_n)
    country_names.discard("")

    def score(fullname: str) -> int:
        fn = _norm_txt(fullname)
        if not fn:
            return 0
        exact_city = int(fn.startswith(city_n + ",") or fn.split(",")[0] == city_n)
        starts_city = int(fn.startswith(city_n))
        contains_city = int(city_n in fn)
        contains_country = int(any(name in fn for name in country_names))
        return (3*exact_city) + (2*starts_city) + contains_city + (2*contains_country)

    df["__score"] = df["regionNames.fullName"].fillna("").apply(score)
    if "index" in df.columns:
        # Relevance order returned by the API, when available; values that cannot
        # be read as an integer are pushed to the end.
        def _safe_int(x):
            try:
                return int(x)
            except (TypeError, ValueError, OverflowError):
                return 10**9
        df["__idx"] = df["index"].apply(_safe_int)
    else:
        df["__idx"] = range(len(df))

    # Best score first; ties broken by the API's own order.
    df = df.sort_values(by=["__score", "__idx"], ascending=[False, True])
    if df.iloc[0]["__score"] == 0:
        raise ValueError(
            f"Aucun match convaincant pour ville='{city}' pays='{country}' "
            f"(code={code}, domain={domain}, locale={locale}). "
            f"Candidats: {df['regionNames.fullName'].head(5).to_list()}"
        )

    gaia_id = str(df.iloc[0]["gaiaId"])
    full_name = str(df.iloc[0]["regionNames.fullName"])
    return gaia_id, full_name, locale


# Récupère tous les hôtels

In [None]:
# Column order of the DataFrame returned by hotels_in_city_df (also used when it is empty).
_HOTEL_COLUMNS = [
    "id", "name", "star",
    "review_score", "review_total",
    "price_amount", "price_currency", "price_formatted",
    "latitude", "longitude",
    "distance_value", "distance_unit",
]


def _hotel_row(h: dict) -> dict:
    """Flatten one property record of /v2/hotels/search into a flat row; absent fields become None."""
    price_lead = (h.get("price") or {}).get("lead") or {}
    coord = (h.get("mapMarker") or {}).get("latLong") or {}
    reviews = h.get("reviews") or {}
    dest = (h.get("destinationInfo") or {}).get("distanceFromDestination") or {}
    return {
        "id": h.get("id"),
        "name": h.get("name"),
        "star": h.get("star") or h.get("starRating") or h.get("rating"),
        "review_score": reviews.get("score"),
        "review_total": reviews.get("total"),
        "price_amount": price_lead.get("amount"),
        "price_currency": (price_lead.get("currencyInfo") or {}).get("code"),
        "price_formatted": price_lead.get("formatted"),
        "latitude": coord.get("latitude"),
        "longitude": coord.get("longitude"),
        "distance_value": dest.get("value"),
        "distance_unit": dest.get("unit"),
    }


def hotels_in_city_df(
    gaia_id: str,
    *,
    domain: str,
    locale: str,
    checkin: str,
    checkout: str,
    adults: int = 2,
    sort_order: str = "PRICE_LOW_TO_HIGH",   # required by the API
    max_pages: int = 20,                     # safety cap on the number of requests
    timeout: int = 20,
    debug: bool = False,
    return_props: bool = False,              # True -> (df, props)
    currency: str = "EUR",
    page_size: int = 200,
) -> pd.DataFrame | tuple[pd.DataFrame, list[dict]]:
    """
    Fetch every result page of /v2/hotels/search for a gaia_id and return a DataFrame.

    API parameters sent (v2): region_id, domain, locale, checkin_date, checkout_date,
    adults_number, page_number, currency, sort_order.

    Args:
        gaia_id: region identifier, sent as `region_id`.
        domain, locale: site code and locale sent to the API.
        checkin, checkout: dates sent as given (the examples in this notebook use "YYYY-MM-DD").
        adults: number of adults.
        sort_order: one of 'REVIEW', 'RECOMMENDED', 'DISTANCE', 'PRICE_LOW_TO_HIGH',
            'PROPERTY_CLASS', 'PRICE_RELEVANT'.
        max_pages: maximum number of pages requested.
        timeout: per-request timeout in seconds.
        debug: print status and URL of every request (and the body start on errors).
        return_props: also return the raw property records.
        currency: currency code sent to the API.
        page_size: heuristic page length; a page with fewer items is taken to be the
            last one and paging stops.

    Returns:
        DataFrame with the columns id, name, star, review_score, review_total,
        price_amount, price_currency, price_formatted, latitude, longitude,
        distance_value, distance_unit (present even when no hotel was found);
        or (DataFrame, raw records) when `return_props` is True.

    Raises:
        requests.HTTPError: a page request answered with an error status.
    """
    url = "https://hotels-com-provider.p.rapidapi.com/v2/hotels/search"
    headers = {
        "X-RapidAPI-Key": KEY,   # KEY must be defined at notebook level
        "X-RapidAPI-Host": "hotels-com-provider.p.rapidapi.com",
    }

    all_props: list[dict] = []
    for page in range(1, max_pages + 1):
        params = {
            "region_id": gaia_id,
            "domain": domain,
            "locale": locale,
            "checkin_date": checkin,
            "checkout_date": checkout,
            "adults_number": str(adults),
            "page_number": str(page),
            "currency": currency,
            "sort_order": sort_order,
        }
        r = requests.get(url, headers=headers, params=params, timeout=timeout)
        if debug:
            print(f"[{r.status_code}] page={page} {r.url}")
            if r.status_code >= 400:
                print("BODY:", r.text[:600])
        r.raise_for_status()

        data = r.json()
        # The payload is either an object wrapping the list under one of several
        # keys, or the list itself; only call .get() on an actual dict.
        if isinstance(data, dict):
            items = data.get("properties") or data.get("results") or data.get("data") or data
        else:
            items = data
        if not isinstance(items, list) or not items:
            # Nothing left: stop paging.
            break

        all_props.extend(items)

        # Heuristic: a short page usually means the results are exhausted.
        if len(items) < page_size:
            break

    # ---- conversion to a DataFrame ----
    rows = [_hotel_row(h) for h in all_props if isinstance(h, dict)]
    # Explicit columns keep the schema stable when there are no rows.
    df = pd.DataFrame(rows, columns=_HOTEL_COLUMNS)

    if return_props:
        return df, all_props
    return df


In [31]:
# Resolve Nice (France); the helper also hands back the locale it ended up using.
gaia_id, full_name, loc = get_city_gaia_id(city="Nice", country="France")

# Fetch all result pages for a two-night stay, cheapest first, logging each request.
df_hotels = hotels_in_city_df(
    gaia_id,
    checkin="2025-10-04",
    checkout="2025-10-06",
    adults=2,
    domain="FR",
    locale=loc,  # e.g. "fr_FR"
    sort_order="PRICE_LOW_TO_HIGH",
    debug=True,
)

df_hotels.head(10)


[200] page=1 https://hotels-com-provider.p.rapidapi.com/v2/hotels/search?region_id=2526&domain=FR&locale=fr_FR&checkin_date=2025-10-04&checkout_date=2025-10-06&adults_number=2&page_number=1&currency=EUR&sort_order=PRICE_LOW_TO_HIGH
[200] page=2 https://hotels-com-provider.p.rapidapi.com/v2/hotels/search?region_id=2526&domain=FR&locale=fr_FR&checkin_date=2025-10-04&checkout_date=2025-10-06&adults_number=2&page_number=2&currency=EUR&sort_order=PRICE_LOW_TO_HIGH
[200] page=3 https://hotels-com-provider.p.rapidapi.com/v2/hotels/search?region_id=2526&domain=FR&locale=fr_FR&checkin_date=2025-10-04&checkout_date=2025-10-06&adults_number=2&page_number=3&currency=EUR&sort_order=PRICE_LOW_TO_HIGH


Unnamed: 0,id,name,star,review_score,review_total,price_amount,price_currency,price_formatted,latitude,longitude,distance_value,distance_unit
0,14766071,hotelF1 Antibes Sophia Antipolis,1.0,6.8,256,70.32,EUR,70 €,43.602872,7.063569,19.93,KILOMETER
1,14771124,hotelF1 Nice Villeneuve-Loubet,1.0,6.4,341,94.28,EUR,94 €,43.627973,7.129794,14.02,KILOMETER
2,15021621,Belambra Clubs La Colle-sur-Loup - Les Terrass...,3.0,8.4,68,98.4,EUR,98 €,43.680803,7.099714,13.45,KILOMETER
3,1063555,L'Esterel,2.0,8.8,604,109.48,EUR,109 €,43.553392,7.019254,26.18,KILOMETER
4,2686249,Azurene Royal Hotel,2.0,7.4,188,112.86,EUR,113 €,43.551022,7.022977,26.14,KILOMETER
5,1103037,Zenitude Hôtel-Résidences Le Cannet,4.0,8.0,632,121.46,EUR,121 €,43.574577,7.021833,24.51,KILOMETER
6,105096779,HOTEL DU NORD,,7.4,18,123.44,EUR,123 €,43.553558,7.019806,26.13,KILOMETER
7,19328720,City Résidence Sophia,,8.2,223,123.64,EUR,124 €,43.61052,7.05379,20.08,KILOMETER
8,10587334,Chanteclair Hotel,,8.2,276,124.78,EUR,125 €,43.551741,7.010556,26.83,KILOMETER
9,2202354,Residhotel Les Coralynes,3.0,7.8,448,125.43,EUR,125 €,43.55467,7.01443,26.38,KILOMETER


In [32]:
# Hotels whose name mentions "Bristol", ignoring case; rows without a name never match.
df_hotels.loc[df_hotels["name"].str.contains("Bristol", case=False, na=False)]

Unnamed: 0,id,name,star,review_score,review_total,price_amount,price_currency,price_formatted,latitude,longitude,distance_value,distance_unit
159,1605913,Hôtel Bristol,3.0,7.6,257,251.62,EUR,252 €,43.70426,7.26381,0.67,KILOMETER
