In [None]:
from __future__ import annotations
import os
import math
from datetime import date, timedelta
from typing import List, Dict, Any
import httpx
import requests
import pandas as pd
import json
import unicodedata

from dotenv import load_dotenv

# Load environment variables from a local .env file, if one is present (python-dotenv).
# RAPIDAPI_KEY is read from the environment further down in this notebook.
load_dotenv()

True

In [None]:
# ==== CONFIG ====
# Stay window, as YYYY-MM-DD strings sent as-is to the API.
ARRIVAL_DATE = "2025-10-15"
DEPARTURE_DATE = "2025-10-18"

# Search area and request options.
RADIUS_KM = 10
CURRENCY = "EUR"
LANGUAGE = "fr-fr"
MAX_PAGES = 3
ROOM_QTY = 1

# Number of decimals kept on prices: 0 = round to the whole euro, 2 = keep cents.
PRICE_ROUND_DECIMALS = 0

# Occupants (requested configuration).
ADULTS = 2
# When 0, no child parameters are sent to the API.
CHILDREN_QTY = 0
# e.g. [11, 5]; the length must equal CHILDREN_QTY.
CHILDREN_AGES = []
# ===============

In [None]:

# The RapidAPI key must come from the environment; stop immediately when it is absent or empty.
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY")
if not RAPIDAPI_KEY:
    raise RuntimeError("RAPIDAPI_KEY manquante dans l'environnement.")

# Host, base URL and headers shared by every Booking request made in this notebook.
BOOKING_HOST = "apidojo-booking-v1.p.rapidapi.com"
BOOKING_BASE = "https://" + BOOKING_HOST
BOOKING_HEADERS = {
    "X-RapidAPI-Key": RAPIDAPI_KEY,
    "X-RapidAPI-Host": BOOKING_HOST,
}

In [None]:
def geocode_city(city: str, country: str | None = None):
    """Resolve a place name to coordinates with Nominatim (OpenStreetMap).

    Args:
        city: Name of the city to look up.
        country: Optional country name; when truthy the query becomes
            ``"<city>, <country>"``.

    Returns:
        A ``(latitude, longitude)`` tuple of floats taken from the first hit.

    Raises:
        ValueError: If the service returns no hit for the query.
        Exception: Whatever ``raise_for_status()`` raises on an HTTP error.
    """
    query = f"{city}, {country}" if country else city
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        params={"q": query, "format": "json", "limit": 1},
        headers={"User-Agent": "city-geocoder/1.0 (contact: you@example.com)"},
        timeout=30,
    )
    response.raise_for_status()

    matches = response.json()
    if not matches:
        raise ValueError(f"Ville introuvable: {city!r}")

    best = matches[0]
    return float(best["lat"]), float(best["lon"])

In [None]:
def bbox_from_center(lat: float, lon: float, km: float):
    """Build a bounding-box string extending ``km`` kilometres around a point.

    The half-extent in degrees is computed with 110.574 km per degree of
    latitude and ``111.320 * cos(lat)`` km per degree of longitude.

    Returns:
        ``"lat_min,lat_max,lon_min,lon_max"``, each value with six decimals.
    """
    half_lat = km / 110.574
    half_lon = km / (111.320 * math.cos(math.radians(lat)))
    corners = (lat - half_lat, lat + half_lat, lon - half_lon, lon + half_lon)
    return ",".join(f"{value:.6f}" for value in corners)

In [None]:
def haversine_km(lat1, lon1, lat2, lon2):
    """Distance in kilometres between two points given in decimal degrees.

    Uses the haversine formula on a sphere of radius 6371 km.
    """
    earth_radius_km = 6371.0
    phi_a = math.radians(lat1)
    phi_b = math.radians(lat2)
    half_dphi = math.radians(lat2 - lat1) / 2
    half_dlam = math.radians(lon2 - lon1) / 2
    hav = (
        math.sin(half_dphi) ** 2
        + math.cos(phi_a) * math.cos(phi_b) * math.sin(half_dlam) ** 2
    )
    return 2 * earth_radius_km * math.asin(math.sqrt(hav))

In [None]:
def _coerce_price(x):
    """Convert a raw price value to ``float``, or return ``None`` when impossible.

    ``None`` stays ``None``. Otherwise a direct ``float(x)`` is tried first,
    then the text form of ``x`` with commas replaced by dots (decimal comma).
    """
    if x is None:
        return None

    # Ordered conversion attempts; each is evaluated lazily so the fallback
    # only runs when the direct conversion has failed.
    attempts = (
        lambda: float(x),
        lambda: float(str(x).replace(",", ".")),  # just in case
    )
    for attempt in attempts:
        try:
            return attempt()
        except Exception:
            continue
    return None

In [None]:
# Columns of the frame returned by fetch_hotels_by_city (also used for the empty result).
_HOTEL_COLUMNS = [
    "hotel_id",
    "name",
    "stars",
    "accommodation_type",
    "review_score",
    "review_score_word",
    "review_count",
    "min_total_price",
    "currency",
    "is_mobile_deal",
    "distance_km_from_center",
    "city",
    "site",
]

# Step added to the `offset` request parameter for each successive page.
# NOTE(review): presumably the endpoint's page size — confirm against the API documentation.
_PAGE_SIZE = 25


def _first_present(record, *keys):
    """Return the first value in `record` among `keys` that is neither None nor ""."""
    for key in keys:
        value = record.get(key)
        # Explicit test instead of `or`, so that a legitimate 0 / 0.0 is kept.
        if value is not None and value != "":
            return value
    return None


def _hotel_row(hotel, lat_c, lon_c, city):
    """Flatten one raw hotel record from the API into an output row dict."""
    # Distance to the geocoded centre (optional: stays None without coordinates).
    lat = _first_present(hotel, "latitude", "hotel_latitude")
    lon = _first_present(hotel, "longitude", "hotel_longitude")
    dist = None
    if lat is not None and lon is not None:
        try:
            dist = round(haversine_km(lat_c, lon_c, float(lat), float(lon)), 2)
        except (TypeError, ValueError):
            # Coordinates that cannot be converted: keep the hotel, drop the distance.
            dist = None

    # Price -> float -> rounded to the configured number of decimals.
    price_raw = hotel.get("min_total_price") or (hotel.get("price_breakdown") or {}).get("all_inclusive_price")
    price_num = _coerce_price(price_raw)
    price_rounded = round(price_num, PRICE_ROUND_DECIMALS) if price_num is not None else None

    return {
        "hotel_id": hotel.get("hotel_id"),
        "name": hotel.get("hotel_name") or hotel.get("name"),
        "stars": hotel.get("class") or hotel.get("hotel_class"),
        "accommodation_type": hotel.get("accommodation_type_name") or hotel.get("accommodation_type"),
        "review_score": hotel.get("review_score"),
        "review_score_word": hotel.get("review_score_word"),
        "review_count": hotel.get("review_nr"),
        "min_total_price": price_rounded,
        "currency": CURRENCY,
        "is_mobile_deal": hotel.get("is_mobile_deal"),
        "distance_km_from_center": dist,
        "city": city,
        "site": "booking.com",
    }


def fetch_hotels_by_city(city: str, country: str | None = None) -> pd.DataFrame:
    """Fetch hotels around a city and return them as a sorted DataFrame.

    The city is geocoded with ``geocode_city``, a bounding box of ``RADIUS_KM``
    is built around it, and up to ``MAX_PAGES`` pages are requested from the
    ``/properties/v2/list-by-map`` endpoint using the module-level CONFIG
    values (dates, occupants, currency, language).

    Args:
        city: City name; also written to the ``city`` column.
        country: Optional country name passed to the geocoder.

    Returns:
        A DataFrame with the columns listed in ``_HOTEL_COLUMNS``, de-duplicated
        on ``hotel_id`` (rows without an id are all kept) and sorted by
        ascending price, ascending distance, then descending review score,
        missing values last. When nothing is found the frame is empty but
        still carries those columns.

    Raises:
        ValueError: Propagated from ``geocode_city`` when the city is not found.
        Exception: Whatever ``raise_for_status()`` raises on an HTTP error.
    """
    # -- Occupants (default behaviour unchanged when there are no children) --
    children_qty = max(0, int(CHILDREN_QTY))
    if children_qty == 0:
        ages = []
    else:
        # Clamp ages to 0..17 and align the quantity on the ages actually
        # provided, to avoid validation errors on the API side.
        ages = [max(0, min(int(a), 17)) for a in (CHILDREN_AGES or [])]
        children_qty = len(ages)
    # NOTE(review): guest_qty includes the children here; if the API expects
    # adults only in guest_qty, children are counted twice — confirm.
    guest_qty = int(ADULTS) + children_qty

    lat_c, lon_c = geocode_city(city, country)
    bbox = bbox_from_center(lat_c, lon_c, RADIUS_KM)

    url = f"{BOOKING_BASE}/properties/v2/list-by-map"
    rows = []

    for page in range(MAX_PAGES):
        params = {
            "bbox": bbox,
            "room_qty": str(ROOM_QTY),
            "guest_qty": str(guest_qty),
            "arrival_date": ARRIVAL_DATE,
            "departure_date": DEPARTURE_DATE,
            "order_by": "popularity",
            "languagecode": LANGUAGE,
            "price_filter_currencycode": CURRENCY,
            "offset": str(page * _PAGE_SIZE),
        }
        # Child parameters are only sent when there is at least one child.
        if children_qty > 0:
            params["children_qty"] = str(children_qty)
            params["children_age"] = ",".join(str(a) for a in ages)

        r = requests.get(url, headers=BOOKING_HEADERS, params=params, timeout=60)
        r.raise_for_status()
        results = (r.json() or {}).get("result") or []
        if not results:
            break

        for h in results:
            # Skip empty placeholder entries (neither id nor name).
            if not h.get("hotel_id") and not h.get("hotel_name"):
                continue
            rows.append(_hotel_row(h, lat_c, lon_c, city))

    if not rows:
        # Keep the schema so that downstream column access works on an empty result.
        return pd.DataFrame(columns=_HOTEL_COLUMNS)

    df = pd.DataFrame(rows)

    # De-duplicate on hotel_id, but only among rows that actually have one:
    # missing ids compare equal to each other and would otherwise collapse
    # every id-less listing into a single row.
    has_id = df["hotel_id"].notna()
    repeated = df.duplicated(subset=["hotel_id"], keep="first")
    df = df.loc[~(has_id & repeated)].copy()

    df["min_total_price"] = pd.to_numeric(df["min_total_price"], errors="coerce")
    df = df.sort_values(
        by=["min_total_price", "distance_km_from_center", "review_score"],
        ascending=[True, True, False],
        na_position="last",
    ).reset_index(drop=True)
    return df


In [None]:
# Fetch the booking.com listings around Paris using the CONFIG values, then display the frame.
df_paris = fetch_hotels_by_city("Paris", country="France")
df_paris

In [123]:
# Build a sorted copy: cheapest first, then closest to the centre, then best reviewed;
# missing values go last. `assign` coerces the price column on a copy, so `df_paris`
# (already displayed by the previous cell) is not modified and this cell can be
# re-run safely.
df_paris_sorted = (
    df_paris
    .assign(min_total_price=lambda frame: pd.to_numeric(frame["min_total_price"], errors="coerce"))
    .sort_values(
        by=["min_total_price", "distance_km_from_center", "review_score"],
        ascending=[True, True, False],
        na_position="last",
    )
)
df_paris_sorted

Unnamed: 0,hotel_id,name,stars,accommodation_type,review_score,review_score_word,review_count,min_total_price,currency,is_mobile_deal,distance_km_from_center,city,site
0,10263957,Studio meublé vue tour Eiffel et tout Paris,,Appartement,8.0,Très bien,27.0,112.0,EUR,0,9.23,Paris,booking.com
1,14901311,Studio moderne proche de Paris,,Appartement,1.0,Mauvais,1.0,118.0,EUR,1,6.25,Paris,booking.com
2,882074,Hôtel Moderne,,Hôtel,7.6,Bien,911.0,136.0,EUR,0,8.39,Paris,booking.com
3,12696292,1 Next To Paris,,Appartement,9.0,Fabuleux,32.0,152.0,EUR,0,8.80,Paris,booking.com
4,14919307,15 mins from Paris,,Appartement,,,,158.0,EUR,0,9.67,Paris,booking.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,8204108,Croissants et Confitures,,Appartement,8.4,Très bien,18.0,2008.0,EUR,0,2.77,Paris,booking.com
752,51900,Hotel Westminster,5.0,Hôtel,8.6,Superbe,572.0,2020.0,EUR,0,2.17,Paris,booking.com
753,9856181,Villa-des-Prés,5.0,Hôtel,9.6,Exceptionnel,440.0,2046.0,EUR,0,0.88,Paris,booking.com
754,59912,Paris Marriott Champs Elysees Hotel,5.0,Hôtel,7.8,Bien,450.0,2319.0,EUR,0,3.73,Paris,booking.com
