In [2]:
# --- Imports ---------------------------------------------------------------
from datetime import date
import pandas as pd

from expedia.client import ExpediaClient
from expedia.helpers.rates_report import (
    DEFAULT_OCCUPANCY,
    DEFAULT_RATE_TYPES,
    RATE_TYPE_LABELS,
    SEARCH_POINT_DENSITY,
    generate_rates_dataframe,
    export_rates_to_excel,
)

# API client built by ExpediaClient.from_env().
client = ExpediaClient.from_env()

# --- Search parameters -----------------------------------------------------
# Centre of the search area and its radius (kilometres, per the name).
LATITUDE, LONGITUDE = 52.370216, 4.895168
RADIUS_KM = 3
# Stay dates; a "YYYY-MM-DD" string may be used instead of a date object.
CHECKIN, CHECKOUT = date(2025, 10, 20), date(2025, 10, 22)

# --- Optional overrides (library defaults unless edited) -------------------
OCCUPANCY = DEFAULT_OCCUPANCY
RATE_TYPES = DEFAULT_RATE_TYPES       # e.g. ("mkt_prepay", "priv_pkg")
RATE_LABELS = RATE_TYPE_LABELS        # replace to customise the labels
POINT_DENSITY = SEARCH_POINT_DENSITY  # 96 matches the app

# --- Fetch rates -----------------------------------------------------------
# The call yields two values: the rates frame and an accompanying `meta` object.
rates_df, meta = generate_rates_dataframe(
    client,
    checkin=CHECKIN,
    checkout=CHECKOUT,
    latitude=LATITUDE,
    longitude=LONGITUDE,
    radius_km=RADIUS_KM,
    n_points=POINT_DENSITY,
    occupancy=OCCUPANCY,
    rate_types=RATE_TYPES,
    rate_type_labels=RATE_LABELS,
)


In [21]:
# Notebook cell 1 – imports and helpers
import pandas as pd
from pathlib import Path

from expedia.client import ExpediaClient
from expedia.event_rates_cache import update_event_rates_cache
from events.event_helpers import load_events_catalog

# JSON file handed to update_event_rates_cache as `output_path` further down.
CACHE_PATH = Path("reports/events_with_hotels.json")
# Directories passed to load_events_catalog.
EVENT_DATA_DIRECTORIES = [Path(folder) for folder in ("data", "events/data")]

# Load the catalog and report how many events it contains.
events_catalog = load_events_catalog(EVENT_DATA_DIRECTORIES)
print(f"{len(events_catalog)} events loaded")

# Requires the environment variables (or a populated .env) to be in place.
client = ExpediaClient.from_env()


Loaded event sources:
  - events/data/2025_events.json
  - events/data/2026_events.json
  - events/data/2027_events.json
  - events/data/2028_events.json
2179 events loaded


In [5]:
from pathlib import Path
import json
import pandas as pd
import builtins

def list_cached_event_names(cache_path: Path = Path("reports/events_with_hotels.json")) -> list[str]:
    """Return the non-blank event titles stored in the JSON events cache.

    This is a best-effort reader: every problem with the file results in an
    empty list instead of an exception.

    Args:
        cache_path: Location of the JSON cache file. The top-level JSON value
            may be a single object or a list of objects.

    Returns:
        The whitespace-stripped ``"title"`` of each entry that is a dict with
        a non-blank string title, in file order (duplicates are kept).
        An empty list when the file is missing or unreadable, is not valid
        UTF-8, is not valid JSON, or has any other top-level type.
    """
    try:
        # A missing file raises FileNotFoundError (an OSError), so no separate
        # exists() check is needed. UnicodeDecodeError is listed explicitly
        # because it is neither an OSError nor a JSONDecodeError.
        data = json.loads(cache_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        return []

    # Treat a lone object as a one-element list.
    if isinstance(data, dict):
        data = [data]
    elif not isinstance(data, list):
        return []

    names: list[str] = []
    for entry in data:
        if not isinstance(entry, dict):
            continue
        title = entry.get("title")
        # builtins.str rather than the bare name -- presumably a guard against
        # `str` having been rebound in the notebook namespace (TODO confirm).
        if isinstance(title, builtins.str) and title.strip():
            names.append(title.strip())

    return names



def drop_events_by_name(df: pd.DataFrame, banned_names: list[str]) -> pd.DataFrame:
    """Return a copy of ``df`` without the rows whose ``title`` is banned.

    Titles and banned names are compared after stripping surrounding
    whitespace, so names returned by ``list_cached_event_names`` (which are
    stripped) match catalog titles that carry stray spaces.

    Args:
        df: Frame to filter. It is never modified.
        banned_names: Titles to remove. Non-string items are ignored.

    Returns:
        A new frame with a fresh 0..n-1 index when filtering takes place.
        When ``df`` has no ``title`` column or ``banned_names`` is empty, a
        plain copy of ``df`` (original index kept) is returned.
        Rows whose ``title`` is not a string (numbers, ``None``, NaN) are
        always kept; they are never coerced to text for matching.
    """
    if "title" not in df.columns or not banned_names:
        return df.copy()

    banned = {name.strip() for name in banned_names if isinstance(name, str)}

    def _is_banned(value: object) -> bool:
        # Only real strings can match; 123 must not be treated as "123".
        return isinstance(value, str) and value.strip() in banned

    # astype(bool) keeps the mask boolean even when the column is empty.
    mask = df["title"].map(_is_banned).astype(bool)
    return df.loc[~mask].reset_index(drop=True)


In [23]:
from expedia.geo_helpers import circle_polygon_geojson, to_geojson_string, haversine_distance

# Keep only events inside a circle around a reference point (the "USA" filter),
# that start today or later and end within the look-ahead window.
ref_lat, ref_lon = 40.28361357876028, -100.2159469728348
# One radius for both the circle polygon and the distance filter below.
# Presumably metres, to match the `USA_distance_m` column -- TODO confirm.
SEARCH_RADIUS_M = 3000000
LOOKAHEAD_DAYS = 500
geojson = circle_polygon_geojson(ref_lat, ref_lon, SEARCH_RADIUS_M, 128)

# Distance of every event from the reference point (adds a column to the catalog).
events_catalog["USA_distance_m"] = events_catalog.apply(
    lambda row: haversine_distance(ref_lat, ref_lon, row["latitude"], row["longitude"]),
    axis=1,
)

# Optional: skip events that are already present in the cache.
# already_cached = list_cached_event_names()
# events_catalog = drop_events_by_name(events_catalog, already_cached)

events_catalog = (
    events_catalog.loc[events_catalog["USA_distance_m"] <= SEARCH_RADIUS_M]
    .reset_index(drop=True)
)

# Evaluate "today" once so both date filters use the same reference day.
today = pd.Timestamp.today().normalize()

events_catalog = events_catalog[pd.to_datetime(events_catalog["date_start"]) >= today]

events_catalog = events_catalog[
    pd.to_datetime(events_catalog["date_end"]) <= today + pd.Timedelta(days=LOOKAHEAD_DAYS)
]

# Keep only events whose start and end values differ.
# Both operands come from the same frame, so their labels always line up.
# reset_index belongs on the filtered result: applied to one operand only, it
# makes pandas refuse the comparison whenever the date filters dropped rows.
usa_events = (
    events_catalog.loc[events_catalog["date_start"] != events_catalog["date_end"]]
    .reset_index(drop=True)
)

In [24]:
# Write the filtered events to a workbook; the row count is the cell's result.
usa_events.to_excel(excel_writer="events/data/USA_events.xlsx")
usa_events.shape[0]


363

In [25]:
# Cache rates for every row of `usa_events`, `chunk` rows per call to
# update_event_rates_cache (each call is given output_path=CACHE_PATH).
chunk = 10
tot = len(usa_events)
begin = 0     # first row position to process; raise it to skip rows already handled
limit = 100   # forwarded unchanged as `limit=`; meaning is defined by update_event_rates_cache

for start in range(begin, tot, chunk):
    # `end` is the INCLUSIVE position of the last row in this chunk.
    end = min(start + chunk - 1, tot - 1)

    print(start, end)

    # iloc slices exclude the stop position, hence `end + 1`. Slicing with
    # `start:end` silently skipped the last row of every chunk.
    selected_events = usa_events.iloc[start:end + 1]

    records = update_event_rates_cache(
        selected_events,
        client,
        output_path=CACHE_PATH,
        limit=limit,
        show_progress=True,          # optional: prints progress per event
        default_radius_km=6.0,       # override if your catalog omits radius_km
        retry_kwargs={
            "max_attempts": 5,
            "initial_backoff_seconds": 8.0,
            "backoff_multiplier": 1.5,
            "post_success_sleep_seconds": 2.0,
        },
    )

    print(f"{len(records)} total events now cached in {CACHE_PATH}")


0 9


KeyboardInterrupt: 

In [87]:


# One-off run for a hand-picked range of row positions; the call mirrors the
# one used inside the chunked loop.
# NOTE(review): the saved output of the `len(usa_events)` cell is 363. With that
# length positions 365-369 do not exist and this slice is empty -- confirm the
# intended range before re-running.
selected_events = usa_events.iloc[slice(365, 370)]
limit = 100
records = update_event_rates_cache(
    selected_events,
    client,
    limit=limit,
    output_path=CACHE_PATH,
    default_radius_km=6.0,       # used when the catalog omits radius_km
    show_progress=True,          # prints progress per event
    retry_kwargs=dict(
        max_attempts=5,
        initial_backoff_seconds=8.0,
        backoff_multiplier=1.5,
        post_success_sleep_seconds=2.0,
    ),
)

print(f"{len(records)} total events now cached in {CACHE_PATH}")


[1] Cached rates for 2nd Innovation in Obesity Therapeutics Summit (radius 6.0 km)
[2] Cached rates for Advancing Data Center Construction 2025 (radius 6.0 km)
[3] Cached rates for Advancing AEC Knowledge Management 2025 (radius 6.0 km)
[4] Cached rates for 4th ESG in Life Sciences Summit East 2025 (radius 6.0 km)
[5] Cached rates for Advancing Data Center Construction 2025 (radius 6.0 km)
363 total events now cached in reports/events_with_hotels.json


In [None]:
# Preview the first rows only instead of rendering the whole frame.
usa_events.head()