This code scrapes per-day ride statistics for a whole year from queue-times.com, specifically:
- average wait time per ride
- maximum wait time per ride
- uptime per ride

We run the same code once per year (three runs in total), changing only these settings:
PARK_ID = 160
START, END = "2025-01-01", "2025-11-11"   # pas aan
OUT = "efteling_rides2025-11-11.csv"

PARK_ID = 160
START, END = "2024-01-01", "2024-12-31"   # pas aan
OUT = "efteling_rides2024.csv"

PARK_ID = 160
START, END = "2023-01-01", "2023-12-31"   # pas aan
OUT = "efteling_rides2023.csv"

In [13]:
# tiny_queue_times.py
import requests, pandas as pd, re
from io import StringIO

# Scrape settings -- adjust these for each yearly run.
PARK_ID = 160
START = "2025-01-01"
END = "2025-11-11"
OUT = "efteling_rides2025-11-11.csv"

def num(s):
    """Convert a Series of scraped cell texts to floats.

    Every value is stringified, decimal commas are replaced by dots, and the
    first number found in the text is parsed. Cells that contain no number
    become NaN.

    The pattern only matches well-formed numbers ("12", "12.5", ".5"), so
    stray punctuation such as "..." yields NaN instead of making the float
    conversion raise ValueError.
    """
    text = s.astype(str).str.replace(",", ".", regex=False)
    return text.str.extract(r"(\d+(?:\.\d+)?|\.\d+)")[0].astype(float)

# One DataFrame per scraped day; concatenated after the loop.
rows = []
for d in pd.date_range(START, END):
    url = f"https://queue-times.com/parks/{PARK_ID}/calendar/{d:%Y/%m/%d}"

    # A timeout keeps one stalled connection from hanging the whole run, and
    # a failed or non-2xx response skips that day instead of aborting the loop.
    try:
        resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
        resp.raise_for_status()
    except requests.RequestException as exc:
        print(f"[SKIP] {d:%Y-%m-%d} -> {exc}")
        continue

    # pd.read_html raises ValueError when the page contains no tables at all.
    try:
        tables = pd.read_html(StringIO(resp.text))
    except ValueError:
        print(f"[SKIP] {d:%Y-%m-%d} -> no tables found")
        continue

    # Pick out the per-ride tables by their (case-folded) column headers.
    pieces = {}
    for t in tables:
        cols = [str(c).casefold() for c in t.columns]
        if len(cols) < 2 or "ride" not in cols[0]:
            continue
        header = cols[1]

        targets = []
        if "queue" in header:
            if "average" in header:
                targets.append("avg_queue_min")
            elif "max" in header:  # also matches "maximum"
                targets.append("max_queue_min")
        if "uptime" in header:
            targets.append("uptime_pct")

        for target in targets:
            # Copy the first two columns so the assignment below writes to an
            # independent frame rather than a slice of the parsed table.
            piece = t.iloc[:, :2].copy()
            piece.columns = ["ride", target]
            piece[target] = num(piece[target])
            pieces[target] = piece

    # Merge the pieces found for this date on the ride name.
    df = None
    for key in ("avg_queue_min", "max_queue_min", "uptime_pct"):
        piece = pieces.get(key)
        if piece is None:
            continue
        df = piece if df is None else df.merge(piece, on="ride", how="outer")

    if df is not None and not df.empty:
        df.insert(0, "date", d.date().isoformat())
        df.insert(1, "park_id", PARK_ID)
        rows.append(df)

# Write everything out. pd.concat raises ValueError on an empty list, so only
# write the file when at least one day produced data.
if rows:
    pd.concat(rows, ignore_index=True).to_csv(OUT, index=False)
    print(f"✅ Klaar: {OUT}")
else:
    print(f"No data scraped; nothing written to {OUT}")


✅ Klaar: efteling_rides2025-11-11.csv


Merge files

In [18]:
import pandas as pd

# Per-year ride CSV files to combine.
files = [
    "efteling_rides2023.csv",
    "efteling_rides2024.csv",
    "efteling_rides2025-11-11.csv",
]

# Read every file and stack them into one frame.
dataframes = [pd.read_csv(f) for f in files]
all_data = pd.concat(dataframes, ignore_index=True)

# Sort by date (optional). A stable sort keeps rows that share a date in
# their original order instead of shuffling them arbitrarily.
all_data = all_data.sort_values("date", kind="mergesort")

# Save as a single file.
all_data.to_csv("efteling_rides_all.csv", index=False)

print("✅ File merged and named: efteling_rides_all.csv")


✅ File merged and named: efteling_rides_all.csv


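Next we scrape the per-day park metadata (crowd level, opening hours, weather, events). We run the same code once per year (three runs in total), changing only these settings: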
PARK_ID = 160
START, END = "2025-01-01", "2025-11-11"  
OUT = "efteling_meta_2025-11-11.csv"

PARK_ID = 160
START, END = "2024-01-01", "2024-12-31"   
OUT = "efteling_meta_2024.csv"

PARK_ID = 160
START, END = "2023-01-01", "2023-12-31"
OUT = "efteling_meta_2023.csv"

In [22]:


import requests
from bs4 import BeautifulSoup
import re
import csv
from datetime import date, timedelta

# Scrape settings for this run -- change as needed.
PARK_ID = 160
START, END = "2023-01-01", "2023-12-31"  # YYYY-MM-DD; END is inclusive
OUT_CSV = "efteling_meta_2023.csv"

# Request headers passed to requests.get for each page fetch.
HEADERS = {"User-Agent": "Mozilla/5.0 (compatible; EftelingMetaScraper/1.0)"}

def daterange(start_date, end_date):
    """Generate each day from start_date through end_date, both included.

    Nothing is yielded when end_date lies before start_date.
    """
    span_days = (end_date - start_date).days
    for offset in range(span_days + 1):
        yield start_date + timedelta(days=offset)

def to_float(text):
    """Parse the first number in text as a float.

    Both "12,3" and "12.3" are accepted as decimals. Returns None when text
    is empty/None or contains no number.
    """
    found = re.search(r"[-+]?\d+(?:[.,]\d+)?", text) if text else None
    if found is None:
        return None
    return float(found.group(0).replace(",", "."))

def get_crowd_values(soup):
    """
    Collect the values of the 'Crowd level' rows on the page.

    Each `div.panel-block` whose first <span> reads exactly 'Crowd level'
    contributes the text of its second <span>. The first such value that
    contains '%' is the percentage (e.g. '13%'); the first one without '%'
    is the label (e.g. 'Empty').

    Returns (crowd_percent_str, crowd_label_str); either may be None.
    """
    crowd_percent = None
    crowd_label = None
    for row in soup.select("div.panel-block"):
        cells = row.find_all("span")
        if len(cells) < 2:
            continue
        if cells[0].get_text(strip=True) != "Crowd level":
            continue
        value = cells[1].get_text(strip=True)
        if "%" in value:
            if crowd_percent is None:
                crowd_percent = value
        elif crowd_label is None:
            crowd_label = value
    return crowd_percent, crowd_label

def get_opening_hours(soup):
    """Return the text of the first link inside #tabs (e.g. '🎄 10:00-20:00'), or None."""
    first_tab = soup.select_one("#tabs a")
    if not first_tab:
        return None
    return first_tab.get_text(strip=True)

def get_panel_forecast_actual(soup, title_contains):
    """
    Read the 'Forecast average' and 'Actual average' rows of one panel.

    The panel is the first one whose <h2> heading contains title_contains
    (case-insensitive). Row values are parsed with to_float.

    Returns (forecast_float, actual_float); (None, None) when no panel
    matches, and None for any row that is absent.
    """
    needle = title_contains.lower()
    headings = soup.select("div.panel .panel-heading h2")
    heading = next(
        (h for h in headings if needle in h.get_text(strip=True).lower()),
        None,
    )
    if heading is None:
        return None, None

    panel = heading.find_parent("div", class_="panel")
    if not panel:
        return None, None

    forecast = None
    actual = None
    for row in panel.select("div.panel-block"):
        cells = row.find_all("span")
        if len(cells) < 2:
            continue
        label = cells[0].get_text(strip=True).lower()
        raw = cells[1].get_text(strip=True)
        if "forecast average" in label:
            forecast = to_float(raw)
        elif "actual average" in label:
            actual = to_float(raw)
    return forecast, actual

def get_events(soup):
    """Return the event names joined with '; ', or None when there are none.

    Uses the first panel whose <h2> heading contains 'events'
    (case-insensitive) and takes the text of the first <span> in each of its
    `div.panel-block` rows, skipping empty texts.
    """
    for heading in soup.select("div.panel .panel-heading h2"):
        if "events" not in heading.get_text(strip=True).lower():
            continue
        panel = heading.find_parent("div", class_="panel")
        if not panel:
            return None
        first_spans = (row.find("span") for row in panel.select("div.panel-block"))
        texts = (span.get_text(strip=True) for span in first_spans if span)
        names = [text for text in texts if text]
        # An empty list joins to "", which becomes None here.
        return "; ".join(names) or None
    return None

def scrape_day(d):
    """Fetch the calendar page for date d and return its fields as a dict.

    Returns None when the response status is not 200.
    """
    response = requests.get(
        f"https://queue-times.com/parks/{PARK_ID}/calendar/{d:%Y/%m/%d}",
        headers=HEADERS,
        timeout=30,
    )
    if response.status_code != 200:
        return None  # page missing or failed

    soup = BeautifulSoup(response.text, "html.parser")

    # Page heading; falls back to the ISO date when the <h1> is absent.
    heading = soup.find("h1", class_="title")
    page_heading = heading.get_text(strip=True) if heading else d.isoformat()

    crowd_pct, crowd_label = get_crowd_values(soup)
    hours = get_opening_hours(soup)

    record = {
        "date": d.isoformat(),
        "page_date_heading": page_heading,
        "park_id": PARK_ID,
        "crowd_percent": crowd_pct,    # string like "13%"
        "crowd_label": crowd_label,    # e.g. "Empty"
    }

    # (forecast key, actual key, panel title) for each weather panel.
    weather_panels = (
        ("temperature_forecast_c", "temperature_actual_c", "Temperature"),
        ("intensity_forecast_mmph", "intensity_actual_mmph", "Precipitation intensity"),
        ("wind_forecast_mps", "wind_actual_mps", "Wind speed"),
    )
    for forecast_key, actual_key, panel_title in weather_panels:
        forecast, actual = get_panel_forecast_actual(soup, panel_title)
        record[forecast_key] = forecast
        record[actual_key] = actual

    record["opening_hours"] = hours          # e.g. "🎄 10:00-20:00"
    record["events"] = get_events(soup)      # e.g. "🎄 Winter Efteling"
    return record

def main():
    """Scrape every day from START through END and write one CSV row per day.

    Rows are written to OUT_CSV as they are scraped. A day for which
    scrape_day returns None is reported as skipped; a day that raises is
    reported as an error, and the run continues with the next day.
    """
    start = date.fromisoformat(START)
    end = date.fromisoformat(END)

    fieldnames = [
        "date", "page_date_heading", "park_id",
        "crowd_percent", "crowd_label",
        "temperature_forecast_c", "temperature_actual_c",
        "intensity_forecast_mmph", "intensity_actual_mmph",
        "wind_forecast_mps", "wind_actual_mps",
        "opening_hours", "events",
    ]

    rows_written = 0
    with open(OUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for d in daterange(start, end):
            # Deliberately broad: one bad day (network error, unexpected page
            # layout) must not abort a run that covers a whole year.
            try:
                data = scrape_day(d)
                if data:
                    writer.writerow(data)
                    rows_written += 1
                    print(f"[OK] {d} -> row written")
                else:
                    print(f"[SKIP] {d} -> page not found or error")
            except Exception as e:
                print(f"[ERR]  {d} -> {e}")

    print(f"\n✅ Done. Wrote {rows_written} rows to {OUT_CSV}")


if __name__ == "__main__":
    main()



[OK] 2023-01-01 -> row written
[OK] 2023-01-02 -> row written
[OK] 2023-01-03 -> row written
[OK] 2023-01-04 -> row written
[OK] 2023-01-05 -> row written
[OK] 2023-01-06 -> row written
[OK] 2023-01-07 -> row written
[OK] 2023-01-08 -> row written
[OK] 2023-01-09 -> row written
[OK] 2023-01-10 -> row written
[OK] 2023-01-11 -> row written
[OK] 2023-01-12 -> row written
[OK] 2023-01-13 -> row written
[OK] 2023-01-14 -> row written
[OK] 2023-01-15 -> row written
[OK] 2023-01-16 -> row written
[OK] 2023-01-17 -> row written
[OK] 2023-01-18 -> row written
[OK] 2023-01-19 -> row written
[OK] 2023-01-20 -> row written
[OK] 2023-01-21 -> row written
[OK] 2023-01-22 -> row written
[OK] 2023-01-23 -> row written
[OK] 2023-01-24 -> row written
[OK] 2023-01-25 -> row written
[OK] 2023-01-26 -> row written
[OK] 2023-01-27 -> row written
[OK] 2023-01-28 -> row written
[OK] 2023-01-29 -> row written
[OK] 2023-01-30 -> row written
[OK] 2023-01-31 -> row written
[OK] 2023-02-01 -> row written
[OK] 202

In [23]:
import pandas as pd

# Per-year metadata CSV files to combine.
files = [
    "efteling_meta_2023.csv",
    "efteling_meta_2024.csv",
    "efteling_meta_2025-11-11.csv",
]

# Read every file and stack them into one frame.
dataframes = [pd.read_csv(f) for f in files]
all_data = pd.concat(dataframes, ignore_index=True)

# Sort by date (optional). A stable sort keeps rows that share a date in
# their original order instead of shuffling them arbitrarily.
all_data = all_data.sort_values("date", kind="mergesort")

# Save as a single file.
all_data.to_csv("efteling_metadata_all.csv", index=False)

print("✅ File merged and named: efteling_metadata_all.csv")

✅ File merged and named: efteling_metadata_all.csv
