### Berechnung der ÖPNV-Taktung

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

# -------------------------------------------------
# 1. Load the GTFS tables
# -------------------------------------------------
# Paths are relative to the working directory of the notebook
stops = pd.read_csv("data/GTFS/stops.txt")
# low_memory=False makes pandas parse the file in one pass instead of in chunks
stop_times = pd.read_csv("data/GTFS/stop_times.txt", low_memory=False)
trips = pd.read_csv("data/GTFS/trips.txt")
calendar = pd.read_csv("data/GTFS/calendar.txt")
# optional:
# calendar_dates = pd.read_csv("data/GTFS/calendar_dates.txt")

# -------------------------------------------------
# 2. Zeitspalte -> Minuten ab Mitternacht
# -------------------------------------------------
def parse_time_to_minutes(t):
    """Convert a clock string to whole minutes since midnight.

    Accepts ``"H:M"`` or ``"H:M:S"``; the seconds field is ignored. Hours are
    not capped at 23, so ``"25:10:00"`` yields 1510.

    Returns ``None`` for missing values (as judged by ``pd.isna``), for
    strings with any other number of ``:``-separated fields, and for hour or
    minute fields that ``int`` cannot parse.
    """
    if pd.isna(t):
        return None

    fields = str(t).split(":")
    if len(fields) not in (2, 3):
        return None

    hours_txt, minutes_txt = fields[0], fields[1]
    try:
        return int(hours_txt) * 60 + int(minutes_txt)
    except ValueError:
        return None

# Add the parsed "minutes" column on a fresh copy of the table and discard
# rows whose arrival_time could not be parsed
stop_times = stop_times.assign(
    minutes=stop_times["arrival_time"].apply(parse_time_to_minutes)
).dropna(subset=["minutes"])

# -------------------------------------------------
# 3. Select the peak hours (HVZ = Hauptverkehrszeit)
# -------------------------------------------------
# Each entry is a (start, end) pair in minutes since midnight
HVZ_WINDOWS = [
    (360, 540),   # 06:00–09:00
    (960, 1140),  # 16:00–19:00
]

def in_any_window(mins, windows):
    """Return True if ``mins`` lies inside at least one ``(lo, hi)`` window.

    Both bounds are inclusive. An empty ``windows`` iterable yields False.
    """
    return any(lo <= mins <= hi for lo, hi in windows)

# Vectorised window test (both bounds inclusive, like in_any_window) instead
# of one Python call per row; stop_times is usually the largest GTFS table.
# Starting from an explicit boolean mask also keeps the selection well-defined
# when the frame is empty or HVZ_WINDOWS has no entries.
_hvz_mask = pd.Series(False, index=stop_times.index)
for _lo, _hi in HVZ_WINDOWS:
    _hvz_mask |= stop_times["minutes"].between(_lo, _hi)

stop_times_hvz = stop_times[_hvz_mask].copy()

# -------------------------------------------------
# 4. Filter weekday services and merge trip info
# -------------------------------------------------
# Keep only trips whose service is flagged as running on Mondays.
# NOTE(review): Monday presumably stands in for "a regular weekday", and
# services defined only via calendar_dates.txt are not covered — confirm.
# .copy() so that the trip_id cast below writes into an independent frame
# rather than into a slice of the merge result.
trips_filtered = trips.merge(calendar, on="service_id")
trips_filtered = trips_filtered[trips_filtered["monday"] == 1].copy()

# Same dtype on both sides so the join keys compare equal
stop_times_hvz["trip_id"] = stop_times_hvz["trip_id"].astype(str)
trips_filtered["trip_id"] = trips_filtered["trip_id"].astype(str)

# Inner join: a left join would keep every stop_time, including those of
# trips removed by the weekday filter above, so the filter would have no
# effect on the headways computed from stopdata.
stopdata = stop_times_hvz.merge(
    trips_filtered[["trip_id", "route_id", "service_id"]],
    on="trip_id",
    how="inner",
)

stopdata["stop_id"] = stopdata["stop_id"].astype(str)

# -------------------------------------------------
# 5. Stop geometry (for maps etc.)
# -------------------------------------------------
stops = stops.copy()
# String ids, like the str cast applied to stopdata["stop_id"]
stops["stop_id"] = stops["stop_id"].astype(str)

# One point per stop built from the stop_lon / stop_lat columns.
# NOTE(review): EPSG_4326 is not defined in this section — presumably a CRS
# constant defined elsewhere in the notebook; confirm.
gdf_stops_all = gpd.GeoDataFrame(
    stops,
    geometry=gpd.points_from_xy(stops["stop_lon"], stops["stop_lat"]),
    crs=EPSG_4326
)

# -------------------------------------------------
# 6. Headway je Stop-ID berechnen
# -------------------------------------------------
def compute_headway_for_window(df, lo, hi, group_col="stop_id", time_col="minutes"):
    """Return the mean gap between consecutive times per group in a window.

    Rows of ``df`` are grouped by ``group_col``. Within each group, the
    values of ``time_col`` that fall in ``[lo, hi]`` (inclusive) are sorted
    and the differences between neighbouring values are averaged.

    Returns a dict mapping group key -> mean gap. Groups with a missing key
    or with fewer than two values inside the window are left out.
    """
    headways = {}
    for group_key, rows in df.groupby(group_col):
        if pd.isna(group_key):
            continue

        in_window = [value for value in rows[time_col] if lo <= value <= hi]
        in_window.sort()
        if len(in_window) < 2:
            # A single time gives no gap to measure
            continue

        gaps = [later - earlier for earlier, later in zip(in_window, in_window[1:])]
        headways[group_key] = sum(gaps) / len(gaps)
    return headways

# Mean headway per stop_id for the morning and the evening peak window
headway_morning = compute_headway_for_window(stopdata, 360, 540, group_col="stop_id")
headway_evening = compute_headway_for_window(stopdata, 960, 1140, group_col="stop_id")

# -------------------------------------------------
# 7. Map the headways onto gdf (nearest stop)
# -------------------------------------------------
# gdf is defined outside this section. Cast to str so the ids can match the
# str stop_id keys of the headway dicts.
gdf["nearest_stop_id"] = gdf["nearest_stop_id"].astype(str)

# Remove columns left over from an earlier run so they are rebuilt below
for col in [
    "headway_morning", "headway_evening", "headway_avg",
    "headway_morning_score", "headway_evening_score",
    "headway_avg_score",
]:
    if col in gdf.columns:
        gdf = gdf.drop(columns=[col])

# Ids without an entry in the dict map to NaN
gdf["headway_morning"] = gdf["nearest_stop_id"].map(headway_morning)
gdf["headway_evening"] = gdf["nearest_stop_id"].map(headway_evening)
# Row-wise mean; NaN is skipped by default, so one available window is enough
gdf["headway_avg"] = gdf[["headway_morning", "headway_evening"]].mean(axis=1)

# -------------------------------------------------
# 8. Headway scores (e.g. 5–60 minutes)
# -------------------------------------------------
fixed_min, fixed_max = 5, 60


def scoreify(series):
    """Map headways onto a 0..1 score where shorter headways score higher.

    Values at or below ``fixed_min`` give 1, values at or above ``fixed_max``
    give 0, and values in between are interpolated linearly.
    """
    span = fixed_max - fixed_min
    scaled = (series - fixed_min) / span
    return 1 - scaled.clip(lower=0, upper=1)

# One score column per headway column, each derived via scoreify
gdf["headway_morning_score"] = scoreify(gdf["headway_morning"])
gdf["headway_evening_score"] = scoreify(gdf["headway_evening"])
gdf["headway_avg_score"]     = scoreify(gdf["headway_avg"])

# -------------------------------------------------
# 9. Debug: missing headways
# -------------------------------------------------
# Rows of gdf for which no average headway could be determined
no_headway_mask = gdf["headway_avg"].isna()
print("Adressen ohne Headway_avg:", int(no_headway_mask.sum()))

# Number of affected rows per nearest stop, most frequent first
na_stops = gdf.loc[no_headway_mask, "nearest_stop_id"].value_counts()
print("Anzahl unterschiedlicher problematischer Haltestellen:", len(na_stops))
print(na_stops.head(20))

if len(na_stops) > 0:
    # Inspect the most frequent problematic stop: show up to 20 of its
    # times inside the morning window (360–540 minutes)
    test_stop = na_stops.index[0]
    sample_times = stopdata.loc[stopdata["stop_id"] == test_stop, "minutes"]
    sample_morning = sorted([t for t in sample_times if 360 <= t <= 540])[:20]
    print("Beispiel-Haltestelle", test_stop, "HVZ-Zeiten morgens:",
          sample_morning)

# Merge public transport data into gdf
# NOTE(review): headway_attribute is not used in this section — presumably
# read by a later step; confirm
headway_attribute = ["headway_avg"]

In [None]:
# -------------------------------------------------
# X. Debug: Abfahrts-Statistik pro Haltestelle (HVZ)
# -------------------------------------------------
def minutes_to_hhmm(mins: int) -> str:
    """Format minutes since midnight as ``'HH:MM'``.

    ``mins`` is truncated to an int first. The hour wraps modulo 24 because
    GTFS times may run past 24:00.
    """
    hours, minutes = divmod(int(mins), 60)
    return f"{hours % 24:02d}:{minutes:02d}"

def summarize_stop_times(df_stop: pd.DataFrame) -> pd.Series:
    """Build the debug statistics for the times of one stop.

    Reads the ``minutes`` column of ``df_stop`` (missing values dropped, the
    rest cast to int) and returns a Series with:

    - ``hvz_dep_total``: number of times
    - ``hvz_dep_morning`` / ``hvz_dep_evening``: number of times within
      360–540 and 960–1140 minutes (bounds inclusive)
    - ``hvz_first`` / ``hvz_last``: earliest and latest time as 'HH:MM'
    - ``hvz_sample_times``: up to ten of the earliest times, joined by ", "

    Without any usable time the counts are 0, first/last are None and the
    sample string is empty.
    """
    ordered = sorted(df_stop["minutes"].dropna().astype(int).tolist())

    if not ordered:
        return pd.Series({
            "hvz_dep_total": 0,
            "hvz_dep_morning": 0,
            "hvz_dep_evening": 0,
            "hvz_first": None,
            "hvz_last": None,
            "hvz_sample_times": ""
        })

    # Counts per peak window (same bounds as HVZ_WINDOWS)
    n_morning = sum(1 for t in ordered if 360 <= t <= 540)
    n_evening = sum(1 for t in ordered if 960 <= t <= 1140)

    # A few example times across both windows
    examples = ", ".join(map(minutes_to_hhmm, ordered[:10]))

    return pd.Series({
        "hvz_dep_total": len(ordered),
        "hvz_dep_morning": n_morning,
        "hvz_dep_evening": n_evening,
        "hvz_first": minutes_to_hhmm(ordered[0]),
        "hvz_last": minutes_to_hhmm(ordered[-1]),
        "hvz_sample_times": examples
    })

# stopdata holds all peak-hour stop times with stop_id and minutes
# -> group by stop_id and compute one statistics row per stop
# NOTE(review): set_index("stop_id") below requires that the apply result
# carries stop_id as a column; whether groupby(..., as_index=False).apply
# does so presumably depends on the pandas version — confirm
stop_debug = (
    stopdata
    .groupby("stop_id", as_index=False)
    .apply(summarize_stop_times)
    .reset_index(drop=True)
)

# Dictionary {stop_id: {statistic: value}} for fast lookup while mapping
stop_debug_dict = (
    stop_debug
    .set_index("stop_id")
    .to_dict(orient="index")
)


## Visualisierung der ÖPNV-Taktung

In [None]:
import folium
from branca.colormap import linear, LinearColormap
import pandas as pd
import numpy as np

# Convert gdf_stops geometry to WGS84
# NOTE(review): gdf_stops is not created in this section (the geometry step
# above builds gdf_stops_all) — presumably defined elsewhere; confirm
gdf_stops = gdf_stops.to_crs(epsg=4326).copy()

# Stops located within the city; CITY_BOUNDING_BOX is defined outside this section
gdf_stops_clip = gdf_stops[gdf_stops.geometry.within(CITY_BOUNDING_BOX)].copy()
print("Haltestellen im Stadtpolygon:", len(gdf_stops_clip))

#
# 2. Colour scale derived from the address headways only
#
# headway_avg = average headway (minutes) for this address
# Assumption: smaller value = better (more frequent service)
addr_headway = pd.to_numeric(gdf["headway_avg"], errors="coerce")

hv_valid = addr_headway.dropna()
if len(hv_valid) > 0:
    # Scale limits from the 1 % and 99 % quantiles of the valid values
    vmin, vmax = hv_valid.quantile([0.01, 0.99])
    if vmin == vmax:
        # if all values are equal (e.g. only one line), widen the range for the colour scale
        vmin = vmin - 0.1
        vmax = vmax + 0.1
else:
    # Fallback in case ALL addresses are NaN
    vmin, vmax = (0, 1)

# Reverse the RdYlGn_11 colour list so that the order of colours along the
# scale is flipped
palette_normal = list(linear.RdYlGn_11.colors)
palette_inverted = palette_normal[::-1]
colormap = LinearColormap(
    colors=palette_inverted,
    vmin=vmin,
    vmax=vmax,
).to_step(n=9)
colormap.caption = "Headway pro Adresse (Minuten, kleiner = besser)"

# 3. Initialise the map (CITY_CENTER is defined outside this section)
m = folium.Map(location=CITY_CENTER, zoom_start=13, tiles="cartodbpositron")

# 4. Plot the addresses (coloured by headway_avg)
#    - coloured when headway_avg is available
#    - light grey when there is no value
for _, row in gdf.iterrows():
    hv_addr = row.get("headway_avg", np.nan)
    hv_morning = row.get("headway_morning", np.nan)
    hv_evening = row.get("headway_evening", np.nan)

    if pd.isna(hv_addr):
        # No value computed -> draw in a neutral colour
        color = "#BBBBBB"
        fill_color = "#BBBBBB"
        hv_label = "kein Wert"
    else:
        color = colormap(hv_addr)
        fill_color = colormap(hv_addr)
        hv_label = f"{hv_addr:.1f} min"

    # Popup with all headways that are available
    # NOTE(review): the column values are inserted into the popup HTML unescaped
    popup_lines = [
        f"{row.get('Straßenname', '')} {row.get('Hsnr', '')}",
        f"<b>Headway (avg):</b> {hv_label}",
    ]
    if pd.notna(hv_morning):
        popup_lines.append(f"Frühspitze: {hv_morning:.1f} min")
    if pd.notna(hv_evening):
        popup_lines.append(f"Abendspitze: {hv_evening:.1f} min")

    popup_html = "<br>".join(popup_lines)

    # Marker position is taken from the row's lat / lon attributes
    folium.CircleMarker(
        location=[row.lat, row.lon],
        radius=5,
        color=color,
        fill=True,
        fill_color=fill_color,
        fill_opacity=0.8,
        weight=1,
        popup=popup_html,
    ).add_to(m)

# 5. Plot the stops (black, neutral + debug info)
for _, row in gdf_stops_clip.iterrows():
    lat_s = row["stop_lat"]
    lon_s = row["stop_lon"]
    stop_id = str(row.get("stop_id"))
    # NOTE(review): stop_id is already a str here (a missing id becomes
    # "None"), so the "Haltestelle" fallback only applies to an empty id
    stop_label = row.get("stop_name", stop_id or "Haltestelle")

    # Per-stop debug statistics, keyed by str stop_id; None if the stop has no entry
    dbg = stop_debug_dict.get(stop_id, None)

    popup_lines = [f"<b>Haltestelle:</b> {stop_label}",
                   f"<b>stop_id:</b> {stop_id}"]

    if dbg is not None:
        popup_lines.append(f"<b>HVZ-Abfahrten gesamt:</b> {dbg['hvz_dep_total']}")
        popup_lines.append(f"&nbsp;&nbsp;Morgens (06–09): {dbg['hvz_dep_morning']}")
        popup_lines.append(f"&nbsp;&nbsp;Abends (16–19): {dbg['hvz_dep_evening']}")
        # Only shown when both the first and the last time are set
        if dbg["hvz_first"] and dbg["hvz_last"]:
            popup_lines.append(
                f"<b>HVZ-Fenster:</b> {dbg['hvz_first']} – {dbg['hvz_last']}"
            )
        if dbg["hvz_sample_times"]:
            popup_lines.append(
                f"<b>Beispiele:</b> {dbg['hvz_sample_times']}"
            )
    else:
        popup_lines.append("<i>Keine HVZ-Abfahrten gefunden.</i>")

    popup_html = "<br>".join(popup_lines)

    folium.CircleMarker(
        location=[lat_s, lon_s],
        radius=3,
        color="black",
        fill=True,
        fill_color="black",
        fill_opacity=1,
        weight=1,
        popup=popup_html,
    ).add_to(m)

# 6. Legend for the address headways
colormap.add_to(m)
# Last expression of the cell, so the notebook renders the map
m
