In [15]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import folium
from folium.plugins import TimestampedGeoJson
import json
from datetime import datetime, timedelta

In [16]:
# ---------- 1. Load GTFS core files ----------
# Feed directory, resolved relative to the notebook's working directory.
gtfs_path = Path("..") / "data" / "raw" /"budapest_gtfs"

# Read stop_id as text in BOTH tables. Left to type inference, a large
# stop_times.txt can be parsed chunk-wise into a mixed int/str column (and
# ids with leading zeros lose them), so the later merge on stop_id would
# silently fail to match rows against stops.txt.
stop_times = pd.read_csv(gtfs_path / "stop_times.txt", dtype={"stop_id": str})
stops = pd.read_csv(gtfs_path / "stops.txt", dtype={"stop_id": str})

In [17]:
# Keep only stops that carry both coordinates, and only stop_times rows
# that actually have an arrival time recorded.
has_arrival = stop_times["arrival_time"].notna()
stops = stops.dropna(subset=["stop_lat", "stop_lon"])
stop_times = stop_times[has_arrival]

In [18]:
# Build proxy districts: partition the stops into 23 k-means clusters using
# their raw (lat, lon) pairs. The fixed random_state keeps labels repeatable.
# NOTE(review): 23 presumably mirrors the number of city districts - confirm.
kmeans = KMeans(n_clusters=23, random_state=42, n_init='auto')
coords = stops[["stop_lat", "stop_lon"]].to_numpy()
kmeans.fit(coords)
stops["district_cluster"] = kmeans.labels_

In [19]:
# Attach each stop's coordinates and cluster label to its stop_times rows.
# Left join: rows whose stop_id has no match in `stops` are kept, with NaN
# in the three added columns.
stop_lookup = stops[["stop_id", "stop_lat", "stop_lon", "district_cluster"]]
stop_times = pd.merge(stop_times, stop_lookup, how="left", on="stop_id")

In [20]:
# Parse hour from arrival time for simplified timeline
def to_hour(t):
    """Return the hour-of-day (0-23) taken from an ``HH:MM:SS`` time string.

    Only the text before the first ``:`` is used. Hours of 24 or more
    (GTFS allows them for trips that run past midnight of the service day)
    wrap around with modulo 24, e.g. ``"25:10:00"`` -> ``1``.

    Parameters
    ----------
    t : str
        Time text such as ``"08:15:00"``.

    Returns
    -------
    int or None
        The wrapped hour, or ``None`` when ``t`` is not a string or its
        leading field is not an integer.
    """
    try:
        return int(t.split(":")[0]) % 24
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no .split (e.g. None or a float NaN).
        # TypeError: .split rejected a str separator (e.g. bytes input).
        # ValueError: leading field is not an integer (e.g. "nan", "").
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return None

# Derive the hour bucket from the arrival time text, then discard every row
# for which no hour could be parsed.
arrival_text = stop_times["arrival_time"].astype(str)
stop_times["hour"] = arrival_text.apply(to_hour)
stop_times = stop_times.dropna(subset=["hour"])

In [21]:
# Simulate timestamps: every hour bucket is placed on the day this cell is
# executed, counted from local midnight, and stored as an ISO-8601 string.
base_date = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)


def _hour_to_iso(h):
    """Return the ISO-8601 text for `base_date` shifted forward by `h` whole hours."""
    return (base_date + timedelta(hours=int(h))).isoformat()


stop_times["timestamp"] = stop_times["hour"].apply(_hour_to_iso)

In [22]:
# Count stop_times rows per unique key combination; the count becomes the
# bubble-size input ("visits").
# NOTE(review): stop_lat/stop_lon are part of the key, so each output row is
# one stop location at one hour, labelled with its cluster - not a per-cluster
# total.
group_keys = ["district_cluster", "hour", "stop_lat", "stop_lon", "timestamp"]
visit_counts = stop_times.groupby(group_keys).size()
agg = visit_counts.reset_index(name="visits")

In [23]:
# Prepare GeoJSON features for the Folium time slider: one Point feature per
# row of `agg`, wrapped in a FeatureCollection.
def _row_to_feature(row):
    """Build the timestamped GeoJSON Point feature for a single `agg` row."""
    visits = row["visits"]
    marker_style = {
        "fillColor": "red",
        "fillOpacity": 0.5,
        "stroke": "true",
        # Radius grows with the visit count and is capped at 10.
        "radius": min(10, visits / 500),
    }
    point = {
        "type": "Point",
        # GeoJSON coordinate order is [longitude, latitude].
        "coordinates": [row["stop_lon"], row["stop_lat"]],
    }
    properties = {
        "time": row["timestamp"],
        "style": {"color": "blue"},
        "icon": "circle",
        "iconstyle": marker_style,
        "popup": f"Cluster {int(row['district_cluster'])}, {int(visits)} visits",
    }
    return {"type": "Feature", "geometry": point, "properties": properties}


features = [_row_to_feature(row) for _, row in agg.iterrows()]

geojson = {"type": "FeatureCollection", "features": features}

In [24]:
# Create the Folium base map and add the time-slider layer to it.
map_center = [47.4979, 19.0402]  # presumably central Budapest - confirm
fmap = folium.Map(location=map_center, zoom_start=12)

# Slider configuration: one-hour steps (ISO-8601 duration "PT1H"), no
# autoplay and no looping by default, with a loop toggle button shown.
slider_options = dict(
    period="PT1H",
    add_last_point=True,
    auto_play=False,
    loop=False,
    max_speed=1,
    loop_button=True,
    date_options="YYYY-MM-DD HH:mm",
    time_slider_drag_update=True,
)
time_layer = TimestampedGeoJson(geojson, **slider_options)
time_layer.add_to(fmap)

<folium.plugins.timestamped_geo_json.TimestampedGeoJson at 0x182ae9c0d10>

In [25]:
# Write the interactive map to a standalone HTML file and echo its path as
# the cell output.
map_path = "../data/folium_cluster_time_slider.html"
fmap.save(outfile=map_path)
map_path

'../data/folium_cluster_time_slider.html'