# RomKorr – Map Builder (Folium)

Dieses Notebook erstellt eine performantere Karte aus `data/processed/letters_master.parquet` (Briefe) und `data/processed/places_agg.parquet` (pro Ort aggregierte Zählungen).

**Wichtige Änderungen gegenüber deinem aktuellen Ansatz:**
- **Hotspots**: ein Marker pro Ort (aggregiert) statt eines Markers pro Brief.
- **Linien**: optionaler Layer (standardmäßig ausgeblendet, über die Layer-Steuerung zuschaltbar) und optionales Sampling, damit die HTML-Datei nicht zu groß wird.
- **Links**: nutzen `link_canonical`.

Output:
- `outputs/romkorr_map.html`

In [1]:
from __future__ import annotations

import html
from pathlib import Path

import numpy as np
import pandas as pd

import folium
from folium.plugins import HeatMap, MarkerCluster

In [2]:
# All paths are derived from the directory the notebook kernel was started in.
PROJECT_ROOT = Path.cwd()

# Inputs: processed parquet files.
MASTER = PROJECT_ROOT.joinpath("data", "processed", "letters_master.parquet")
PLACES = PROJECT_ROOT.joinpath("data", "processed", "places_agg.parquet")

# Output: the rendered map.
OUT_DIR = PROJECT_ROOT.joinpath("outputs")
OUT_HTML = OUT_DIR.joinpath("romkorr_map.html")

# Create the output folder up front so the final save cannot fail on a missing directory.
OUT_DIR.mkdir(parents=True, exist_ok=True)

In [3]:
# Load the per-letter table and the per-place aggregate table.
letters = pd.read_parquet(MASTER)
places = pd.read_parquet(PLACES)

# Fail fast with a readable message if a column that later cells index directly is missing
# (otherwise the error only shows up several cells further down as a bare KeyError).
missing_letter_cols = {
    "dispatch_lat", "dispatch_lon", "destination_lat", "destination_lon", "date_iso",
} - set(letters.columns)
assert not missing_letter_cols, f"{MASTER.name} is missing columns: {sorted(missing_letter_cols)}"

missing_place_cols = {"lat", "lon", "count", "place_mode"} - set(places.columns)
assert not missing_place_cols, f"{PLACES.name} is missing columns: {sorted(missing_place_cols)}"

letters.head()

Unnamed: 0,Date,Sender,Recipient,place_dispatch,place_destination,dispatch_geonames_url,destination_geonames_url,link,dispatch_lat,dispatch_lon,destination_lat,destination_lon,distance_km,letter_id,link_canonical,date_parsed,date_iso
0,"Donnerstag, 6. Januar 1791",August Wilhelm von Schlegel,Christian Gottlob Heyne,Göttingen,Göttingen,https://www.geonames.org/2918632,https://www.geonames.org/2918632,https://briefe-der-romantik.de/letters/view/1?...,51.53443,9.93228,51.53443,9.93228,0.0,1,https://briefe-der-romantik.de/letters/view/1?...,1791-01-06,1791-01-06
1,"Mittwoch, 20. September 1797",August Wilhelm von Schlegel,Christian Gottlob Heyne,Jena,Göttingen,https://www.geonames.org/2895044,https://www.geonames.org/2918632,https://briefe-der-romantik.de/letters/view/2?...,50.92878,11.5899,51.53443,9.93228,133.622388,2,https://briefe-der-romantik.de/letters/view/2?...,1797-09-20,1797-09-20
2,"Samstag, 26. Mai 1798",August Wilhelm von Schlegel,Georg Joachim Göschen,Berlin,Leipzig,https://www.geonames.org/2950159,https://www.geonames.org/2879139,https://briefe-der-romantik.de/letters/view/3?...,52.52437,13.41053,51.33962,12.37129,149.769217,3,https://briefe-der-romantik.de/letters/view/3?...,1798-05-26,1798-05-26
3,"Mittwoch, 31. Oktober 1798",August Wilhelm von Schlegel,Georg Joachim Göschen,Jena,Leipzig,https://www.geonames.org/2895044,https://www.geonames.org/2879139,https://briefe-der-romantik.de/letters/view/4?...,50.92878,11.5899,51.33962,12.37129,71.129758,4,https://briefe-der-romantik.de/letters/view/4?...,1798-10-31,1798-10-31
4,[Mitte August 1801],Sophie Bernhardi,August Wilhelm von Schlegel,Berlin,Jena,https://www.geonames.org/2950159,https://www.geonames.org/2895044,https://briefe-der-romantik.de/letters/view/5?...,52.52437,13.41053,50.92878,11.5899,217.247072,5,https://briefe-der-romantik.de/letters/view/5?...,1801-08-01,1801-08-01


## 1) Basemap + Layer

Layer:
- Heatmap (Counts)
- Hotspot-Marker (aggregiert, klickbar)
- Linien (optional; standardmäßig gesampelt)

In [4]:
def make_map(center=(51.0, 10.0), zoom_start=6) -> folium.Map:
    """Create the empty base map that the layers are added to.

    Args:
        center: (lat, lon) pair the map is initially centred on.
        zoom_start: initial zoom level.

    Returns:
        A new ``folium.Map`` created with ``control_scale=True`` and ``prefer_canvas=True``.
    """
    map_options = {
        "location": center,
        "zoom_start": zoom_start,
        "control_scale": True,
        # prefer_canvas=True -> noticeably faster with many vector objects
        "prefer_canvas": True,
    }
    return folium.Map(**map_options)


# Base map used by the following cells; the bare `m` renders it inline.
m = make_map()
m

In [5]:
# Heatmap: counts for dispatch and destination combined (summed per coordinate).
heat_points = places.groupby(["lat", "lon"], as_index=False)["count"].sum()

heat_layer = folium.FeatureGroup(name="Heatmap", show=True)

# HeatMap takes a list of [lat, lon, weight] rows; the summed count is the weight.
heat_rows = heat_points[["lat", "lon", "count"]].to_numpy().tolist()
heat_map = HeatMap(data=heat_rows, radius=18, blur=20, max_zoom=7)
heat_map.add_to(heat_layer)
heat_layer.add_to(m)

# Show the ten busiest coordinates as a quick sanity check.
heat_points.sort_values("count", ascending=False).head(10)

Unnamed: 0,lat,lon,count
130,52.52437,13.41053,2366
61,50.92878,11.5899,2070
74,51.05089,13.73832,326
126,52.37403,4.88969,298
97,51.33962,12.37129,293
133,52.73679,15.22878,259
10,47.36667,8.55,251
65,50.9803,11.32903,244
123,52.26594,10.52673,233
125,52.37052,9.73322,211


In [6]:
# Hotspot markers: one marker per coordinate; the popup shows example letters.
hotspots_layer = folium.FeatureGroup(name="Hotspots", show=True)
cluster = MarkerCluster(name="Hotspot-Cluster")
cluster.add_to(hotspots_layer)

def key(lat, lon):
    """Return the ``(lat, lon)`` pair as a tuple of plain floats, usable as a dict key."""
    lat_f = float(lat)
    lon_f = float(lon)
    return lat_f, lon_f

# Example letters per coordinate (dispatch + destination), filled by add_examples().
examples_by_k = {}

def add_examples(sub: pd.DataFrame, lat_col: str, lon_col: str, max_examples: int = 6) -> None:
    """Collect up to ``max_examples`` example letters per coordinate into ``examples_by_k``.

    Rows without a coordinate in ``lat_col``/``lon_col`` are ignored. The remaining rows
    are sorted by ``date_iso`` (missing dates last) and appended, per ``(lat, lon)`` key,
    to the module-level ``examples_by_k`` dict until the cap is reached.

    The function is meant to be called once per role (dispatch, destination). A letter
    whose dispatch and destination share a coordinate would otherwise be listed twice
    under that coordinate, so rows are de-duplicated by their index label.
    Assumes the index of ``sub`` identifies a letter uniquely.

    Args:
        sub: letters table; must contain ``lat_col``, ``lon_col`` and ``date_iso``.
        lat_col: name of the latitude column to key on.
        lon_col: name of the longitude column to key on.
        max_examples: maximum number of examples kept per coordinate (default 6).
    """
    sub = sub.dropna(subset=[lat_col, lon_col]).copy()
    sub["k"] = list(zip(sub[lat_col].astype(float), sub[lon_col].astype(float)))
    # Sort by date so the earliest letters become the examples.
    sub = sub.sort_values(["date_iso"], na_position="last")
    for k0, grp in sub.groupby("k"):
        bucket = examples_by_k.setdefault(k0, [])
        if len(bucket) >= max_examples:
            continue
        # Index labels already stored for this coordinate (possibly from an earlier call).
        seen = {row.name for row in bucket}
        for idx, r in grp.iterrows():
            if len(bucket) >= max_examples:
                break
            if idx in seen:
                continue
            bucket.append(r)
            seen.add(idx)

# Dispatch coordinates first, then destination coordinates.
add_examples(letters, "dispatch_lat", "dispatch_lon")
add_examples(letters, "destination_lat", "destination_lon")

# Place name per coordinate (mode across roles)
places_keyed = places.assign(
    k=list(zip(places["lat"].astype(float), places["lon"].astype(float)))
)
place_name_by_k = {}
for coord, names in places_keyed.groupby("k")["place_mode"]:
    modes = names.dropna().mode()
    # Fall back to a placeholder when no name is recorded for this coordinate.
    place_name_by_k[coord] = modes.iloc[0] if len(modes) else "Ort (unbekannt)"

def _esc(value, default: str = "?") -> str:
    """Return ``value`` as HTML-escaped text, or ``default`` if it is missing.

    Missing means ``None``/``NaN``/``NaT``. Escaping (including quotes) keeps names or
    URLs that contain ``<``, ``&`` or ``'`` from breaking the popup markup or the
    single-quoted ``href`` attribute.
    """
    if pd.isna(value):
        return default
    return html.escape(str(value), quote=True)


# One circle per coordinate, busiest places first; the popup lists the collected examples.
hotspots_sorted = heat_points.sort_values("count", ascending=False)
for lat, lon, raw_count in zip(
    hotspots_sorted["lat"], hotspots_sorted["lon"], hotspots_sorted["count"]
):
    k0 = key(lat, lon)
    cnt = int(raw_count)
    place_name = _esc(place_name_by_k.get(k0), default="Ort (unbekannt)")

    ex_items = [
        f"<li><b>{_esc(e.get('Sender'))}</b> → <b>{_esc(e.get('Recipient'))}</b> "
        f"({_esc(e.get('date_iso'))}) "
        f"<a href='{_esc(e.get('link_canonical'), default='#')}' target='_blank'>Original</a></li>"
        for e in examples_by_k.get(k0, [])
    ]

    popup_html = (
        "<div style='max-width: 380px;'>"
        f"<h4 style='margin:0 0 6px 0;'>{place_name}</h4>"
        f"<div><b>Briefe (Dispatch+Destination):</b> {cnt}</div>"
        "<hr style='margin:8px 0;' />"
        "<div><b>Beispiele:</b></div>"
        "<ul style='padding-left: 18px; margin: 6px 0;'>"
        + ("".join(ex_items) if ex_items else "<li>Keine Beispiele verfügbar.</li>")
        + "</ul></div>"
    )

    folium.CircleMarker(
        location=[float(lat), float(lon)],
        # Log-scaled radius, clamped to 3..18 px so the largest places do not dominate.
        radius=max(3, min(18, np.log1p(cnt) * 3.0)),
        fill=True,
        fill_opacity=0.7,
        popup=folium.Popup(popup_html, max_width=420)
    ).add_to(cluster)

hotspots_layer.add_to(m)

<folium.map.FeatureGroup at 0x23651664610>

In [7]:
# Line layer (optional): only a random sample is drawn so the HTML does not get huge.
lines_layer = folium.FeatureGroup(name="Linien (Sample)", show=False)
lines_layer.add_to(m)

SAMPLE_FRAC = 0.25  # draw 25% of the letters (raise/lower as needed); values outside (0, 1) draw all
rng = np.random.default_rng(42)  # fixed seed -> same sample on every run

lines = letters.dropna(subset=["dispatch_lat","dispatch_lon","destination_lat","destination_lon"]).copy()
if 0 < SAMPLE_FRAC < 1:
    lines = lines.loc[rng.random(len(lines)) < SAMPLE_FRAC]


def _line_popup_html(row) -> str:
    """Build the popup HTML for one letter (``row`` is a mapping of column name -> value).

    Text fields are HTML-escaped; missing values (``None``/``NaN``/``NaT``) are shown
    as ``?`` (``#`` for the link) instead of the literal text "nan".
    """
    def text(col: str, default: str = "?") -> str:
        value = row.get(col)
        return default if pd.isna(value) else html.escape(str(value), quote=True)

    distance = row.get("distance_km")
    distance_txt = "?" if pd.isna(distance) else f"{float(distance):.1f}"
    return (
        f"<b>{text('Sender')}</b> → <b>{text('Recipient')}</b><br/>"
        f"{text('place_dispatch')} → {text('place_destination')}<br/>"
        f"{text('date_iso')}<br/>"
        f"Distanz: {distance_txt} km<br/>"
        f"<a href='{text('link_canonical', '#')}' target='_blank'>Original</a>"
    )


# Plain dict records avoid building a Series per row, as iterrows() would.
for r in lines.to_dict("records"):
    coords = [(r["dispatch_lat"], r["dispatch_lon"]), (r["destination_lat"], r["destination_lon"])]
    folium.PolyLine(
        locations=coords,
        weight=1,
        opacity=0.35,
        popup=folium.Popup(_line_popup_html(r), max_width=360)
    ).add_to(lines_layer)

In [8]:
# Add the layer toggle after the heatmap, hotspot and line layers have been attached.
# NOTE(review): `m` is created in an earlier cell, so re-running this cell alone
# adds another LayerControl to the same map object.
folium.LayerControl().add_to(m)
# Write the map as an HTML file; the bare expression below displays the output path.
m.save(str(OUT_HTML))
OUT_HTML

WindowsPath('g:/Meine Ablage/CodingProjekte/RomKorr/RomKorr/outputs/romkorr_map.html')