In [2]:
import geopandas as gpd
import pandas as pd
import choropleth_pipeline as cp

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import LogNorm, TwoSlopeNorm

In [7]:
# Metric column to map; also used in the plot title and the output file name.
value_col = "D30"

# The geometry layers do not depend on the year, so they are read once here
# instead of re-opening the GeoPackage on every loop iteration.
# NOTE(review): the backslash-separated relative path only resolves on Windows —
# confirm whether this notebook has to run on other platforms.
vg  = cp.VG250Spec(gpkg_path=r"..\geometry\DE_VG250.gpkg", id_col="ARS")
gem = cp.load_level(vg, "GEM")[["ARS","geometry"]].copy()
laender = cp.load_level(vg, 'LAN').copy()
# Cast the join key to pandas' string dtype before merging on it.
gem["ARS"] = gem["ARS"].astype("string")

for year in range(2000,2025):
    # Per-year metrics; `vals` intentionally keeps its name because the next
    # cell inspects the frame left over from the last iteration.
    vals = pd.read_parquet(f"cache/hitze_metrics_gem_year_{year}.parquet")

    # Inner join: only rows whose ARS occurs in both the geometry layer and the
    # metrics table are drawn. `merge` returns a new frame, so `gem` is reusable.
    gdf = gem.merge(vals, on = 'ARS', how = 'inner')

    fig, ax = cp.plot_choropleth_continuous(
        gdf,
        value_col=value_col,
        cmap_name="OrRd",
        title=f"{value_col} {year}",
        laender = laender,
        figsize=(8, 10),
        # Fixed colour scale (0 / 5 / 10) so the maps of all years are comparable.
        # A fresh norm object is created per figure on purpose.
        norm = TwoSlopeNorm(vmin=0, vcenter=5, vmax=10)
    )
    out_file = cp.save_map(
        fig,
        filename=f"{value_col}_{year}.png",
        out_dir="exports/Hitze",
        dpi=300
    )
    # Close each figure after saving so 25 open figures do not pile up in memory.
    plt.close(fig)

In [8]:
# Show the column names of `vals`. The name is bound inside the export loop
# above, so this displays the frame read in that loop's final iteration.
vals.columns

Index(['year', 'ARS', 'H30', 'H32', 'H35', 'I30', 'H30_any', 'H32_any',
       'H35_any', 'H30_frac10', 'H32_frac10', 'H35_frac10', 'D30'],
      dtype='object')

In [None]:
import pandas as pd, numpy as np
from pathlib import Path

# ---------- Robust standardisation ----------
def robust_z(s: pd.Series, clip=3.0) -> pd.Series:
    """Return robust z-scores of ``s``, limited to ``[-clip, clip]``.

    The centre is the (NaN-ignoring) median. The scale is the first usable
    candidate out of:

    1. ``1.4826 * MAD`` (median absolute deviation),
    2. ``IQR / 1.349`` if the MAD-based scale is zero or not finite,
    3. the NaN-ignoring standard deviation if the IQR is not positive.

    Parameters
    ----------
    s : pd.Series
        Values to standardise; cast to ``float`` first.
    clip : float, default 3.0
        Symmetric bound applied to the resulting z-scores.

    Returns
    -------
    pd.Series
        Float series on the index of ``s``. If ``s`` is empty, contains only
        missing values, or every scale candidate is zero / not finite, a
        series of zeros is returned.
    """
    s = s.astype(float)

    # Empty or all-NaN input: answer directly instead of sending an empty /
    # all-NaN slice through the NumPy reductions below, which only produces
    # RuntimeWarnings and, for empty input, a percentile result that cannot
    # be unpacked into two values.
    if not s.notna().any():
        return pd.Series(np.zeros(len(s)), index=s.index)

    med = np.nanmedian(s)
    mad = np.nanmedian(np.abs(s - med))
    scale = 1.4826 * mad
    if not np.isfinite(scale) or scale == 0:  # fallbacks: IQR, then std
        q75, q25 = np.nanpercentile(s, [75, 25])
        iqr = q75 - q25
        scale = iqr / 1.349 if iqr > 0 else np.nanstd(s)
    if not np.isfinite(scale) or scale == 0:
        # No spread at all (e.g. all values identical): every score is 0.
        return pd.Series(np.zeros(len(s)), index=s.index)
    z = (s - med) / scale
    return z.clip(-clip, clip)

# ---------- Loader: yearly parquet files ----------
def load_years(cache_dir: str | Path, start_year: int, end_year: int) -> pd.DataFrame:
    cache_dir = Path(cache_dir)
    dfs = []
    for y in range(start_year, end_year + 1):
        f = cache_dir / f"hitze_metrics_gem_year_{y}.parquet"
        if f.exists():
            df = pd.read_parquet(f, columns=["year","ARS","H30","H32","H35","I30"])
            dfs.append(df)
    if not dfs:
        raise FileNotFoundError("Keine Jahres-Parquets im Zeitraum gefunden.")
    out = pd.concat(dfs, ignore_index=True)
    out["ARS"] = out["ARS"].astype(str)  # ARS unverändert als String
    return out

# ---------- Main function ----------
def compute_heat_index(cache_dir="cache",
                       start_year=2020, end_year=2024,
                       min_years=3, variant="A",
                       scale_to_0_100=True):
    """Compute a composite heat index per ARS key for a multi-year period.

    Steps:

    1. Load the yearly metrics with ``load_years``.
    2. Average ``H30``, ``H32``, ``H35`` and ``I30`` per ``ARS`` over the period
       and drop keys with fewer than ``min_years`` distinct years.
    3. Standardise each period mean across all remaining keys with ``robust_z``.
    4. Combine the z-scores into ``Index_raw``:

       * variant ``"A"``: ``0.6 * z(I30) + 0.4 * z(H35)``
       * variant ``"B"``: ``0.4 * mean(z(H30), z(H32)) + 0.4 * z(I30) + 0.2 * z(H35)``

    5. Optionally rescale ``Index_raw`` to ``Index_0_100`` using its 1st and
       99th percentile as 0 and 100 (values outside are clipped).

    Parameters
    ----------
    cache_dir : str or Path, default "cache"
        Directory with the yearly parquet files.
    start_year, end_year : int
        First and last year of the period (inclusive).
    min_years : int, default 3
        Minimum number of distinct years a key needs to be kept.
    variant : str, default "A"
        ``"A"`` or ``"B"`` (case-insensitive, surrounding whitespace ignored).
    scale_to_0_100 : bool, default True
        Whether to add the ``Index_0_100`` column.

    Returns
    -------
    pd.DataFrame
        One row per key with the columns ``ARS``, ``years_used``, the four
        ``*_mean`` columns, the four ``*_mean_z`` columns, ``Index_raw`` and,
        if requested, ``Index_0_100``; sorted by ``Index_raw`` descending.
        If no key reaches ``min_years``, an empty frame with these columns is
        returned.

    Raises
    ------
    ValueError
        If ``variant`` is neither ``"A"`` nor ``"B"``.
    FileNotFoundError
        Propagated from ``load_years`` when no yearly file exists.
    """
    # Validate before any I/O. Previously every value other than "A" silently
    # selected the variant-B formula, which hid typos.
    variant_key = str(variant).strip().upper()
    if variant_key not in ("A", "B"):
        raise ValueError(f"Unknown variant {variant!r}; expected 'A' or 'B'.")

    df = load_years(cache_dir, start_year, end_year)

    mean_cols = ["H30_mean", "H32_mean", "H35_mean", "I30_mean"]
    z_cols = [c + "_z" for c in mean_cols]

    # Period mean per key, plus the number of distinct years that contributed.
    agg = (df.groupby("ARS")
             .agg(years_used=("year", "nunique"),
                  H30_mean=("H30", "mean"),
                  H32_mean=("H32", "mean"),
                  H35_mean=("H35", "mean"),
                  I30_mean=("I30", "mean"))
             .reset_index())
    agg = agg[agg["years_used"] >= min_years].copy()

    cols = ["ARS", "years_used", *mean_cols, *z_cols, "Index_raw"]
    if scale_to_0_100:
        cols.append("Index_0_100")

    # Nothing survived the min_years filter: return an empty frame with the
    # full column layout instead of running median/percentile reductions on
    # empty data.
    if agg.empty:
        derived = cols[cols.index(z_cols[0]):]
        empty = agg.assign(**{c: pd.Series(dtype=float) for c in derived})
        return empty[cols].reset_index(drop=True)

    # Robust z-scores, computed across keys for each period mean.
    for col in mean_cols:
        agg[col + "_z"] = robust_z(agg[col])

    # Index formula.
    if variant_key == "A":  # lean variant
        agg["Index_raw"] = 0.6*agg["I30_mean_z"] + 0.4*agg["H35_mean_z"]
    else:  # "B": balanced variant
        agg["z_freq"] = (agg["H30_mean_z"] + agg["H32_mean_z"]) / 2.0
        agg["Index_raw"] = 0.4*agg["z_freq"] + 0.4*agg["I30_mean_z"] + 0.2*agg["H35_mean_z"]

    # Robust 0–100 score.
    if scale_to_0_100:
        raw = agg["Index_raw"]
        lo, hi = np.nanpercentile(raw, [1, 99])
        if not hi > lo:
            # Percentiles coincide: fall back to the full range, using min/max
            # for BOTH offset and span so the scale stays consistent.
            lo, hi = raw.min(), raw.max()
        span = hi - lo
        if not (np.isfinite(span) and span > 0):
            span = 1.0  # constant index: avoid division by zero
        agg["Index_0_100"] = ((raw - lo) / span * 100).clip(0, 100)

    return agg[cols].sort_values("Index_raw", ascending=False).reset_index(drop=True)

# ---------- Example calls ----------
# Five-year window 2020–2024, variant A:
idx_2020_24 = compute_heat_index(
    cache_dir="cache",
    start_year=2020,
    end_year=2024,
    min_years=3,
    variant="A",
)
# Longer window 2000–2024, variant B:
idx_2000_24 = compute_heat_index(
    cache_dir="cache",
    start_year=2000,
    end_year=2024,
    min_years=10,
    variant="B",
)

# Optionally persist the results:
# idx_2020_24.to_parquet("out/hitze_index_2020_2024.parquet", index=False)
# idx_2000_24.to_parquet("out/hitze_index_2000_2024.parquet", index=False)
