In [4]:
import os
import warnings
from pathlib import Path

import geopandas as gpd
import pandas as pd
from sklearn.metrics import (
    confusion_matrix, cohen_kappa_score,
    accuracy_score, precision_score, recall_score, f1_score
)


def save_confusion_matrix(gdf, field_true, field_pred, csv_path, mapping=None):
    """
    Compute a confusion matrix plus summary metrics and write both to one CSV file.

    Rows where either label column is null/NaN are removed first. The remaining
    codes are normalized (e.g. ``1.0`` or ``"1"`` become ``1``) and translated to
    class names through ``mapping``. Rows whose code is not a key of ``mapping``
    are dropped as well, and a ``UserWarning`` reports how many were dropped.

    Parameters
    ----------
    gdf : DataFrame-like
        Table containing both label columns (only ``.loc`` column access is used).
    field_true : str
        Name of the column holding the reference labels.
    field_pred : str
        Name of the column holding the predicted labels.
    csv_path : str or path-like
        Output file. It is overwritten and contains a "Confusion Matrix"
        section followed by a "Metrics" section.
    mapping : dict, optional
        Code -> class-name table. Keys must be sortable; their sorted order
        defines the row/column order of the matrix. Defaults to the six
        land-use classes hard-coded below.

    Returns
    -------
    dict
        ``"confusion_matrix"``: DataFrame (rows = true, columns = predicted),
        ``"metrics"``: single-column DataFrame with accuracy, weighted
        precision/recall/F1 and Cohen's kappa,
        ``"labels"``: list of the class names that actually occur.

    Raises
    ------
    ValueError
        If no row is left after removing null and unmapped labels.
    """
    if mapping is None:
        # Default label table, used when the caller does not supply one.
        mapping = {
            1: "Forest land",
            2: "Cropland",
            3: "Grassland",
            4: "Wetlands",
            5: "Settlements",
            6: "Other Land"
        }

    def normalize_label(val):
        """Turn integral numbers/numeric strings into int; strip other values."""
        if pd.isna(val):
            return None
        try:
            as_float = float(val)
        except (TypeError, ValueError, OverflowError):
            # Not numeric at all: fall back to the trimmed text form.
            return str(val).strip()
        return int(as_float) if as_float.is_integer() else as_float

    # Keep only rows where both labels are present.
    mask = ~(gdf[field_true].isna() | gdf[field_pred].isna())
    df = gdf.loc[mask, [field_true, field_pred]].copy()

    # Normalize the codes, then translate them to class names.
    for column in (field_true, field_pred):
        df[column] = df[column].apply(normalize_label).map(mapping)

    # Codes missing from the mapping became NaN in the step above; scikit-learn
    # cannot score them, so remove those rows and tell the caller.
    unmapped = df[field_true].isna() | df[field_pred].isna()
    n_unmapped = int(unmapped.sum())
    if n_unmapped:
        warnings.warn(
            f"{n_unmapped} row(s) dropped: label code not found in mapping",
            UserWarning,
            stacklevel=2,
        )
        df = df.loc[~unmapped]

    if df.empty:
        raise ValueError(
            f"No rows with valid labels in both '{field_true}' and '{field_pred}'"
        )

    y_true = df[field_true]
    y_pred = df[field_pred]

    # Class names that occur in either column, ordered by their mapping key.
    present = set(y_true) | set(y_pred)
    labels = [mapping[key] for key in sorted(mapping) if mapping[key] in present]

    # Confusion matrix: rows are true classes, columns are predicted classes.
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_df = pd.DataFrame(cm, index=labels, columns=labels)
    cm_df.index.name = "true"
    cm_df.columns.name = "pred"

    # Summary metrics; precision/recall/F1 are weighted by class support.
    metrics = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average="weighted", zero_division=0),
        "recall": recall_score(y_true, y_pred, average="weighted", zero_division=0),
        "f1_score": f1_score(y_true, y_pred, average="weighted", zero_division=0),
        "kappa": cohen_kappa_score(y_true, y_pred)
    }
    metrics_df = pd.DataFrame.from_dict(metrics, orient="index", columns=["value"])

    # newline="" stops Python from translating line endings a second time when
    # pandas writes into the handle (otherwise Windows gets "\r\r\n" rows).
    # The section headers use os.linesep, which is also pandas' default row
    # terminator, so the whole file has consistent line endings.
    with open(csv_path, "w", encoding="utf-8", newline="") as f:
        f.write("Confusion Matrix" + os.linesep)
        cm_df.to_csv(f)
        f.write(os.linesep + "Metrics" + os.linesep)
        metrics_df.to_csv(f)

    return {
        "confusion_matrix": cm_df,
        "metrics": metrics_df,
        "labels": labels
    }


In [None]:
import geopandas as gpd

In [5]:


# Folder that holds both the input GeoPackage and the CSV report. It is defined
# once so the two paths below cannot drift apart.
ANALYSIS_DIR = Path(r"C:\Users\aebim\Documents\02_Ausbildung\Studium\05_Semester\5230_Geoniformatik_Raumanalyse\Projektarbeit\03_GitHub\data\analysis\av")

gdf = gpd.read_file(ANALYSIS_DIR / "AV_As_Maximal_Area.gpkg")
field_true = "IPCC_AS_Id"  # column passed as the reference labels
field_pred = "IPCC_AV_Id"  # column passed as the predicted labels
csv_path = str(ANALYSIS_DIR / "confusion_matrix1.csv")

result = save_confusion_matrix(gdf, field_true, field_pred, csv_path)

print(result["confusion_matrix"])
print(result["metrics"])


pred         Forest land  Cropland  Grassland  Wetlands  Settlements  \
true                                                                   
Forest land         6084       554         38        73            6   
Cropland              21      3105          7        10           16   
Grassland            185      6549        190        22           20   
Wetlands              75       123        417      3304            3   
Settlements          142      2539         76        48          680   
Other Land             3         4          0         0            0   

pred         Other Land  
true                     
Forest land           4  
Cropland              2  
Grassland             5  
Wetlands              0  
Settlements          98  
Other Land            0  
              value
accuracy   0.547597
precision  0.655963
recall     0.547597
f1_score   0.508534
kappa      0.449713
